@@ -22,34 +22,128 @@ await_binderhub() {
2222 kubectl rollout status --watch --timeout 300s deployment/binder
2323}
2424
full_namespace_report () {
    # This was copied from z2jh 2021-01-06. Work to make it a dedicated GitHub
    # action and avoid a duplicated code base is planned. / @consideRatio
    # ------------------------------------------------------------------------
    #
    # Purpose:
    # - To chart agnostically print relevant information of the resources in a
    #   namespace.
    #
    # Arguments:
    # - Accepts a sequence of arguments such as "deploy/hub" "deploy/proxy". It
    #   will do `kubectl logs --all-containers <arg>` on them. Each argument is
    #   passed to kubectl as a single word.
    #
    # Output:
    # - Everything is written to stdout (kubectl's own stderr is untouched).
    #
    # Side effects:
    # - Sets the shell variables yellow, red and divider, and exports format
    #   and format_error. This is kept as-is in case other parts of the CI
    #   scripts reuse them.
    #
    # Relevant references:
    # - ContainerStatus ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#containerstatus-v1-core

    # printf formatting: a bold colored topic with a divider.
    yellow=33
    red=31
    divider='--------------------------------------------------------------------------------'
    # GitHub Workflows resets formatting after \n, so it's reapplied.
    export format="\n\033[${yellow};1m%s\n\033[${yellow};1m${divider}\033[0m\n"
    export format_error="\n\033[${red};1m%s\n\033[${red};1m${divider}\033[0m\n"

    # NOTE: $format / $format_error are deliberately used as the printf
    # *format* string below, they contain the %s placeholder for the topic.
    printf "$format" "# Full namespace report"
    printf "$format" "## Resource overview"
    # list workloads (deployment,statefulset,daemonset,pod)
    printf "$format" "### \$ kubectl get deploy,sts,ds,pod"
    kubectl get deploy,sts,ds,pod
    # list config (secret,configmap)
    printf "$format" "### \$ kubectl get secret,cm"
    kubectl get secret,cm
    # list networking (service,ingress,networkpolicy)
    printf "$format" "### \$ kubectl get svc,ing,netpol"
    kubectl get svc,ing,netpol
    # list rbac (serviceaccount,role,rolebinding)
    printf "$format" "### \$ kubectl get sa,role,rolebinding"
    kubectl get sa,role,rolebinding

    # Check if any container of any pod has a non ready status
    PODS_WITH_NON_READY_CONTAINER=$(
        kubectl get pods -o json \
            | jq -r '
                .items[]
                | select(
                    any(.status.initContainerStatuses[]?; .ready == false)
                    or
                    any(.status.containerStatuses[]?; .ready == false)
                )
                | .metadata.name
            '
    )
    if [ -n "$PODS_WITH_NON_READY_CONTAINER" ]; then
        printf "$format_error" "## Pods with non-ready container(s) detected!"
        # The list is newline separated pod names, so it is intentionally
        # left unquoted in the for loops to get one iteration per pod.
        for pod in $PODS_WITH_NON_READY_CONTAINER; do
            printf '%s\n' "- $pod"
        done

        for pod in $PODS_WITH_NON_READY_CONTAINER; do
            printf "$format_error" "### \$ kubectl describe pod/$pod"
            kubectl describe "pod/$pod"
            printf "$format_error" "### \$ kubectl logs --all-containers pod/$pod"
            kubectl logs --all-containers "pod/$pod" || echo  # a newline on failure for consistency with non-failure
        done

    fi

    # Check if any container of any pod has a restartCount > 0. Then, we inspect
    # their logs with --previous. We also add --follow and --ignore-errors in
    # order to ensure we get the information from all containers, and combined
    # with --previous it will exit and not get stuck.
    #
    # ref: https://github.com/kubernetes/kubernetes/issues/97530
    PODS_WITH_RESTARTED_CONTAINERS=$(
        kubectl get pods -o json \
            | jq -r '
                .items[]
                | select(
                    any(.status.initContainerStatuses[]?; .restartCount > 0)
                    or
                    any(.status.containerStatuses[]?; .restartCount > 0)
                )
                | .metadata.name
            '
    )
    if [ -n "$PODS_WITH_RESTARTED_CONTAINERS" ]; then
        printf "$format_error" "## Pods with restarted containers detected!"
        for pod in $PODS_WITH_RESTARTED_CONTAINERS; do
            printf '%s\n' "- $pod"
        done

        for pod in $PODS_WITH_RESTARTED_CONTAINERS; do
            printf "$format_error" "### \$ kubectl describe pod/$pod"
            kubectl describe "pod/$pod"
            printf "$format_error" "### \$ kubectl logs --previous --all-containers --follow --ignore-errors pod/$pod"
            kubectl logs --previous --all-containers --follow --ignore-errors "pod/$pod"
        done
    fi

    # if any pods that should be scheduled by the user-scheduler are pending ->
    # show user-scheduler's logs
    PENDING_USER_PODS=$(
        kubectl get pods -l "component in (user-placeholder,singleuser-server)" -o json \
            | jq -r '
                .items[]
                | select(.status.phase == "Pending")
                | .metadata.name
            '
    )
    if [ -n "$PENDING_USER_PODS" ]; then
        printf "$format_error" "## Pending pods detected!"
        for pod in $PENDING_USER_PODS; do
            printf '%s\n' "- $pod"
        done

        printf "$format_error" "### \$ kubectl logs --all-containers deploy/user-scheduler"
        kubectl logs --all-containers deploy/user-scheduler || echo  # a newline on failure for consistency with non-failure
    fi

    # show container logs of all important workloads passed to the function,
    # "deploy/hub" and "deploy/proxy" for example.
    if [ "$#" -gt 0 ]; then
        printf "$format" "## Important workload's logs"
        # Iterate over "$@" directly rather than piping it through
        # echo | xargs, which would re-split and re-parse the arguments.
        for workload in "$@"; do
            printf '%s\n' "- $workload"
        done

        for workload in "$@"; do
            printf "$format" "### \$ kubectl logs --all-containers $workload"
            kubectl logs --all-containers "$workload" || echo  # a newline on failure for consistency with non-failure
        done
    fi
}
0 commit comments