Skip to content

Commit 84fc257

Browse files
authored
Merge pull request #1238 from consideRatio/pr/full_namespace_report-update
ci: sync full_namespace_report from z2jh
2 parents f76e37e + 89268a9 commit 84fc257

File tree

2 files changed

+119
-24
lines changed

2 files changed

+119
-24
lines changed

.github/workflows/test.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,10 @@ jobs:
160160
- name: Kubernetes namespace report
161161
if: ${{ always() }}
162162
run: |
163-
# Display debugging information
163+
# Display debugging information and always provide the logs from
164+
# certain important k8s deployments.
164165
. ci/common
165-
full_namespace_report
166+
full_namespace_report deploy/binder deploy/hub deploy/proxy
166167
- name: Upload coverage stats
167168
uses: codecov/codecov-action@v1
168169
if: ${{ always() }}

ci/common

Lines changed: 116 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,34 +22,128 @@ await_binderhub() {
2222
kubectl rollout status --watch --timeout 300s deployment/binder
2323
}
2424

25-
full_namespace_report() {
26-
# list config (secret,configmap)
27-
kubectl get secret,cm
28-
# list networking (service,ingress)
29-
kubectl get svc,ing
25+
full_namespace_report () {
26+
# This was copied from z2jh 2021-01-06. Work to make it a dedicated GitHub
27+
# action and avoid a duplicated code base is planned. / @consideRatio
28+
# ------------------------------------------------------------------------
29+
#
30+
# Purpose:
31+
# - To print, in a chart-agnostic way, relevant information about the resources in a
32+
# namespace.
33+
#
34+
# Arguments:
35+
# - Accepts a sequence of arguments such as "deploy/hub" "deploy/proxy". It
36+
# will do `kubectl logs --all-containers <arg>` on them.
37+
#
38+
# Relevant references:
39+
# - ContainerStatus ref: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#containerstatus-v1-core
40+
41+
# printf formatting: a bold colored topic with a divider.
42+
yellow=33
43+
red=31
44+
divider='--------------------------------------------------------------------------------'
45+
# GitHub Workflows resets formatting after \n, so it's reapplied.
46+
export format="\n\033[${yellow};1m%s\n\033[${yellow};1m${divider}\033[0m\n"
47+
export format_error="\n\033[${red};1m%s\n\033[${red};1m${divider}\033[0m\n"
48+
49+
printf "$format" "# Full namespace report"
50+
printf "$format" "## Resource overview"
3051
# list workloads (deployment,statefulset,daemonset,pod)
52+
printf "$format" "### \$ kubectl get deploy,sts,ds,pod"
3153
kubectl get deploy,sts,ds,pod
54+
# list config (secret,configmap)
55+
printf "$format" "### \$ kubectl get secret,cm"
56+
kubectl get secret,cm
57+
# list networking (service,ingress,networkpolicy)
58+
printf "$format" "### \$ kubectl get svc,ing,netpol"
59+
kubectl get svc,ing,netpol
60+
# list rbac (serviceaccount,role,rolebinding)
61+
printf "$format" "### \$ kubectl get sa,role,rolebinding"
62+
kubectl get sa,role,rolebinding
63+
64+
# Check if any container of any pod has a non-ready status
65+
PODS_WITH_NON_READY_CONTAINER=$(
66+
kubectl get pods -o json \
67+
| jq -r '
68+
.items[]
69+
| select(
70+
any(.status.initContainerStatuses[]?; .ready == false)
71+
or
72+
any(.status.containerStatuses[]?; .ready == false)
73+
)
74+
| .metadata.name
75+
'
76+
)
77+
if [ -n "$PODS_WITH_NON_READY_CONTAINER" ]; then
78+
printf "$format_error" "## Pods with non-ready container(s) detected!"
79+
echo "$PODS_WITH_NON_READY_CONTAINER" | xargs --max-args=1 echo -
3280

33-
# if any pod has any non-ready -> show its containers' logs
34-
kubectl get pods -o json \
35-
| jq '
36-
.items[]
37-
| select(
38-
any(.status.containerStatuses[]?; .ready == false)
39-
)
40-
| .metadata.name' \
41-
| xargs --max-args 1 --no-run-if-empty \
42-
sh -c 'printf "\nPod with non-ready container detected\n - Logs of $0:\n"; kubectl logs --all-containers $0'
81+
for var in $PODS_WITH_NON_READY_CONTAINER; do
82+
printf "$format_error" "### \$ kubectl describe pod/$var"
83+
kubectl describe pod/$var
84+
printf "$format_error" "### \$ kubectl logs --all-containers pod/$var"
85+
kubectl logs --all-containers pod/$var || echo # a newline on failure for consistency with non-failure
86+
done
87+
88+
fi
89+
90+
# Check if any container of any pod has a restartCount > 0. Then, we inspect
91+
# their logs with --previous. We also add --follow and --ignore-errors in
92+
# order to ensure we get the information from all containers, and combined
93+
# with --previous it will exit and not get stuck.
94+
#
95+
# ref: https://github.com/kubernetes/kubernetes/issues/97530
96+
PODS_WITH_RESTARTED_CONTAINERS=$(
97+
kubectl get pods -o json \
98+
| jq -r '
99+
.items[]
100+
| select(
101+
any(.status.initContainerStatuses[]?; .restartCount > 0)
102+
or
103+
any(.status.containerStatuses[]?; .restartCount > 0)
104+
)
105+
| .metadata.name
106+
'
107+
)
108+
if [ -n "$PODS_WITH_RESTARTED_CONTAINERS" ]; then
109+
printf "$format_error" "## Pods with restarted containers detected!"
110+
echo "$PODS_WITH_RESTARTED_CONTAINERS" | xargs --max-args=1 echo -
111+
112+
for var in $PODS_WITH_RESTARTED_CONTAINERS; do
113+
printf "$format_error" "### \$ kubectl describe pod/$var"
114+
kubectl describe pod/$var
115+
printf "$format_error" "### \$ kubectl logs --previous --all-containers --follow --ignore-errors pod/$var"
116+
kubectl logs --previous --all-containers --follow --ignore-errors pod/$var
117+
done
118+
fi
43119

44120
# if any pods that should be scheduled by the user-scheduler are pending ->
45121
# show user-scheduler's logs
46-
(
122+
PENDING_USER_PODS=$(
47123
kubectl get pods -l "component in (user-placeholder,singleuser-server)" -o json \
48-
| jq -r '
49-
.items[]
50-
| select(.status.phase == "Pending")
51-
| .metadata.name
124+
| jq -r '
125+
.items[]
126+
| select(.status.phase == "Pending")
127+
| .metadata.name
52128
'
53-
) | xargs --max-args 1 --no-run-if-empty --max-lines \
54-
sh -c 'printf "\nPending user pod detected ($0)\n - Logs of deploy/user-scheduler:\n"; kubectl logs --all-containers deploy/user-scheduler'
129+
)
130+
if [ -n "$PENDING_USER_PODS" ]; then
131+
printf "$format_error" "## Pending pods detected!"
132+
echo "$PENDING_USER_PODS" | xargs --max-args=1 echo -
133+
134+
printf "$format_error" "### \$ kubectl logs --all-containers deploy/user-scheduler"
135+
kubectl logs --all-containers deploy/user-scheduler || echo # a newline on failure for consistency with non-failure
136+
fi
137+
138+
# show container logs of all important workloads passed to the function,
139+
# "deploy/hub" and "deploy/proxy" for example.
140+
if [ "$#" -gt 0 ]; then
141+
printf "$format" "## Important workload's logs"
142+
echo "$@" | xargs --max-args=1 echo -
143+
144+
for var in "$@"; do
145+
printf "$format" "### \$ kubectl logs --all-containers $var"
146+
kubectl logs --all-containers $var || echo # a newline on failure for consistency with non-failure
147+
done
148+
fi
55149
}

0 commit comments

Comments
 (0)