Skip to content

Commit b54b2b7

Browse files
committed
improve output, fix mode pods to report all issues
1 parent fc3cc6e commit b54b2b7

File tree

1 file changed

+23
-15
lines changed

1 file changed

+23
-15
lines changed

check_kubernetes.sh

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ mode_apiserver() {
144144
data=$(getJSON "" "healthz")
145145
[ $? -gt 0 ] && die "$data"
146146
if [ "$data" = ok ]; then
147-
OUTPUT="OK. Kubernetes apiserver health is OK"
147+
OUTPUT="OK. Kubernetes apiserver is healthy"
148148
EXITCODE=0
149149
else
150150
data=$(echo "$data" | grep "\[\-\]")
@@ -164,15 +164,15 @@ mode_nodes() {
164164
.status")"
165165
if [ "$ready" != True ]; then
166166
EXITCODE=2
167-
OUTPUT="${OUTPUT}Node $node not ready. "
167+
OUTPUT="ERROR. ${OUTPUT}Node $node not ready\n"
168168
fi
169169
for condition in OutOfDisk MemoryPressure DiskPressure; do
170170
state="$(echo "$data" | jq -r ".items[] | select(.metadata.name==\"$node\") | \
171171
.status.conditions[] | select(.type==\"$condition\") | \
172172
.status")"
173173
if [ "$state" = True ]; then
174174
[ $EXITCODE -lt 1 ] && EXITCODE=1
175-
OUTPUT="$OUTPUT $node $condition."
175+
OUTPUT="WARN. ${OUTPUT} $node $condition\n"
176176
fi
177177
done
178178
done
@@ -182,7 +182,7 @@ mode_nodes() {
182182
OUTPUT="No nodes found"
183183
EXITCODE="$MISSING_EXITCODE"
184184
else
185-
OUTPUT="OK. ${#nodes[@]} nodes are Ready"
185+
OUTPUT="OK. ${#nodes[@]} nodes are ready"
186186
fi
187187
fi
188188
}
@@ -420,22 +420,30 @@ mode_pods() {
420420
else
421421
((count_failed++))
422422
fi
423+
if [ "$restart_count" -ge "$WARN" ]; then
424+
OUTPUT="${OUTPUT}Container $bad_container: $restart_count restarts.\n"
425+
EXITCODE=1
426+
if [ "$restart_count" -ge "$CRIT" ]; then
427+
EXITCODE=2
428+
fi
429+
fi
423430
done
424431
done
425432

433+
if [ $EXITCODE = 0 ]; then
426434
if [ -z "$ns" ]; then
427435
OUTPUT="No pods found"
428436
EXITCODE="$MISSING_EXITCODE"
429437
else
430-
if [ "$max_restart_count" -ge "$WARN" ]; then
431-
OUTPUT="Container $bad_container: $max_restart_count restarts. "
432-
EXITCODE=1
433-
if [ "$max_restart_count" -ge "$CRIT" ]; then
434-
EXITCODE=2
438+
OUTPUT="OK. $count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready\n${OUTPUT}"
439+
fi
440+
else
441+
if [ $EXITCODE = 1 ]; then
442+
OUTPUT="WARNING. $count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready\n${OUTPUT}"
443+
else
444+
OUTPUT="ERROR. $count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready\n${OUTPUT}"
435445
fi
436446
fi
437-
OUTPUT="$OUTPUT$count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready"
438-
fi
439447
}
440448

441449
mode_deployments() {
@@ -647,7 +655,7 @@ mode_statefulsets() {
647655
done < <(echo "$data" | \
648656
jq -r ".items[] | select(.metadata.namespace==\"$ns\" and .metadata.name==\"$rs\") | \
649657
.status | to_entries | map(\"\(.key)=\(.value)\") | .[]")
650-
OUTPUT="Statefulset $ns/$rs ${statusArr[readyReplicas]}/${statusArr[currentReplicas]} ready"
658+
OUTPUT="${OUTPUT}Statefulset $ns/$rs ${statusArr[readyReplicas]}/${statusArr[currentReplicas]} ready\n"
651659
if [ "${statusArr[readyReplicas]}" != "${statusArr[currentReplicas]}" ]; then
652660
((count_failed++))
653661
EXITCODE=2
@@ -711,7 +719,7 @@ mode_jobs() {
711719
job_fail_count=$(echo "$data" | jq -r ".items[] | select(.status.failed and .metadata.name==\"$job\") | .status.failed")
712720
total_failed_count="$((total_failed_count+job_fail_count))"
713721
if [ "$job_fail_count" -ge "${WARN}" ]; then
714-
OUTPUT="${OUTPUT}Job $job has $job_fail_count failures. "
722+
OUTPUT="${OUTPUT}Job $job has $job_fail_count failures\n"
715723
EXITCODE=1
716724
elif [ "$job_fail_count" -ge "${CRIT}" ]; then
717725
EXITCODE=2
@@ -728,7 +736,7 @@ mode_jobs() {
728736
if [ -z "$ns" ]; then
729737
OUTPUT="No jobs found"
730738
else
731-
OUTPUT="OK. $total_jobs checked. ${total_failed_count} failed jobs is below threshold"
739+
OUTPUT="OK. $total_jobs checked. ${total_failed_count} failed jobs is below threshold\n"
732740
fi
733741
else
734742
if [ "$EXITCODE" -eq 1 ] ; then
@@ -737,7 +745,7 @@ mode_jobs() {
737745
OUTPUT="CRITICAL. ${OUTPUT}"
738746
fi
739747
if [ -z "$NAME" ] && [ "$EXITCODE" -ge 1 ] ; then
740-
OUTPUT="${OUTPUT}${total_failed_count} jobs in total have failed"
748+
OUTPUT="${OUTPUT}${total_failed_count} jobs have failed"
741749
fi
742750
fi
743751
}

0 commit comments

Comments
 (0)