@@ -33,7 +33,6 @@ usage() {
33
33
- Unbound Persistent Volumes in unboundpvs mode; default is 5
34
34
- Job failed count in jobs mode; default is 2
35
35
- Pvc storage utilization; default is 90%
36
- -b Brief mode (more suitable for Zabbix)
37
36
-M EXIT_CODE Exit code when resource is missing; default is 2 (CRITICAL)
38
37
-h Show this help and exit
39
38
@@ -49,25 +48,20 @@ usage() {
49
48
tls Check for tls secrets expiration dates
50
49
pvc Check for pvc utilization
51
50
unboundpvs Check for unbound persistent volumes
52
- components Check for health of k8s components (deprecated in K8s 1.19+)
53
51
EOF
54
52
55
53
exit 2
56
54
}
57
55
58
- BRIEF=0
59
56
TIMEOUT=15
57
+ unset NAME
60
58
61
59
die () {
62
- if [ " $BRIEF " = 1 ]; then
63
- echo " -1"
64
- else
65
60
echo " $1 "
66
- fi
67
61
exit " ${2:- 2} "
68
62
}
69
63
70
- while getopts " :m:M:H:T:t:K:N:n:o:c:w:bh " arg; do
64
+ while getopts " :m:M:H:T:t:K:N:n:o:c:w:h " arg; do
71
65
case $arg in
72
66
h) usage ;;
73
67
m) MODE=" $OPTARG " ;;
@@ -81,7 +75,6 @@ while getopts ":m:M:H:T:t:K:N:n:o:c:w:bh" arg; do
81
75
n) NAME=" $OPTARG " ;;
82
76
w) WARN=" $OPTARG " ;;
83
77
c) CRIT=" $OPTARG " ;;
84
- b) BRIEF=1 ;;
85
78
* ) usage ;;
86
79
esac
87
80
done
@@ -151,7 +144,7 @@ mode_apiserver() {
151
144
data=$( getJSON " " " healthz" )
152
145
[ $? -gt 0 ] && die " $data "
153
146
if [ " $data " = ok ]; then
154
- OUTPUT=" OK. Kubernetes apiserver health is OK "
147
+ OUTPUT=" OK. Kubernetes apiserver is healthy "
155
148
EXITCODE=0
156
149
else
157
150
data=$( echo " $data " | grep " \[\-\]" )
@@ -171,15 +164,15 @@ mode_nodes() {
171
164
.status" ) "
172
165
if [ " $ready " != True ]; then
173
166
EXITCODE=2
174
- OUTPUT=" ${OUTPUT} Node $node not ready. "
167
+ OUTPUT=" ERROR. ${OUTPUT} Node $node not ready\n "
175
168
fi
176
169
for condition in OutOfDisk MemoryPressure DiskPressure; do
177
170
state=" $( echo " $data " | jq -r " .items[] | select(.metadata.name==\" $node \" ) | \
178
171
.status.conditions[] | select(.type==\" $condition \" ) | \
179
172
.status" ) "
180
173
if [ " $state " = True ]; then
181
174
[ $EXITCODE -lt 1 ] && EXITCODE=1
182
- OUTPUT=" $ OUTPUT $node $condition . "
175
+ OUTPUT=" WARN. ${ OUTPUT} $node $condition \n "
183
176
fi
184
177
done
185
178
done
@@ -189,43 +182,8 @@ mode_nodes() {
189
182
OUTPUT=" No nodes found"
190
183
EXITCODE=" $MISSING_EXITCODE "
191
184
else
192
- OUTPUT=" OK. ${# nodes[@]} nodes are Ready"
193
- BRIEF_OUTPUT=" ${# nodes[@]} "
185
+ OUTPUT=" OK. ${# nodes[@]} nodes are ready"
194
186
fi
195
- else
196
- BRIEF_OUTPUT=" -1"
197
- fi
198
- }
199
-
200
- mode_components () {
201
- healthy_comps=" "
202
- unhealthy_comps=" "
203
- data=" $( getJSON " get cs" " api/v1/componentstatuses" ) "
204
- [ $? -gt 0 ] && die " $data "
205
- components=($( echo " $data " | jq -r " .items[].metadata.name" ) )
206
-
207
- for comp in " ${components[@]} " ; do
208
- healthy=$( echo " $data " | jq -r " .items[] | select(.metadata.name==\" $comp \" ) | \
209
- .conditions[] | select(.type==\" Healthy\" ) | \
210
- .status" )
211
- if [ " $healthy " != True ]; then
212
- EXITCODE=2
213
- unhealthy_comps=" $unhealthy_comps $comp "
214
- else
215
- healthy_comps=" $healthy_comps $comp "
216
- fi
217
- done
218
-
219
- BRIEF_OUTPUT=" $healthy_comps "
220
- if [ $EXITCODE = 0 ]; then
221
- if [ -z " ${components[*]} " ]; then
222
- OUTPUT=" No components found"
223
- EXITCODE=" $MISSING_EXITCODE "
224
- else
225
- OUTPUT=" OK. Healthy: $healthy_comps "
226
- fi
227
- else
228
- OUTPUT=" CRITICAL. Unhealthy: $unhealthy_comps ; Healthy: $healthy_comps "
229
187
fi
230
188
}
231
189
@@ -246,9 +204,7 @@ mode_unboundpvs() {
246
204
select(.status.phase!=\" Bound\" ) | \
247
205
\" \(.metadata.name):\(.status.phase):\(.spec.claimRef.uid)\" " )
248
206
249
- BRIEF_OUTPUT=" ${# pvsArr[*]} "
250
207
if [ ${# unboundPvsArr[*]} -gt 0 ]; then
251
- BRIEF_OUTPUT=" -${# unboundPvsArr[*]} "
252
208
if [ ${# unboundPvsArr[*]} -ge " $CRIT " ]; then
253
209
OUTPUT=" CRITICAL. Unbound persistentvolumes:\n$OUTPUT "
254
210
EXITCODE=2
@@ -387,7 +343,6 @@ mode_tls() {
387
343
done
388
344
done
389
345
390
- BRIEF_OUTPUT=" $count_ok "
391
346
if [ $EXITCODE = 0 ]; then
392
347
if [ -z " $ns " ]; then
393
348
OUTPUT=" No TLS certs found"
@@ -465,28 +420,30 @@ mode_pods() {
465
420
else
466
421
(( count_failed++ ))
467
422
fi
423
+ if [ " $restart_count " -ge " $WARN " ]; then
424
+ OUTPUT=" ${OUTPUT} Container $bad_container : $restart_count restarts.\n"
425
+ EXITCODE=1
426
+ if [ " $restart_count " -ge " $CRIT " ]; then
427
+ EXITCODE=2
428
+ fi
429
+ fi
468
430
done
469
431
done
470
432
471
- if [ " $max_restart_count " -ge " $WARN " ]; then
472
- BRIEF_OUTPUT=" -$max_restart_count "
473
- else
474
- BRIEF_OUTPUT=" $count_ready "
475
- fi
476
-
433
+ if [ $EXITCODE = 0 ]; then
477
434
if [ -z " $ns " ]; then
478
435
OUTPUT=" No pods found"
479
436
EXITCODE=" $MISSING_EXITCODE "
480
437
else
481
- if [ " $max_restart_count " -ge " $WARN " ]; then
482
- OUTPUT=" Container $bad_container : $max_restart_count restarts. "
483
- EXITCODE=1
484
- if [ " $max_restart_count " -ge " $CRIT " ]; then
485
- EXITCODE=2
438
+ OUTPUT=" OK. $count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready\n${OUTPUT} "
439
+ fi
440
+ else
441
+ if [ $EXITCODE = 1 ]; then
442
+ OUTPUT=" WARNING. $count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready\n${OUTPUT} "
443
+ else
444
+ OUTPUT=" ERROR. $count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready\n${OUTPUT} "
486
445
fi
487
446
fi
488
- OUTPUT=" $OUTPUT$count_ready pods ready, $count_succeeded pods succeeded, $count_failed pods not ready"
489
- fi
490
447
}
491
448
492
449
mode_deployments () {
@@ -525,7 +482,6 @@ mode_deployments() {
525
482
done
526
483
done
527
484
528
- BRIEF_OUTPUT=" $count_avail "
529
485
if [ $EXITCODE = 0 ]; then
530
486
if [ -z " $ns " ]; then
531
487
OUTPUT=" No deployments found"
@@ -586,7 +542,6 @@ mode_daemonsets() {
586
542
done
587
543
done
588
544
589
- BRIEF_OUTPUT=" $count_avail "
590
545
if [ $EXITCODE = 0 ]; then
591
546
if [ -z " $ns " ]; then
592
547
OUTPUT=" No daemonsets found"
@@ -648,7 +603,6 @@ mode_replicasets() {
648
603
done
649
604
done
650
605
651
- BRIEF_OUTPUT=" $count_avail "
652
606
if [ $EXITCODE = 0 ]; then
653
607
if [ -z " $ns " ]; then
654
608
OUTPUT=" No replicasets found"
@@ -701,7 +655,7 @@ mode_statefulsets() {
701
655
done < <( echo " $data " | \
702
656
jq -r " .items[] | select(.metadata.namespace==\" $ns \" and .metadata.name==\" $rs \" ) | \
703
657
.status | to_entries | map(\" \(.key)=\(.value)\" ) | .[]" )
704
- OUTPUT=" Statefulset $ns /$rs ${statusArr[readyReplicas]} /${statusArr[currentReplicas]} ready"
658
+ OUTPUT=" ${OUTPUT} Statefulset $ns /$rs ${statusArr[readyReplicas]} /${statusArr[currentReplicas]} ready\n "
705
659
if [ " ${statusArr[readyReplicas]} " != " ${statusArr[currentReplicas]} " ]; then
706
660
(( count_failed++ ))
707
661
EXITCODE=2
@@ -711,7 +665,6 @@ mode_statefulsets() {
711
665
done
712
666
done
713
667
714
- BRIEF_OUTPUT=" $count_avail "
715
668
if [ $EXITCODE = 0 ]; then
716
669
if [ -z " $ns " ]; then
717
670
OUTPUT=" No statefulsets found"
@@ -766,7 +719,7 @@ mode_jobs() {
766
719
job_fail_count=$( echo " $data " | jq -r " .items[] | select(.status.failed and .metadata.name==\" $job \" ) | .status.failed" )
767
720
total_failed_count=" $(( total_failed_count+ job_fail_count)) "
768
721
if [ " $job_fail_count " -ge " ${WARN} " ]; then
769
- OUTPUT=" ${OUTPUT} Job $job has $job_fail_count failures. "
722
+ OUTPUT=" ${OUTPUT} Job $job has $job_fail_count failures\n "
770
723
EXITCODE=1
771
724
elif [ " $job_fail_count " -ge " ${CRIT} " ]; then
772
725
EXITCODE=2
@@ -783,7 +736,7 @@ mode_jobs() {
783
736
if [ -z " $ns " ]; then
784
737
OUTPUT=" No jobs found"
785
738
else
786
- OUTPUT=" OK. $total_jobs checked. ${total_failed_count} failed jobs is below threshold"
739
+ OUTPUT=" OK. $total_jobs checked. ${total_failed_count} failed jobs is below threshold\n "
787
740
fi
788
741
else
789
742
if [ " $EXITCODE " -eq 1 ] ; then
@@ -792,14 +745,13 @@ mode_jobs() {
792
745
OUTPUT=" CRITICAL. ${OUTPUT} "
793
746
fi
794
747
if [ -z " $NAME " ] && [ " $EXITCODE " -ge 1 ] ; then
795
- OUTPUT=" ${OUTPUT}${total_failed_count} jobs in total have failed"
748
+ OUTPUT=" ${OUTPUT}${total_failed_count} jobs have failed"
796
749
fi
797
750
fi
798
751
}
799
752
800
753
case " $MODE " in
801
754
(apiserver) mode_apiserver ;;
802
- (components) mode_components ;;
803
755
(daemonsets) mode_daemonsets ;;
804
756
(deployments) mode_deployments ;;
805
757
(nodes) mode_nodes ;;
@@ -813,16 +765,6 @@ case "$MODE" in
813
765
(* ) usage ;;
814
766
esac
815
767
816
- if [ " $BRIEF " = 1 ]; then
817
- if [ " $EXITCODE " = 0 ]; then
818
- echo " ${BRIEF_OUTPUT:- 1} "
819
- elif [ -z " $BRIEF_FAIL_OUTPUT " ]; then
820
- echo " ${BRIEF_OUTPUT:- 0} "
821
- else
822
- echo " ${BRIEF_FAIL_OUTPUT} "
823
- fi
824
- else
825
- echo " $OUTPUT "
826
- fi
768
+ printf " $OUTPUT "
827
769
828
770
exit $EXITCODE
0 commit comments