@@ -16,7 +16,6 @@ usage() {
16
16
17
17
Options are:
18
18
-m MODE Which check to perform
19
- -M EXIT_CODE Override default exit code when resource is missing
20
19
-H APISERVER API URL to query, kubectl is used if this option is not set
21
20
-T TOKEN Authorization token for API
22
21
-t TOKENFILE Path to file with token in it
@@ -33,15 +32,16 @@ usage() {
33
32
- Unbound Persistent Volumes in unboundpvs mode; default is 5
34
33
- Job failed count in jobs mode; default is 2
35
34
-b Brief mode (more suitable for Zabbix)
35
+ -M EXIT_CODE Exit code when resource is missing; default is 2 (CRITICAL)
36
36
-h Show this help and exit
37
37
38
38
Modes are:
39
39
apiserver Not for kubectl, should be used for each apiserver independently
40
40
components Check for health of k8s components (etcd, controller-manager, scheduler etc.)
41
+ nodes Check for active nodes
41
42
daemonsets Check for daemonsets readiness
42
43
deployments Check for deployments availability
43
44
jobs Check for failed jobs
44
- nodes Check for active nodes
45
45
pods Check for restart count of containers in the pods
46
46
replicasets Check for replicasets readiness
47
47
statefulsets Check for statefulsets readiness
@@ -68,7 +68,7 @@ while getopts ":m:M:H:T:t:K:N:n:o:c:w:bh" arg; do
68
68
case $arg in
69
69
h) usage ;;
70
70
m) MODE=" $OPTARG " ;;
71
- M) MISSING =" ${OPTARG} " ;;
71
+ M) MISSING_EXITCODE =" ${OPTARG} " ;;
72
72
o) TIMEOUT=" ${OPTARG} " ;;
73
73
H) APISERVER=" ${OPTARG%/ } " ;;
74
74
T) TOKEN=" $OPTARG " ;;
@@ -84,6 +84,7 @@ while getopts ":m:M:H:T:t:K:N:n:o:c:w:bh" arg; do
84
84
done
85
85
86
86
[ -z " $MODE " ] && usage
87
+ MISSING_EXITCODE=" ${MISSING_EXITCODE:- 2} "
87
88
88
89
if [ " $APISERVER " ]; then
89
90
[ -z " $TOKEN " ] && [ -z " $TOKENFILE " ] && usage
@@ -182,7 +183,7 @@ mode_nodes() {
182
183
if [ $EXITCODE = 0 ]; then
183
184
if [ -z " ${nodes[*]} " ]; then
184
185
OUTPUT=" No nodes found"
185
- [ -z ${MISSING} ] && EXITCODE=2 || EXITCODE= ${MISSING}
186
+ EXITCODE=" $MISSING_EXITCODE "
186
187
else
187
188
OUTPUT=" OK. ${# nodes[@]} nodes are Ready"
188
189
BRIEF_OUTPUT=" ${# nodes[@]} "
@@ -215,7 +216,7 @@ mode_components() {
215
216
if [ $EXITCODE = 0 ]; then
216
217
if [ -z " ${components[*]} " ]; then
217
218
OUTPUT=" No components found"
218
- [ -z ${MISSING} ] && EXITCODE=2 || EXITCODE= ${MISSING}
219
+ EXITCODE=" $MISSING_EXITCODE "
219
220
else
220
221
OUTPUT=" OK. Healthy: $healthy_comps "
221
222
fi
@@ -314,7 +315,7 @@ mode_tls() {
314
315
if [ $EXITCODE = 0 ]; then
315
316
if [ -z " $ns " ]; then
316
317
OUTPUT=" No TLS certs found"
317
- [ -z ${MISSING} ] && EXITCODE=2 || EXITCODE= ${MISSING}
318
+ EXITCODE=" $MISSING_EXITCODE "
318
319
else
319
320
if [ $count_ok -gt 1 ]; then
320
321
OUTPUT=" OK. $count_ok TLS secrets are OK"
@@ -399,7 +400,7 @@ mode_pods() {
399
400
400
401
if [ -z " $ns " ]; then
401
402
OUTPUT=" No pods found"
402
- [ -z ${MISSING} ] && EXITCODE=2 || EXITCODE= ${MISSING}
403
+ EXITCODE=" $MISSING_EXITCODE "
403
404
else
404
405
if [ " $max_restart_count " -ge " $WARN " ]; then
405
406
OUTPUT=" Container $bad_container : $max_restart_count restarts. "
@@ -450,7 +451,7 @@ mode_deployments() {
450
451
if [ $EXITCODE = 0 ]; then
451
452
if [ -z " $ns " ]; then
452
453
OUTPUT=" No deployments found"
453
- [ -z ${MISSING} ] && EXITCODE=2 || EXITCODE= ${MISSING}
454
+ EXITCODE=" $MISSING_EXITCODE "
454
455
else
455
456
if [ $count_avail -gt 1 ]; then
456
457
OUTPUT=" OK. $count_avail deploymens are available"
@@ -495,7 +496,7 @@ mode_daemonsets() {
495
496
select(.metadata.namespace==\" $ns \" and .metadata.name==\" $ds \" ) | \
496
497
.status | to_entries | map(\" \(.key)=\(.value)\" ) | \
497
498
.[]" )
498
- if [ $EXITCODE == 0 ]; then
499
+ if [ " $EXITCODE " == 0 ]; then
499
500
OUTPUT=" Daemonset $ns /$ds ${statusArr[numberReady]} /${statusArr[desiredNumberScheduled]} ready"
500
501
fi
501
502
if [ " ${statusArr[numberReady]} " != " ${statusArr[desiredNumberScheduled]} " ]; then
@@ -511,7 +512,7 @@ mode_daemonsets() {
511
512
if [ $EXITCODE = 0 ]; then
512
513
if [ -z " $ns " ]; then
513
514
OUTPUT=" No daemonsets found"
514
- [ -z ${MISSING} ] && EXITCODE=2 || EXITCODE= ${MISSING}
515
+ EXITCODE=" $MISSING_EXITCODE "
515
516
else
516
517
if [ $count_avail -gt 1 ]; then
517
518
OUTPUT=" OK. $count_avail daemonsets are ready"
@@ -573,7 +574,7 @@ mode_replicasets() {
573
574
if [ $EXITCODE = 0 ]; then
574
575
if [ -z " $ns " ]; then
575
576
OUTPUT=" No replicasets found"
576
- [ -z ${MISSING} ] && EXITCODE=2 || EXITCODE= ${MISSING}
577
+ EXITCODE=" $MISSING_EXITCODE "
577
578
else
578
579
if [ $count_avail -gt 1 ]; then
579
580
OUTPUT=" OK. $count_avail replicasets are ready"
@@ -636,7 +637,7 @@ mode_statefulsets() {
636
637
if [ $EXITCODE = 0 ]; then
637
638
if [ -z " $ns " ]; then
638
639
OUTPUT=" No statefulsets found"
639
- [ -z ${MISSING} ] && EXITCODE=2 || EXITCODE= ${MISSING}
640
+ EXITCODE=" $MISSING_EXITCODE "
640
641
else
641
642
if [ $count_avail -gt 1 ]; then
642
643
OUTPUT=" OK. $count_avail statefulsets are ready"
@@ -684,8 +685,8 @@ mode_jobs() {
684
685
fi
685
686
for job in " ${jobs[@]} " ; do
686
687
(( total_jobs++ ))
687
- job_fail_count=$( echo $data | jq -r " .items[] | select(.status.failed and .metadata.name==\" $job \" ) | .status.failed" )
688
- let " total_failed_count= $ total_failed_count + $ job_fail_count"
688
+ job_fail_count=$( echo " $data " | jq -r " .items[] | select(.status.failed and .metadata.name==\" $job \" ) | .status.failed" )
689
+ total_failed_count=" $(( total_failed_count+ job_fail_count)) "
689
690
if [ " $job_fail_count " -ge " ${WARN} " ]; then
690
691
OUTPUT=" ${OUTPUT} Job $job has $job_fail_count failures. "
691
692
EXITCODE=1
0 commit comments