Skip to content

Commit 63fbd40

Browse files
committed
Add pvc utilization
1 parent 2a7866a commit 63fbd40

File tree

1 file changed

+66
-2
lines changed

1 file changed

+66
-2
lines changed

check_kubernetes.sh

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,12 @@ usage() {
2727
- TLS expiration days for TLS mode; default is 30
2828
- Pod restart count in pods mode; default is 30
2929
- Job failed count in jobs mode; default is 1
30+
- Pvc storage utilization; default is 80%
3031
-c CRIT Critical threshold for
3132
- Pod restart count (in pods mode); default is 150
3233
- Unbound Persistent Volumes in unboundpvs mode; default is 5
3334
- Job failed count in jobs mode; default is 2
35+
- Pvc storage utilization; default is 90%
3436
-b Brief mode (more suitable for Zabbix)
3537
-M EXIT_CODE Exit code when resource is missing; default is 2 (CRITICAL)
3638
-h Show this help and exit
@@ -45,7 +47,8 @@ usage() {
4547
replicasets Check for replicasets readiness
4648
statefulsets Check for statefulsets readiness
4749
tls Check for tls secrets expiration dates
48-
unboundpvs Check for unbound persistent volumes.
50+
pvc Check for pvc utilization
51+
unboundpvs Check for unbound persistent volumes
4952
components Check for health of k8s components (deprecated in K8s 1.19+)
5053
EOF
5154

@@ -258,6 +261,66 @@ mode_unboundpvs() {
258261
fi
259262
}
260263

264+
#######################################
# Check byte and inode utilization of every PVC-backed volume reported by
# the kubelet stats summary of each node.
# Globals:
#   WARN - warning threshold in percent (defaulted to 80 when unset/empty)
#   CRIT - critical threshold in percent (defaulted to 90 when unset/empty)
# Calls:
#   getJSON, die (defined elsewhere in this script), jq
# Outputs:
#   One WARNING/CRITICAL line per offending metric, or a single OK line.
# Exit status:
#   Returns 0 after printing OK; exits 1 if only warnings were found;
#   exits 2 if at least one critical finding was found.
#######################################
mode_pvc() {
    WARN=${WARN:-80}
    CRIT=${CRIT:-90}

    local warn_error=0 crit_error=0
    local data stats node
    local volume_name volume_namespace
    local bytes_used bytes_capacity bytes_utilization
    local inodes_used inodes_capacity inodes_utilization
    local int_re='^[0-9]+$'
    local -a nodes=()

    data="$(getJSON "get nodes" "api/v1/nodes")" || die "$data"

    # Collect node names first so the per-node loop below does not run with
    # its stdin attached to the jq pipe.
    while IFS= read -r node; do
        [ -n "$node" ] && nodes+=("$node")
    done < <(printf '%s\n' "$data" | jq -r '.items[].metadata.name')

    for node in "${nodes[@]}"; do
        stats="$(getJSON "get nodes" "api/v1/nodes/$node/proxy/stats/summary")" \
            || die "$stats"

        # One jq pass per node emits a tab-separated record for every volume
        # that references a PVC. Walking pods positionally (instead of
        # re-selecting them by name) keeps same-named pods from different
        # namespaces apart. Missing counters are emitted as 0 so that every
        # field is non-empty and `read` cannot shift columns.
        # jq diagnostics are discarded, as in the original implementation.
        while IFS=$'\t' read -r volume_name volume_namespace \
                bytes_used bytes_capacity inodes_used inodes_capacity; do
            [ -n "$volume_name" ] || continue

            # Byte utilization; skipped when the capacity is unknown or zero
            # (division by zero) or when a counter is not a plain integer.
            if [[ "$bytes_used" =~ $int_re && "$bytes_capacity" =~ $int_re ]] \
                    && [ "$bytes_capacity" -gt 0 ]; then
                bytes_utilization=$((100 * bytes_used / bytes_capacity))
                # CRIT is tested first; anything above WARN that is not
                # critical (including exactly CRIT) is a warning.
                if [ "$bytes_utilization" -gt "$CRIT" ]; then
                    echo "CRITICAL. Very high storage utilization on pvc $volume_name: $bytes_utilization% ($bytes_used/$bytes_capacity Bytes)"
                    crit_error=$((crit_error + 1))
                elif [ "$bytes_utilization" -gt "$WARN" ]; then
                    echo "WARNING. High storage utilization on pvc $volume_name (namespace:$volume_namespace): $bytes_utilization% ($bytes_used/$bytes_capacity Bytes)"
                    warn_error=$((warn_error + 1))
                fi
            fi

            # Inode utilization; same guards, since some volumes report no
            # inode figures at all.
            if [[ "$inodes_used" =~ $int_re && "$inodes_capacity" =~ $int_re ]] \
                    && [ "$inodes_capacity" -gt 0 ]; then
                inodes_utilization=$((100 * inodes_used / inodes_capacity))
                if [ "$inodes_utilization" -gt "$CRIT" ]; then
                    echo "CRITICAL. Very high inodes utilization on pvc $volume_name: $inodes_utilization% ($inodes_used/$inodes_capacity)"
                    crit_error=$((crit_error + 1))
                elif [ "$inodes_utilization" -gt "$WARN" ]; then
                    echo "WARNING. High inodes utilization on pvc $volume_name: $inodes_utilization% ($inodes_used/$inodes_capacity)"
                    warn_error=$((warn_error + 1))
                fi
            fi
        done < <(printf '%s\n' "$stats" | jq -r '
            .pods[]?
            | .volume[]?
            | select(.pvcRef.name != null)
            | "\(.name)\t\(.pvcRef.namespace)\t\(.usedBytes // 0)\t\(.capacityBytes // 0)\t\(.inodesUsed // 0)\t\(.inodes // 0)"
        ' 2>/dev/null)
    done

    # Critical findings take precedence over warnings.
    if [ "$crit_error" -ne 0 ]; then
        exit 2
    fi
    if [ "$warn_error" -ne 0 ]; then
        exit 1
    fi
    echo "OK. No problem on pvc storage"
}
323+
261324
mode_tls() {
262325
WARN=${WARN:-30}
263326

@@ -388,7 +451,7 @@ mode_pods() {
388451
elif [ "$count_status" == "Succeeded" ]; then
389452
((count_succeeded++))
390453
else
391-
((count_failed++))
454+
((count_failed++))
392455
fi
393456
done
394457
done
@@ -734,6 +797,7 @@ case "$MODE" in
734797
(statefulsets) mode_statefulsets ;;
735798
(tls) mode_tls ;;
736799
(jobs) mode_jobs ;;
800+
(pvc) mode_pvc ;;
737801
(*) usage ;;
738802
esac
739803

0 commit comments

Comments
 (0)