Skip to content

Commit 2496df9

Browse files
committed
K8S-270 consider container status in health-check
1 parent 7d202b6 commit 2496df9

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

scripts/check-install.sh

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Options:
1010
-r=, --remote-api= check remote api availability, defaults to false
1111
-s=, --storage= check NFS storage, defaults to false
1212
-j=, --jaeger= check Jaeger installation
13-
-app=, --sample-app check either defualt Hello World app (cc) or a custom syntax [cmd], defaults to cc
13+
-app=, --sample-app check either default Hello World app (cc) or a custom syntax [cmd], defaults to cc
1414
-h, --help show this help
1515
"
1616

@@ -378,6 +378,11 @@ checkNfsStorage() {
378378
printInfo "Checking status on NFS provisioner pods"
379379
END="2"
380380
for ((i=0;i<=END;i++)); do
381+
NODENAME=$(kubectl get pods -l=app=nfs-client-provisioner -n default -o jsonpath="{.items[$i].spec.nodeName}" 2> /dev/null)
382+
if [ $? -ne 0 ]; then
383+
printWarning "Failed to get node name because of array index out of bounds"
384+
break
385+
fi
381386
PODNAME=$(kubectl get pods -l=app=nfs-client-provisioner -n default -o jsonpath="{.items[$i].metadata.name}" 2> /dev/null)
382387
if [ $? -ne 0 ]; then
383388
printError "Failed to find NFS provisioner pods. NFS provisioners may have failed or have not been deployed"
@@ -386,9 +391,11 @@ checkNfsStorage() {
386391
NFS_STORAGE_STATUS="FAIL"
387392
break
388393
fi
389-
STATUS=$(kubectl get pods -l=app=nfs-client-provisioner -n default -o jsonpath="{.items[$i].status.phase}" 2> /dev/null)
390-
if [ "$STATUS" != "Running" ]; then
391-
printError "Failed pod ${PODNAME} with status $STATUS"
394+
POD_STATUS=$(kubectl get pods -l=app=nfs-client-provisioner -n default -o jsonpath="{.items[$i].status.phase}" 2> /dev/null)
395+
CT_STATUS=$(kubectl get pods -l=app=nfs-client-provisioner -n default -o jsonpath="{.items[$i].status.containerStatuses[0].ready}" 2> /dev/null)
396+
printInfo "Checking nfs-client-provisioner pod status on Node $NODENAME"
397+
if [ "$POD_STATUS" != "Running" ] || [ "$CT_STATUS" != "true" ]; then
398+
printError "Failed pod ${PODNAME} with pod_status $POD_STATUS and ct_status $CT_STATUS"
392399
kubectl logs ${PODNAME} -n default > ${K8S_LOG_DIR}/${PODNAME}.log
393400
printError "Check logs in ${K8S_LOG_DIR}/${PODNAME}.log"
394401
WITH_ERROR="true"
@@ -509,10 +516,11 @@ checkNodeProblemDetector(){
509516
break
510517
fi
511518
PODNAME=$(kubectl get pods -l=app=node-problem-detector -n default -o jsonpath="{.items[$i].metadata.name}" 2> /dev/null)
512-
STATUS=$(kubectl get pods -l=app=node-problem-detector -n default -o jsonpath="{.items[$i].status.phase}" 2> /dev/null)
519+
POD_STATUS=$(kubectl get pods -l=app=node-problem-detector -n default -o jsonpath="{.items[$i].status.phase}" 2> /dev/null)
520+
CT_STATUS=$(kubectl get pods -l=app=node-problem-detector -n default -o jsonpath="{.items[$i].status.containerStatuses[0].ready}" 2> /dev/null)
513521
printInfo "Checking node-problem-detector pod status on Node $NODENAME"
514-
if [ "$STATUS" != "Running" ]; then
515-
printError "Failed node-problem-detector pod ${PODNAME} on $NODENAME with status $STATUS"
522+
if [ "$POD_STATUS" != "Running" ] || [ "$CT_STATUS" != "true" ]; then
523+
printError "Failed node-problem-detector pod ${PODNAME} on $NODENAME with pod_status $POD_STATUS and ct_status $CT_STATUS"
516524
writeLog "kubectl logs ${PODNAME} -n default" "${K8S_LOG_DIR}/${PODNAME}.log"
517525
printError "Check logs in ${K8S_LOG_DIR}/${PODNAME}.log"
518526
WITH_ERROR="true"

0 commit comments

Comments
 (0)