Skip to content

Commit 545e562

Browse files
memodi and claude committed
Improve daemonset startup diagnostics and increase timeout
- Increase waitDaemonset timeout from 50s to 5 minutes (30×10s)
  * CI environments often have slow image pulls
  * Previous timeout was too aggressive for registry operations
- Add comprehensive diagnostic output on pod startup failure:
  * Pod status with node placement (get pods -o wide)
  * Recent events to identify ImagePullBackOff, etc.
  * Pod event details from describe output
  * Daemonset logs if containers started

This helps diagnose ContainerCreating issues in CI where pods fail to start due to image pull problems or resource constraints.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 86d43d0 commit 545e562

File tree

1 file changed

+18
-4
lines changed

1 file changed

+18
-4
lines changed

scripts/functions.sh

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -780,9 +780,11 @@ function defaultValue() {

 function waitDaemonset(){
 echo "Waiting for daemonset pods to be ready..."
-retries=10
+# Increase timeout for CI environments where image pulls can be slow
+# 30 retries × 10 seconds = 5 minutes total
+retries=30
 while [[ $retries -ge 0 ]];do
-sleep 5
+sleep 10
 ready=$($K8S_CLI_BIN -n "$namespace" get daemonset netobserv-cli -o jsonpath="{.status.numberReady}")
 required=$($K8S_CLI_BIN -n "$namespace" get daemonset netobserv-cli -o jsonpath="{.status.desiredNumberScheduled}")
 reasons=$($K8S_CLI_BIN get pods -n "$namespace" -o jsonpath='{.items[*].status.containerStatuses[*].state.waiting.reason}')
@@ -796,8 +798,20 @@ function waitDaemonset(){
 ((retries--))
 done
 echo
-echo "ERROR: Daemonset pods failed to start:"
-${K8S_CLI_BIN} logs daemonset/netobserv-cli -n "$namespace" --tail=1
+echo "ERROR: Daemonset pods failed to start within timeout"
+echo "Collecting diagnostic information..."
+echo
+echo "=== Pod Status ==="
+${K8S_CLI_BIN} get pods -n "$namespace" -o wide
+echo
+echo "=== Pod Events ==="
+${K8S_CLI_BIN} get events -n "$namespace" --sort-by='.lastTimestamp' | tail -20
+echo
+echo "=== Pod Descriptions ==="
+${K8S_CLI_BIN} describe pods -n "$namespace" | grep -A 10 "Events:"
+echo
+echo "=== Daemonset Logs (if available) ==="
+${K8S_CLI_BIN} logs daemonset/netobserv-cli -n "$namespace" --tail=50 2>&1 || echo "No logs available yet"
 echo
 exit 1
 }

0 commit comments

Comments
 (0)