Skip to content

Commit d806aec

Browse files
committed
G2P-4380 Wait added between charts
1 parent 1e4b62d commit d806aec

File tree

1 file changed

+59
-27
lines changed

1 file changed

+59
-27
lines changed

automation/lib/env-phase2.sh

Lines changed: 59 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,7 @@ helm_install_chart() {
129129
local -a helm_args=(
130130
install "$release_name" "$chart_ref"
131131
-n "$env_name"
132+
--wait
132133
--timeout 20m
133134
)
134135

@@ -228,19 +229,36 @@ env_phase2_step1_commons_base() {
228229
"${extra[@]}" \
229230
|| return 1
230231

231-
# Verify StatefulSets and Deployments are ready
232-
log_info "Verifying infrastructure resources are ready..."
233-
local not_ready
234-
not_ready=$(kubectl get deployments,statefulsets -n "$env_name" -o json 2>/dev/null | \
235-
jq -r '.items[] | select((.status.readyReplicas // 0) != (.status.replicas // 1)) | "\(.kind)/\(.metadata.name)"' 2>/dev/null || true)
236-
237-
if [[ -n "$not_ready" ]]; then
238-
log_warn "Some resources are not yet fully ready:"
239-
echo "$not_ready" | while read -r res; do
240-
log_warn " - ${res}"
241-
done
242-
else
243-
log_success "All base infrastructure resources in '${env_name}' are ready."
232+
# Wait until ALL StatefulSets and Deployments are fully ready.
233+
# This is critical — services chart must not start until PostgreSQL,
234+
# Kafka, Redis, etc. are all accepting connections.
235+
log_info "Waiting for all base infrastructure to be fully ready..."
236+
local wait_timeout=900 # 15 minutes
237+
local wait_interval=15
238+
local wait_elapsed=0
239+
240+
while [[ $wait_elapsed -lt $wait_timeout ]]; do
241+
local not_ready
242+
not_ready=$(kubectl get deployments,statefulsets -n "$env_name" -o json 2>/dev/null | \
243+
jq -r '.items[] | select((.status.readyReplicas // 0) != (.status.replicas // 1)) | "\(.kind)/\(.metadata.name)"' 2>/dev/null || true)
244+
245+
if [[ -z "$not_ready" ]]; then
246+
log_success "All base infrastructure resources in '${env_name}' are ready."
247+
break
248+
fi
249+
250+
echo -ne "\r Waiting for: $(echo "$not_ready" | tr '\n' ', ')... ${wait_elapsed}s/${wait_timeout}s"
251+
sleep "$wait_interval"
252+
wait_elapsed=$((wait_elapsed + wait_interval))
253+
done
254+
255+
if [[ $wait_elapsed -ge $wait_timeout ]]; then
256+
echo ""
257+
log_error "Base infrastructure not ready after ${wait_timeout}s" \
258+
"Some resources did not become ready in time" \
259+
"Check pod status" \
260+
"kubectl get pods -n ${env_name} --field-selector=status.phase!=Running"
261+
return 1
244262
fi
245263

246264
mark_step_done "$step_id"
@@ -320,20 +338,34 @@ env_phase2_step2_commons_services() {
320338
"${extra[@]}" \
321339
|| return 1
322340

323-
# Verify deployments are ready
324-
log_info "Verifying all service deployments are ready..."
325-
local not_ready
326-
not_ready=$(kubectl get deployments -n "$env_name" -o json 2>/dev/null | \
327-
jq -r '.items[] | select((.status.availableReplicas // 0) != (.status.replicas // 1)) | .metadata.name' 2>/dev/null || true)
328-
329-
if [[ -n "$not_ready" ]]; then
330-
log_warn "Some deployments are not yet fully ready:"
331-
echo "$not_ready" | while read -r dep; do
332-
log_warn " - ${dep}"
333-
done
334-
log_warn "They may still be starting. Check: kubectl get pods -n ${env_name}"
335-
else
336-
log_success "All deployments in '${env_name}' are ready."
341+
# Wait until ALL deployments are fully ready before marking complete.
342+
log_info "Waiting for all service deployments to be fully ready..."
343+
local wait_timeout=900 # 15 minutes
344+
local wait_interval=15
345+
local wait_elapsed=0
346+
347+
while [[ $wait_elapsed -lt $wait_timeout ]]; do
348+
local not_ready
349+
not_ready=$(kubectl get deployments -n "$env_name" -o json 2>/dev/null | \
350+
jq -r '.items[] | select((.status.availableReplicas // 0) != (.status.replicas // 1)) | .metadata.name' 2>/dev/null || true)
351+
352+
if [[ -z "$not_ready" ]]; then
353+
log_success "All deployments in '${env_name}' are ready."
354+
break
355+
fi
356+
357+
echo -ne "\r Waiting for: $(echo "$not_ready" | tr '\n' ', ')... ${wait_elapsed}s/${wait_timeout}s"
358+
sleep "$wait_interval"
359+
wait_elapsed=$((wait_elapsed + wait_interval))
360+
done
361+
362+
if [[ $wait_elapsed -ge $wait_timeout ]]; then
363+
echo ""
364+
log_error "Service deployments not ready after ${wait_timeout}s" \
365+
"Some deployments did not become ready in time" \
366+
"Check pod status" \
367+
"kubectl get pods -n ${env_name} --field-selector=status.phase!=Running"
368+
return 1
337369
fi
338370

339371
mark_step_done "$step_id"

0 commit comments

Comments
 (0)