@@ -129,6 +129,7 @@ helm_install_chart() {
129129 local -a helm_args=(
130130 install " $release_name " " $chart_ref "
131131 -n " $env_name "
132+ --wait
132133 --timeout 20m
133134 )
134135
@@ -228,19 +229,36 @@ env_phase2_step1_commons_base() {
228229 " ${extra[@]} " \
229230 || return 1
230231
231- # Verify StatefulSets and Deployments are ready
232- log_info " Verifying infrastructure resources are ready..."
233- local not_ready
234- not_ready=$( kubectl get deployments,statefulsets -n " $env_name " -o json 2> /dev/null | \
235- jq -r ' .items[] | select((.status.readyReplicas // 0) != (.status.replicas // 1)) | "\(.kind)/\(.metadata.name)"' 2> /dev/null || true)
236-
237- if [[ -n " $not_ready " ]]; then
238- log_warn " Some resources are not yet fully ready:"
239- echo " $not_ready " | while read -r res; do
240- log_warn " - ${res} "
241- done
242- else
243- log_success " All base infrastructure resources in '${env_name} ' are ready."
232+ # Wait until ALL StatefulSets and Deployments are fully ready.
233+ # This is critical — services chart must not start until PostgreSQL,
234+ # Kafka, Redis, etc. are all accepting connections.
235+ log_info " Waiting for all base infrastructure to be fully ready..."
236+ local wait_timeout=900 # 15 minutes
237+ local wait_interval=15
238+ local wait_elapsed=0
239+
240+ while [[ $wait_elapsed -lt $wait_timeout ]]; do
241+ local not_ready
242+ not_ready=$( kubectl get deployments,statefulsets -n " $env_name " -o json 2> /dev/null | \
243+ jq -r ' .items[] | select((.status.readyReplicas // 0) != (.status.replicas // 1)) | "\(.kind)/\(.metadata.name)"' 2> /dev/null || true)
244+
245+ if [[ -z " $not_ready " ]]; then
246+ log_success " All base infrastructure resources in '${env_name} ' are ready."
247+ break
248+ fi
249+
250+ echo -ne " \r Waiting for: $( echo " $not_ready " | tr ' \n' ' , ' ) ... ${wait_elapsed} s/${wait_timeout} s"
251+ sleep " $wait_interval "
252+ wait_elapsed=$(( wait_elapsed + wait_interval))
253+ done
254+
255+ if [[ $wait_elapsed -ge $wait_timeout ]]; then
256+ echo " "
257+ log_error " Base infrastructure not ready after ${wait_timeout} s" \
258+ " Some resources did not become ready in time" \
259+ " Check pod status" \
260+ " kubectl get pods -n ${env_name} --field-selector=status.phase!=Running"
261+ return 1
244262 fi
245263
246264 mark_step_done " $step_id "
@@ -320,20 +338,34 @@ env_phase2_step2_commons_services() {
320338 " ${extra[@]} " \
321339 || return 1
322340
323- # Verify deployments are ready
324- log_info " Verifying all service deployments are ready..."
325- local not_ready
326- not_ready=$( kubectl get deployments -n " $env_name " -o json 2> /dev/null | \
327- jq -r ' .items[] | select((.status.availableReplicas // 0) != (.status.replicas // 1)) | .metadata.name' 2> /dev/null || true)
328-
329- if [[ -n " $not_ready " ]]; then
330- log_warn " Some deployments are not yet fully ready:"
331- echo " $not_ready " | while read -r dep; do
332- log_warn " - ${dep} "
333- done
334- log_warn " They may still be starting. Check: kubectl get pods -n ${env_name} "
335- else
336- log_success " All deployments in '${env_name} ' are ready."
341+ # Wait until ALL deployments are fully ready before marking complete.
342+ log_info " Waiting for all service deployments to be fully ready..."
343+ local wait_timeout=900 # 15 minutes
344+ local wait_interval=15
345+ local wait_elapsed=0
346+
347+ while [[ $wait_elapsed -lt $wait_timeout ]]; do
348+ local not_ready
349+ not_ready=$( kubectl get deployments -n " $env_name " -o json 2> /dev/null | \
350+ jq -r ' .items[] | select((.status.availableReplicas // 0) != (.status.replicas // 1)) | .metadata.name' 2> /dev/null || true)
351+
352+ if [[ -z " $not_ready " ]]; then
353+ log_success " All deployments in '${env_name} ' are ready."
354+ break
355+ fi
356+
357+ echo -ne " \r Waiting for: $( echo " $not_ready " | tr ' \n' ' , ' ) ... ${wait_elapsed} s/${wait_timeout} s"
358+ sleep " $wait_interval "
359+ wait_elapsed=$(( wait_elapsed + wait_interval))
360+ done
361+
362+ if [[ $wait_elapsed -ge $wait_timeout ]]; then
363+ echo " "
364+ log_error " Service deployments not ready after ${wait_timeout} s" \
365+ " Some deployments did not become ready in time" \
366+ " Check pod status" \
367+ " kubectl get pods -n ${env_name} --field-selector=status.phase!=Running"
368+ return 1
337369 fi
338370
339371 mark_step_done " $step_id "
0 commit comments