Skip to content

Commit fe8799f

Browse files
committed
bug: harden deploy-azd ingress readiness and class detection
1 parent 508cca9 commit fe8799f

File tree

6 files changed

+112
-48
lines changed

6 files changed

+112
-48
lines changed

.github/workflows/deploy-azd.yml

Lines changed: 91 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -75,46 +75,46 @@ jobs:
7575
7676
CHANGED_FILES=$(git diff --name-only "origin/$DEFAULT_BRANCH...HEAD")
7777
78-
mapfile -t AGENT_SERVICES < <(python3 - <<'PY'
79-
import re
80-
81-
with open('azure.yaml', encoding='utf-8') as f:
82-
lines = f.readlines()
83-
84-
in_services = False
85-
current_service = None
86-
current_host = None
87-
services = []
88-
89-
for raw in lines:
90-
line = raw.rstrip('\n')
91-
if not in_services:
92-
if re.match(r'^services:\s*$', line):
93-
in_services = True
94-
continue
95-
96-
if re.match(r'^[^\s]', line):
97-
break
98-
99-
service_match = re.match(r'^ ([a-z0-9\-]+):\s*$', line)
100-
if service_match:
101-
if current_service and current_host == 'aks' and current_service != 'crud-service':
102-
services.append(current_service)
103-
current_service = service_match.group(1)
104-
current_host = None
105-
continue
106-
107-
host_match = re.match(r'^ host:\s*(\S+)\s*$', line)
108-
if host_match:
109-
current_host = host_match.group(1)
110-
111-
if current_service and current_host == 'aks' and current_service != 'crud-service':
112-
services.append(current_service)
113-
114-
for service in services:
115-
print(service)
116-
PY
117-
)
78+
mapfile -t AGENT_SERVICES < <(python3 - <<'PY'
79+
import re
80+
81+
with open('azure.yaml', encoding='utf-8') as f:
82+
lines = f.readlines()
83+
84+
in_services = False
85+
current_service = None
86+
current_host = None
87+
services = []
88+
89+
for raw in lines:
90+
line = raw.rstrip('\n')
91+
if not in_services:
92+
if re.match(r'^services:\s*$', line):
93+
in_services = True
94+
continue
95+
96+
if re.match(r'^[^\s]', line):
97+
break
98+
99+
service_match = re.match(r'^ ([a-z0-9\-]+):\s*$', line)
100+
if service_match:
101+
if current_service and current_host == 'aks' and current_service != 'crud-service':
102+
services.append(current_service)
103+
current_service = service_match.group(1)
104+
current_host = None
105+
continue
106+
107+
host_match = re.match(r'^ host:\s*(\S+)\s*$', line)
108+
if host_match:
109+
current_host = host_match.group(1)
110+
111+
if current_service and current_host == 'aks' and current_service != 'crud-service':
112+
services.append(current_service)
113+
114+
for service in services:
115+
print(service)
116+
PY
117+
)
118118

119119
CRUD_CHANGED=false
120120
if echo "$CHANGED_FILES" | grep -Eq '^apps/crud-service/'; then
@@ -453,14 +453,60 @@ jobs:
453453
--query "identityProfile.kubeletidentity.clientId" -o tsv)
454454
echo "WORKLOAD_AZURE_CLIENT_ID=${AKS_MI_CLIENT_ID}" >> "$GITHUB_ENV"
455455
456-
- name: Deploy CRUD service
456+
- name: Resolve ingress class
457+
shell: bash
457458
run: |
458-
if ! azd deploy --service crud-service --no-prompt -e "${{ inputs.environment }}"; then
459-
echo "Initial CRUD deploy failed; retrying once after short wait..."
460-
sleep 60
461-
azd deploy --service crud-service --no-prompt -e "${{ inputs.environment }}"
459+
set -euo pipefail
460+
kubectl get ingressclass -o wide || true
461+
462+
if [ -n "${INGRESS_CLASS_NAME:-}" ] && kubectl get ingressclass "${INGRESS_CLASS_NAME}" >/dev/null 2>&1; then
463+
echo "Using preconfigured ingress class: ${INGRESS_CLASS_NAME}"
464+
echo "INGRESS_CLASS_NAME=${INGRESS_CLASS_NAME}" >> "$GITHUB_ENV"
465+
exit 0
462466
fi
467+
468+
for cls in webapprouting.kubernetes.azure.com nginx azure-application-gateway; do
469+
if kubectl get ingressclass "$cls" >/dev/null 2>&1; then
470+
echo "Using detected ingress class: $cls"
471+
echo "INGRESS_CLASS_NAME=$cls" >> "$GITHUB_ENV"
472+
exit 0
473+
fi
474+
done
475+
476+
echo "No supported IngressClass found. Enable AKS Web App Routing or provide INGRESS_CLASS_NAME." >&2
477+
exit 1
478+
479+
- name: Deploy CRUD service
480+
timeout-minutes: 25
481+
shell: bash
482+
run: |
483+
set -euo pipefail
484+
max_attempts=4
485+
486+
for attempt in $(seq 1 "$max_attempts"); do
487+
echo "Deploy attempt ${attempt}/${max_attempts}"
488+
if azd deploy --service crud-service --no-prompt -e "${{ inputs.environment }}"; then
489+
echo "CRUD deploy succeeded."
490+
exit 0
491+
fi
492+
493+
echo "Attempt ${attempt} failed; collecting ingress diagnostics..."
494+
kubectl get ingressclass -o wide || true
495+
kubectl get ingress -n holiday-peak -o wide || true
496+
kubectl get pods -n app-routing-system -o wide || true
497+
kubectl get pods -n ingress-nginx -o wide || true
498+
499+
if [ "$attempt" -eq "$max_attempts" ]; then
500+
echo "CRUD deploy failed after ${max_attempts} attempts." >&2
501+
exit 1
502+
fi
503+
504+
backoff=$((attempt * 45))
505+
echo "Retrying after ${backoff}s..."
506+
sleep "$backoff"
507+
done
463508
env:
509+
INGRESS_CLASS_NAME: ${{ env.INGRESS_CLASS_NAME }}
464510
AZURE_CLIENT_ID: ${{ env.WORKLOAD_AZURE_CLIENT_ID }}
465511
AZURE_TENANT_ID: ${{ env.AZURE_TENANT_ID }}
466512
PROJECT_ENDPOINT: ${{ needs.provision.outputs.PROJECT_ENDPOINT }}

.infra/azd/hooks/render-helm.ps1

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ $namespace = if ($env:K8S_NAMESPACE) { $env:K8S_NAMESPACE } else { "holiday-peak
77
$imagePrefix = if ($env:IMAGE_PREFIX) { $env:IMAGE_PREFIX } else { "ghcr.io/azure-samples" }
88
$imageTag = if ($env:IMAGE_TAG) { $env:IMAGE_TAG } else { "latest" }
99
$kedaEnabled = if ($env:KEDA_ENABLED) { $env:KEDA_ENABLED } else { "false" }
10+
$ingressEnabled = if ($env:INGRESS_ENABLED) { $env:INGRESS_ENABLED } else { "true" }
11+
$ingressClassName = if ($env:INGRESS_CLASS_NAME) { $env:INGRESS_CLASS_NAME } else { "webapprouting.kubernetes.azure.com" }
12+
$canaryEnabled = if ($env:CANARY_ENABLED) { $env:CANARY_ENABLED } else { "false" }
1013
$readinessPath = "/ready"
1114

1215
if ($ServiceName -eq "crud-service") {
@@ -48,7 +51,13 @@ $helmArgs = @(
4851
'--set',
4952
"image.tag=$imageTag",
5053
'--set',
51-
"keda.enabled=$kedaEnabled"
54+
"keda.enabled=$kedaEnabled",
55+
'--set',
56+
"ingress.enabled=$ingressEnabled",
57+
'--set-string',
58+
"ingress.className=$ingressClassName",
59+
'--set',
60+
"canary.enabled=$canaryEnabled",
5261
'--set',
5362
"probes.readiness.path=$readinessPath"
5463
)

.infra/azd/hooks/render-helm.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ IMAGE_PREFIX="${IMAGE_PREFIX:-ghcr.io/azure-samples}"
88
IMAGE_TAG="${IMAGE_TAG:-latest}"
99
KEDA_ENABLED="${KEDA_ENABLED:-false}"
1010
INGRESS_ENABLED="${INGRESS_ENABLED:-true}"
11+
INGRESS_CLASS_NAME="${INGRESS_CLASS_NAME:-webapprouting.kubernetes.azure.com}"
1112
CANARY_ENABLED="${CANARY_ENABLED:-false}"
1213
READINESS_PATH="/ready"
1314

@@ -48,6 +49,7 @@ HELM_ARGS="$HELM_ARGS --set image.repository=$IMAGE_PREFIX"
4849
HELM_ARGS="$HELM_ARGS --set image.tag=$IMAGE_TAG"
4950
HELM_ARGS="$HELM_ARGS --set keda.enabled=$KEDA_ENABLED"
5051
HELM_ARGS="$HELM_ARGS --set ingress.enabled=$INGRESS_ENABLED"
52+
HELM_ARGS="$HELM_ARGS --set-string ingress.className=$INGRESS_CLASS_NAME"
5153
HELM_ARGS="$HELM_ARGS --set canary.enabled=$CANARY_ENABLED"
5254
HELM_ARGS="$HELM_ARGS --set probes.readiness.path=$READINESS_PATH"
5355

.kubernetes/chart/templates/ingress.yaml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ metadata:
1010
{{- toYaml . | nindent 4 }}
1111
{{- end }}
1212
spec:
13-
ingressClassName: webapprouting.kubernetes.azure.com
13+
{{- if .Values.ingress.className }}
14+
ingressClassName: {{ .Values.ingress.className | quote }}
15+
{{- end }}
1416
{{- if .Values.ingress.tls }}
1517
tls:
1618
{{- range .Values.ingress.tls }}
@@ -53,7 +55,9 @@ metadata:
5355
{{- toYaml . | nindent 4 }}
5456
{{- end }}
5557
spec:
56-
ingressClassName: webapprouting.kubernetes.azure.com
58+
{{- if .Values.ingress.className }}
59+
ingressClassName: {{ .Values.ingress.className | quote }}
60+
{{- end }}
5761
rules:
5862
{{- if .Values.ingress.host }}
5963
- host: {{ .Values.ingress.host | quote }}

.kubernetes/chart/values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ service:
1010
# Ingress configuration. Defaults to AKS Web Application Routing (NGINX-based add-on); set ingress.className to target a different ingress controller.
1111
ingress:
1212
enabled: true
13+
className: "webapprouting.kubernetes.azure.com"
1314
host: "" # Leave empty for path-based routing, or set hostname for host-based
1415
path: "" # Defaults to /{serviceName}
1516
pathType: "Prefix"

docs/implementation/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ Target: **100%**
155155
- App deployments in `deploy-azd` are now strictly changed-only (CRUD, UI, and agent matrix entries are deployed only when their app paths change).
156156
- Post-deploy hooks (`sync-apim-agents` and `ensure-foundry-agents`) consume these lists through `CHANGED_SERVICES` and run only for changed services.
157157
- Foundry readiness verification in deployment workflow is scoped to changed agent services under changed-only mode.
158+
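- CRUD deployment now runs a preflight check for `IngressClass` availability: it uses a preconfigured `INGRESS_CLASS_NAME` when that class exists in the cluster, otherwise the first class found among `webapprouting.kubernetes.azure.com`, `nginx`, and `azure-application-gateway`, and fails the step if none is present. The resolved `INGRESS_CLASS_NAME` is passed into Helm rendering to avoid class/controller drift.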
159+
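- CRUD deployment retries are now bounded (at most 4 attempts, waiting 45s × the attempt number between attempts, under a 25-minute step timeout). Each failed attempt collects diagnostics (`kubectl get ingressclass`, ingress, and controller pods) to improve root-cause visibility for endpoint readiness delays.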
158160

159161
---
160162

0 commit comments

Comments
 (0)