fix: template HTTPRoute names when RELEASE_NAME_POSTFIX is overridden

clubanderson · claude · clubanderson · commit fb54d51fc733 · 2026-02-13T16:32:40.000-05:00
The static httproute.yaml uses hardcoded resource names matching the
helmfile's default RELEASE_NAME_POSTFIX ("workload-autoscaler"). When
the nightly CI overrides this to "workload-autoscaling" (the guide
directory name), the deployed gateway and InferencePool get different
names than what the HTTPRoute references, causing the route to never
bind to the gateway. This results in all gateway requests returning
HTTP 404 — the root cause of the persistent health check failure
(curl exit code 22).

Fix: use yq to template the HTTPRoute's parentRef gateway name and
backendRef pool name based on the actual RELEASE_NAME_POSTFIX before
applying it.

Also improve the health check Job to capture and log the HTTP status
code separately. The previous `curl -sf` exited with code 22 on any
HTTP error without reporting the status, so this makes future gateway
routing issues much easier to diagnose.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: Andrew Anderson <andy@clubanderson.com>
diff --git a/deploy/install.sh b/deploy/install.sh
@@ -785,7 +785,26 @@ deploy_llm_d_infrastructure() {
       log_info "Skipping WVA in helmfile (will be deployed separately from local chart)"
     fi
     helmfile apply -e $GATEWAY_PROVIDER -n ${LLMD_NS} $helmfile_selector
-    kubectl apply -f httproute.yaml -n ${LLMD_NS}
+
+    # Apply HTTPRoute with correct resource name references.
+    # The static httproute.yaml uses resource names matching the helmfile's default
+    # RELEASE_NAME_POSTFIX (e.g. "workload-autoscaler"). When RELEASE_NAME_POSTFIX
+    # is overridden (e.g. in CI), gateway and InferencePool names change, so we
+    # must template the HTTPRoute references to match the actual deployed resources.
+    if [ -f httproute.yaml ]; then
+        local rn="${RELEASE_NAME_POSTFIX:-}"
+        if [ -n "$rn" ]; then
+            local gw_name="infra-${rn}-inference-gateway"
+            local pool_name="gaie-${rn}"
+            log_info "Applying HTTPRoute (gateway=$gw_name, pool=$pool_name)"
+            yq eval "
+                .spec.parentRefs[0].name = \"${gw_name}\" |
+                .spec.rules[0].backendRefs[0].name = \"${pool_name}\"
+            " httproute.yaml | kubectl apply -f - -n ${LLMD_NS}
+        else
+            kubectl apply -f httproute.yaml -n ${LLMD_NS}
+        fi
+    fi
 
     # Patch llm-d-inference-scheduler deployment if scale-to-zero is enabled
     if [ "$ENABLE_SCALE_TO_ZERO" == "true" ]; then
diff --git a/test/e2e-openshift/sharegpt_scaleup_test.go b/test/e2e-openshift/sharegpt_scaleup_test.go
@@ -359,10 +359,17 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
 									Command: []string{"/bin/sh", "-c"},
 									Args: []string{fmt.Sprintf(`
 echo "Checking gateway readiness at %s:80..."
-RESPONSE=$(curl -sf --max-time 10 http://%s:80/v1/models 2>&1)
+# Capture HTTP status code separately to aid debugging
+HTTP_CODE=$(curl -s -o /tmp/response.txt -w "%%{http_code}" --max-time 10 http://%s:80/v1/models 2>/dev/null)
 CURL_EXIT=$?
+RESPONSE=$(cat /tmp/response.txt 2>/dev/null)
 if [ $CURL_EXIT -ne 0 ]; then
-  echo "Gateway not responding (curl exit code: $CURL_EXIT)"
+  echo "Gateway not responding (curl exit code: $CURL_EXIT, HTTP status: $HTTP_CODE)"
+  echo "Response: $RESPONSE"
+  exit 1
+fi
+if [ "$HTTP_CODE" -ge 400 ] 2>/dev/null; then
+  echo "Gateway returned HTTP $HTTP_CODE"
   echo "Response: $RESPONSE"
   exit 1
 fi
@@ -372,7 +379,7 @@ if echo "$RESPONSE" | grep -q '"id":'; then
   echo "Response: $RESPONSE"
   exit 0
 fi
-echo "Gateway responded but no model data found in response"
+echo "Gateway responded (HTTP $HTTP_CODE) but no model data found in response"
 echo "Response: $RESPONSE"
 exit 1`,
 										model.gatewayService, model.gatewayService)},