Skip to content

Commit fb54d51

Browse files
clubanderson and claude committed
fix: template HTTPRoute names when RELEASE_NAME_POSTFIX is overridden
The static httproute.yaml hardcodes resource names that match the helmfile's default RELEASE_NAME_POSTFIX ("workload-autoscaler"). When the nightly CI overrides this to "workload-autoscaling" (the guide directory name), the deployed gateway and InferencePool get different names from the ones the HTTPRoute references, so the route never binds to the gateway. As a result, every gateway request returns HTTP 404 — the root cause of the persistent health check failure (curl exit code 22).

Fix: use yq to template the HTTPRoute's parentRef gateway name and backendRef pool name based on the actual RELEASE_NAME_POSTFIX before applying it.

Also improve the health check Job to capture and log the HTTP status code separately (replacing curl's -sf flags, which hide it), making future gateway routing issues much easier to diagnose.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: Andrew Anderson <andy@clubanderson.com>
1 parent a316a3b commit fb54d51

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed

deploy/install.sh

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -785,7 +785,26 @@ deploy_llm_d_infrastructure() {
785785
log_info "Skipping WVA in helmfile (will be deployed separately from local chart)"
786786
fi
787787
helmfile apply -e $GATEWAY_PROVIDER -n ${LLMD_NS} $helmfile_selector
788-
kubectl apply -f httproute.yaml -n ${LLMD_NS}
788+
789+
# Apply HTTPRoute with correct resource name references.
790+
# The static httproute.yaml uses resource names matching the helmfile's default
791+
# RELEASE_NAME_POSTFIX (e.g. "workload-autoscaler"). When RELEASE_NAME_POSTFIX
792+
# is overridden (e.g. in CI), gateway and InferencePool names change, so we
793+
# must template the HTTPRoute references to match the actual deployed resources.
794+
if [ -f httproute.yaml ]; then
795+
local rn="${RELEASE_NAME_POSTFIX:-}"
796+
if [ -n "$rn" ]; then
797+
local gw_name="infra-${rn}-inference-gateway"
798+
local pool_name="gaie-${rn}"
799+
log_info "Applying HTTPRoute (gateway=$gw_name, pool=$pool_name)"
800+
yq eval "
801+
.spec.parentRefs[0].name = \"${gw_name}\" |
802+
.spec.rules[0].backendRefs[0].name = \"${pool_name}\"
803+
" httproute.yaml | kubectl apply -f - -n ${LLMD_NS}
804+
else
805+
kubectl apply -f httproute.yaml -n ${LLMD_NS}
806+
fi
807+
fi
789808

790809
# Patch llm-d-inference-scheduler deployment if scale-to-zero is enabled
791810
if [ "$ENABLE_SCALE_TO_ZERO" == "true" ]; then

test/e2e-openshift/sharegpt_scaleup_test.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -359,10 +359,17 @@ var _ = Describe("ShareGPT Scale-Up Test", Ordered, func() {
359359
Command: []string{"/bin/sh", "-c"},
360360
Args: []string{fmt.Sprintf(`
361361
echo "Checking gateway readiness at %s:80..."
362-
RESPONSE=$(curl -sf --max-time 10 http://%s:80/v1/models 2>&1)
362+
# Capture HTTP status code separately to aid debugging
363+
HTTP_CODE=$(curl -s -o /tmp/response.txt -w "%%{http_code}" --max-time 10 http://%s:80/v1/models 2>/dev/null)
363364
CURL_EXIT=$?
365+
RESPONSE=$(cat /tmp/response.txt 2>/dev/null)
364366
if [ $CURL_EXIT -ne 0 ]; then
365-
echo "Gateway not responding (curl exit code: $CURL_EXIT)"
367+
echo "Gateway not responding (curl exit code: $CURL_EXIT, HTTP status: $HTTP_CODE)"
368+
echo "Response: $RESPONSE"
369+
exit 1
370+
fi
371+
if [ "$HTTP_CODE" -ge 400 ] 2>/dev/null; then
372+
echo "Gateway returned HTTP $HTTP_CODE"
366373
echo "Response: $RESPONSE"
367374
exit 1
368375
fi
@@ -372,7 +379,7 @@ if echo "$RESPONSE" | grep -q '"id":'; then
372379
echo "Response: $RESPONSE"
373380
exit 0
374381
fi
375-
echo "Gateway responded but no model data found in response"
382+
echo "Gateway responded (HTTP $HTTP_CODE) but no model data found in response"
376383
echo "Response: $RESPONSE"
377384
exit 1`,
378385
model.gatewayService, model.gatewayService)},

0 commit comments

Comments
 (0)