Skip to content

Commit 74d5686

Browse files
authored
add HTTPRoute manifests and update profile
Signed-off-by: samzong <[email protected]>
Signed-off-by: GitHub <[email protected]>
1 parent 7fd4057 commit 74d5686

File tree

3 files changed

+94
-6
lines changed

3 files changed

+94
-6
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
# HTTPRoute manifests for the llm-d e2e profile.
#
# All three routes attach to the shared `inference-gateway` Gateway
# (Gateway API, gateway.networking.k8s.io/v1) in the default namespace.
#
# Routing model:
#   - vsr-default-svc    : catch-all; any request with no matching header
#                          falls through to the llama3-8b backend.
#   - vsr-llama8b-svc    : selected when Semantic Router sets
#                          x-selected-model: llama3-8b.
#   - vsr-phi4-mini-svc  : selected when Semantic Router sets
#                          x-selected-model: phi4-mini.
#
# NOTE(review): route precedence between the catch-all and the header
# routes relies on Gateway API match-specificity ordering (more specific
# header matches win over a bare PathPrefix) — confirm against the
# gateway implementation in use (Istio).
#
# The 300s request timeout accommodates slow LLM generation responses.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vsr-default-svc
  namespace: default
spec:
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: inference-gateway
  rules:
    - backendRefs:
        - name: vllm-llama3-8b-instruct
          port: 8000
      matches:
        - path:
            type: PathPrefix
            value: /
      timeouts:
        request: 300s
---
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vsr-llama8b-svc
  namespace: default
spec:
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: inference-gateway
  rules:
    - backendRefs:
        - name: vllm-llama3-8b-instruct
          port: 8000
      matches:
        - path:
            type: PathPrefix
            value: /
          # Only match requests Semantic Router has tagged for llama3-8b.
          headers:
            - type: Exact
              name: x-selected-model
              value: llama3-8b
      timeouts:
        request: 300s
---
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vsr-phi4-mini-svc
  namespace: default
spec:
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: inference-gateway
  rules:
    - backendRefs:
        - name: phi4-mini
          port: 8000
      matches:
        - path:
            type: PathPrefix
            value: /
          # Only match requests Semantic Router has tagged for phi4-mini.
          headers:
            - type: Exact
              name: x-selected-model
              value: phi4-mini
      timeouts:
        request: 300s

e2e/profiles/llm-d/profile.go

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,7 @@ func (p *Profile) Setup(ctx context.Context, opts *framework.SetupOptions) error
8888

8989
func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) error {
9090
p.verbose = opts.Verbose
91-
_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/httproute-llama-pool.yaml")
92-
_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/httproute-phi4-pool.yaml")
91+
_ = p.kubectlDelete(ctx, "e2e/profiles/llm-d/manifests/httproute-services.yaml")
9392
_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/dest-rule-epp-llama.yaml")
9493
_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/dest-rule-epp-phi4.yaml")
9594
_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/inferencepool-llama.yaml")
@@ -223,10 +222,7 @@ func (p *Profile) deployGatewayRoutes(ctx context.Context) error {
223222
if err := p.kubectlApply(ctx, "deploy/kubernetes/istio/gateway.yaml"); err != nil {
224223
return err
225224
}
226-
if err := p.kubectlApply(ctx, "deploy/kubernetes/llmd-base/httproute-llama-pool.yaml"); err != nil {
227-
return err
228-
}
229-
if err := p.kubectlApply(ctx, "deploy/kubernetes/llmd-base/httproute-phi4-pool.yaml"); err != nil {
225+
if err := p.kubectlApply(ctx, "e2e/profiles/llm-d/manifests/httproute-services.yaml"); err != nil {
230226
return err
231227
}
232228
if err := p.kubectlApply(ctx, "deploy/kubernetes/istio/destinationrule.yaml"); err != nil {
@@ -247,6 +243,25 @@ func (p *Profile) verifyEnvironment(ctx context.Context, opts *framework.SetupOp
247243
if err != nil {
248244
return err
249245
}
246+
247+
// Actively wait for critical deployments to become Available before checking readiness counts.
248+
// This avoids flakiness when resources are still pulling images just after creation.
249+
deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)
250+
deploymentsToWait := []struct {
251+
ns, name string
252+
}{
253+
{semanticNamespace, "semantic-router"},
254+
{gatewayNamespace, "istiod"},
255+
{"default", "llm-d-inference-scheduler-llama3-8b"},
256+
{"default", "llm-d-inference-scheduler-phi4-mini"},
257+
{"default", "inference-gateway-istio"},
258+
}
259+
for _, d := range deploymentsToWait {
260+
if err := deployer.WaitForDeployment(ctx, d.ns, d.name, 10*time.Minute); err != nil {
261+
return fmt.Errorf("wait for deployment %s/%s: %w", d.ns, d.name, err)
262+
}
263+
}
264+
250265
if err := helpers.CheckDeployment(ctx, client, semanticNamespace, "semantic-router", p.verbose); err != nil {
251266
return err
252267
}

e2e/profiles/llm-d/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
config:
2+
# Allow Envoy to re-run route matching after Semantic Router sets x-selected-model.
3+
# Without this, Gateway API routes that depend on that header won't be chosen and return 404.
4+
clear_route_cache: true
25
default_model: llama3-8b
36
decisions:
47
- name: math_route

0 commit comments

Comments (0)