Skip to content

Commit 74d5686

Browse files
authored
add HTTPRoute manifests and update profile
Signed-off-by: samzong <[email protected]>
Signed-off-by: GitHub <[email protected]>
1 parent 7fd4057 commit 74d5686

File tree

3 files changed

+94
-6
lines changed

3 files changed

+94
-6
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
# HTTPRoute manifests for the llm-d e2e profile.
#
# All three routes attach to the shared `inference-gateway` Gateway
# (Gateway API, gateway.networking.k8s.io/v1) in the default namespace.
#
# Routing model:
#   - vsr-default-svc    : catch-all; any request with no matching header
#                          falls through to the llama3-8b backend.
#   - vsr-llama8b-svc    : selected when Semantic Router sets
#                          x-selected-model: llama3-8b.
#   - vsr-phi4-mini-svc  : selected when Semantic Router sets
#                          x-selected-model: phi4-mini.
#
# NOTE(review): route precedence between the catch-all and the header
# routes relies on Gateway API match-specificity ordering (more specific
# header matches win over a bare PathPrefix) — confirm against the
# gateway implementation in use (Istio).
#
# The 300s request timeout accommodates slow LLM generation responses.
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vsr-default-svc
  namespace: default
spec:
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: inference-gateway
  rules:
    - backendRefs:
        - name: vllm-llama3-8b-instruct
          port: 8000
      matches:
        - path:
            type: PathPrefix
            value: /
      timeouts:
        request: 300s
---
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vsr-llama8b-svc
  namespace: default
spec:
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: inference-gateway
  rules:
    - backendRefs:
        - name: vllm-llama3-8b-instruct
          port: 8000
      matches:
        - path:
            type: PathPrefix
            value: /
          # Only match requests Semantic Router has tagged for llama3-8b.
          headers:
            - type: Exact
              name: x-selected-model
              value: llama3-8b
      timeouts:
        request: 300s
---
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: vsr-phi4-mini-svc
  namespace: default
spec:
  parentRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: inference-gateway
  rules:
    - backendRefs:
        - name: phi4-mini
          port: 8000
      matches:
        - path:
            type: PathPrefix
            value: /
          # Only match requests Semantic Router has tagged for phi4-mini.
          headers:
            - type: Exact
              name: x-selected-model
              value: phi4-mini
      timeouts:
        request: 300s

e2e/profiles/llm-d/profile.go

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,7 @@ func (p *Profile) Setup(ctx context.Context, opts *framework.SetupOptions) error
8888

8989
func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) error {
9090
p.verbose = opts.Verbose
91-
_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/httproute-llama-pool.yaml")
92-
_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/httproute-phi4-pool.yaml")
91+
_ = p.kubectlDelete(ctx, "e2e/profiles/llm-d/manifests/httproute-services.yaml")
9392
_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/dest-rule-epp-llama.yaml")
9493
_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/dest-rule-epp-phi4.yaml")
9594
_ = p.kubectlDelete(ctx, "deploy/kubernetes/llmd-base/inferencepool-llama.yaml")
@@ -223,10 +222,7 @@ func (p *Profile) deployGatewayRoutes(ctx context.Context) error {
223222
if err := p.kubectlApply(ctx, "deploy/kubernetes/istio/gateway.yaml"); err != nil {
224223
return err
225224
}
226-
if err := p.kubectlApply(ctx, "deploy/kubernetes/llmd-base/httproute-llama-pool.yaml"); err != nil {
227-
return err
228-
}
229-
if err := p.kubectlApply(ctx, "deploy/kubernetes/llmd-base/httproute-phi4-pool.yaml"); err != nil {
225+
if err := p.kubectlApply(ctx, "e2e/profiles/llm-d/manifests/httproute-services.yaml"); err != nil {
230226
return err
231227
}
232228
if err := p.kubectlApply(ctx, "deploy/kubernetes/istio/destinationrule.yaml"); err != nil {
@@ -247,6 +243,25 @@ func (p *Profile) verifyEnvironment(ctx context.Context, opts *framework.SetupOp
247243
if err != nil {
248244
return err
249245
}
246+
247+
// Actively wait for critical deployments to become Available before checking readiness counts.
248+
// This avoids flakiness when resources are still pulling images just after creation.
249+
deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose)
250+
deploymentsToWait := []struct {
251+
ns, name string
252+
}{
253+
{semanticNamespace, "semantic-router"},
254+
{gatewayNamespace, "istiod"},
255+
{"default", "llm-d-inference-scheduler-llama3-8b"},
256+
{"default", "llm-d-inference-scheduler-phi4-mini"},
257+
{"default", "inference-gateway-istio"},
258+
}
259+
for _, d := range deploymentsToWait {
260+
if err := deployer.WaitForDeployment(ctx, d.ns, d.name, 10*time.Minute); err != nil {
261+
return fmt.Errorf("wait for deployment %s/%s: %w", d.ns, d.name, err)
262+
}
263+
}
264+
250265
if err := helpers.CheckDeployment(ctx, client, semanticNamespace, "semantic-router", p.verbose); err != nil {
251266
return err
252267
}

e2e/profiles/llm-d/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
config:
2+
# Allow Envoy to re-run route matching after Semantic Router sets x-selected-model.
3+
# Without this, Gateway API routes that depend on that header won't be chosen and return 404.
4+
clear_route_cache: true
25
default_model: llama3-8b
36
decisions:
47
- name: math_route

0 commit comments

Comments (0)