Skip to content

Commit d5dae34

Browse files
committed
Remove accidentally committed test report files
Signed-off-by: Senan Zedan <[email protected]>
1 parent 32c08fb commit d5dae34

File tree

8 files changed

+281
-74
lines changed

8 files changed

+281
-74
lines changed
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: vllm-llama3-8b-instruct
5+
namespace: default
6+
spec:
7+
replicas: 1
8+
selector:
9+
matchLabels:
10+
app: vllm-llama3-8b-instruct
11+
template:
12+
metadata:
13+
labels:
14+
app: vllm-llama3-8b-instruct
15+
spec:
16+
containers:
17+
- name: vllm-sim
18+
image: ghcr.io/llm-d/llm-d-inference-sim:v0.5.0
19+
imagePullPolicy: IfNotPresent
20+
args:
21+
- --model
22+
- base-model
23+
- --port
24+
- '8000'
25+
- --max-loras
26+
- '6'
27+
- --lora-modules
28+
- '{"name": "math-expert"}'
29+
- '{"name": "science-expert"}'
30+
- '{"name": "social-expert"}'
31+
- '{"name": "humanities-expert"}'
32+
- '{"name": "law-expert"}'
33+
- '{"name": "general-expert"}'
34+
env:
35+
- name: POD_NAME
36+
valueFrom:
37+
fieldRef:
38+
fieldPath: metadata.name
39+
- name: NAMESPACE
40+
valueFrom:
41+
fieldRef:
42+
fieldPath: metadata.namespace
43+
ports:
44+
- containerPort: 8000
45+
name: http
46+
protocol: TCP
47+
resources:
48+
requests:
49+
cpu: 10m
50+
---
51+
apiVersion: v1
52+
kind: Service
53+
metadata:
54+
name: vllm-llama3-8b-instruct
55+
namespace: default
56+
labels:
57+
app: vllm-llama3-8b-instruct
58+
spec:
59+
type: ClusterIP
60+
ports:
61+
- port: 8000
62+
targetPort: 8000
63+
protocol: TCP
64+
selector:
65+
app: vllm-llama3-8b-instruct
66+
---
67+
apiVersion: aigateway.envoyproxy.io/v1alpha1
68+
kind: AIServiceBackend
69+
metadata:
70+
name: vllm-llama3-8b-instruct
71+
namespace: default
72+
spec:
73+
schema:
74+
name: OpenAI
75+
backendRef:
76+
name: vllm-llama3-8b-instruct
77+
kind: Backend
78+
group: gateway.envoyproxy.io
79+
---
80+
apiVersion: gateway.envoyproxy.io/v1alpha1
81+
kind: Backend
82+
metadata:
83+
name: vllm-llama3-8b-instruct
84+
namespace: default
85+
spec:
86+
endpoints:
87+
- fqdn:
88+
hostname: vllm-llama3-8b-instruct.default.svc.cluster.local
89+
port: 8000
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
apiVersion: gateway.networking.k8s.io/v1
2+
kind: GatewayClass
3+
metadata:
4+
name: semantic-router
5+
spec:
6+
controllerName: gateway.envoyproxy.io/gatewayclass-controller
7+
---
8+
apiVersion: gateway.envoyproxy.io/v1alpha1
9+
kind: EnvoyProxy
10+
metadata:
11+
name: semantic-router
12+
namespace: default
13+
spec:
14+
provider:
15+
type: Kubernetes
16+
kubernetes:
17+
envoyDeployment:
18+
replicas: 1
19+
container:
20+
resources: {}
21+
logging:
22+
level:
23+
default: trace
24+
---
25+
apiVersion: gateway.networking.k8s.io/v1
26+
kind: Gateway
27+
metadata:
28+
name: semantic-router
29+
namespace: default
30+
spec:
31+
gatewayClassName: semantic-router
32+
listeners:
33+
- name: http
34+
protocol: HTTP
35+
port: 80
36+
infrastructure:
37+
parametersRef:
38+
group: gateway.envoyproxy.io
39+
kind: EnvoyProxy
40+
name: semantic-router
41+
---
42+
# By default, Envoy Gateway sets the buffer limit to 32KiB, which is not sufficient for AI workloads.
43+
# This ClientTrafficPolicy sets the buffer limit to 50MiB as an example.
44+
apiVersion: gateway.envoyproxy.io/v1alpha1
45+
kind: ClientTrafficPolicy
46+
metadata:
47+
name: semantic-router
48+
namespace: default
49+
spec:
50+
targetRefs:
51+
- group: gateway.networking.k8s.io
52+
kind: Gateway
53+
name: semantic-router
54+
connection:
55+
bufferLimit: 50Mi
56+
---
57+
apiVersion: aigateway.envoyproxy.io/v1alpha1
58+
kind: AIGatewayRoute
59+
metadata:
60+
name: semantic-router
61+
namespace: default
62+
spec:
63+
parentRefs:
64+
- name: semantic-router
65+
kind: Gateway
66+
group: gateway.networking.k8s.io
67+
rules:
68+
- matches:
69+
- headers:
70+
- type: Exact
71+
name: x-ai-eg-model
72+
value: math-expert
73+
backendRefs:
74+
- name: vllm-llama3-8b-instruct
75+
timeouts:
76+
request: 60s
77+
backendRequest: 60s
78+
- matches:
79+
- headers:
80+
- type: Exact
81+
name: x-ai-eg-model
82+
value: science-expert
83+
backendRefs:
84+
- name: vllm-llama3-8b-instruct
85+
timeouts:
86+
request: 60s
87+
backendRequest: 60s
88+
- matches:
89+
- headers:
90+
- type: Exact
91+
name: x-ai-eg-model
92+
value: social-expert
93+
backendRefs:
94+
- name: vllm-llama3-8b-instruct
95+
timeouts:
96+
request: 60s
97+
backendRequest: 60s
98+
- matches:
99+
- headers:
100+
- type: Exact
101+
name: x-ai-eg-model
102+
value: humanities-expert
103+
backendRefs:
104+
- name: vllm-llama3-8b-instruct
105+
timeouts:
106+
request: 60s
107+
backendRequest: 60s
108+
- matches:
109+
- headers:
110+
- type: Exact
111+
name: x-ai-eg-model
112+
value: law-expert
113+
backendRefs:
114+
- name: vllm-llama3-8b-instruct
115+
timeouts:
116+
request: 60s
117+
backendRequest: 60s
118+
- matches:
119+
- headers:
120+
- type: Exact
121+
name: x-ai-eg-model
122+
value: general-expert
123+
backendRefs:
124+
- name: vllm-llama3-8b-instruct
125+
timeouts:
126+
request: 60s
127+
backendRequest: 60s
128+
- backendRefs:
129+
- name: vllm-llama3-8b-instruct
130+
timeouts:
131+
request: 60s
132+
backendRequest: 60s
133+
---
134+
apiVersion: gateway.envoyproxy.io/v1alpha1
135+
kind: EnvoyPatchPolicy
136+
metadata:
137+
name: ai-gateway-prepost-extproc-patch-policy
138+
namespace: default
139+
spec:
140+
jsonPatches:
141+
- name: default/semantic-router/http
142+
operation:
143+
op: add
144+
path: /default_filter_chain/filters/0/typed_config/http_filters/0
145+
value:
146+
name: semantic-router-extproc
147+
typedConfig:
148+
'@type': type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
149+
allow_mode_override: true
150+
grpcService:
151+
envoyGrpc:
152+
authority: semantic-router.vllm-semantic-router-system:50051
153+
clusterName: semantic-router
154+
timeout: 60s
155+
message_timeout: 60s
156+
processing_mode:
157+
request_body_mode: BUFFERED
158+
request_header_mode: SEND
159+
request_trailer_mode: SKIP
160+
response_body_mode: BUFFERED
161+
response_header_mode: SEND
162+
response_trailer_mode: SKIP
163+
type: type.googleapis.com/envoy.config.listener.v3.Listener
164+
- name: semantic-router
165+
operation:
166+
op: add
167+
path: ''
168+
value:
169+
connect_timeout: 60s
170+
http2_protocol_options: {}
171+
lb_policy: ROUND_ROBIN
172+
load_assignment:
173+
cluster_name: semantic-router
174+
endpoints:
175+
- lb_endpoints:
176+
- endpoint:
177+
address:
178+
socket_address:
179+
address: semantic-router.vllm-semantic-router-system.svc.cluster.local
180+
port_value: 50051
181+
name: semantic-router
182+
type: STRICT_DNS
183+
type: type.googleapis.com/envoy.config.cluster.v3.Cluster
184+
targetRef:
185+
group: gateway.networking.k8s.io
186+
kind: Gateway
187+
name: semantic-router
188+
type: JSONPatch

e2e/profiles/ai-gateway/profile.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,6 @@ func (p *Profile) GetTestCases() []string {
124124
"plugin-chain-execution", // Plugin ordering and blocking
125125
"rule-condition-logic", // AND/OR operators
126126
"decision-fallback-behavior", // Fallback to default
127-
"keyword-routing", // Keyword-based decisions
128127
"plugin-config-variations", // Plugin configuration testing
129128

130129
// Load tests

e2e/profiles/aibrix/profile.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,6 @@ func (p *Profile) GetTestCases() []string {
184184
"plugin-chain-execution", // Plugin ordering and blocking
185185
"rule-condition-logic", // AND/OR operators
186186
"decision-fallback-behavior", // Fallback to default
187-
"keyword-routing", // Keyword-based decisions
188187
"plugin-config-variations", // Plugin configuration testing
189188

190189
// Load tests

e2e/profiles/dynamic-config/profile.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ func (p *Profile) GetTestCases() []string {
121121
"plugin-chain-execution", // Plugin ordering and blocking
122122
"rule-condition-logic", // AND/OR operators
123123
"decision-fallback-behavior", // Fallback to default
124-
"keyword-routing", // Keyword-based decisions
125124
"plugin-config-variations", // Plugin configuration testing
126125

127126
// Load tests

e2e/profiles/routing-strategies/profile.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,12 +202,12 @@ func (p *Profile) deployEnvoyAIGateway(ctx context.Context, deployer *helm.Deplo
202202

203203
func (p *Profile) deployGatewayResources(ctx context.Context, opts *framework.SetupOptions) error {
204204
// Apply base model
205-
if err := p.kubectlApply(ctx, opts.KubeConfig, "deploy/kubernetes/ai-gateway/aigw-resources/base-model.yaml"); err != nil {
205+
if err := p.kubectlApply(ctx, opts.KubeConfig, "deploy/kubernetes/routing-strategies/aigw-resources/base-model.yaml"); err != nil {
206206
return fmt.Errorf("failed to apply base model: %w", err)
207207
}
208208

209209
// Apply gateway API resources
210-
if err := p.kubectlApply(ctx, opts.KubeConfig, "deploy/kubernetes/ai-gateway/aigw-resources/gwapi-resources.yaml"); err != nil {
210+
if err := p.kubectlApply(ctx, opts.KubeConfig, "deploy/kubernetes/routing-strategies/aigw-resources/gwapi-resources.yaml"); err != nil {
211211
return fmt.Errorf("failed to apply gateway API resources: %w", err)
212212
}
213213

@@ -295,8 +295,8 @@ func (p *Profile) verifyEnvironment(ctx context.Context, opts *framework.SetupOp
295295

296296
func (p *Profile) cleanupGatewayResources(ctx context.Context, opts *framework.TeardownOptions) error {
297297
// Delete in reverse order
298-
p.kubectlDelete(ctx, opts.KubeConfig, "deploy/kubernetes/ai-gateway/aigw-resources/gwapi-resources.yaml")
299-
p.kubectlDelete(ctx, opts.KubeConfig, "deploy/kubernetes/ai-gateway/aigw-resources/base-model.yaml")
298+
p.kubectlDelete(ctx, opts.KubeConfig, "deploy/kubernetes/routing-strategies/aigw-resources/gwapi-resources.yaml")
299+
p.kubectlDelete(ctx, opts.KubeConfig, "deploy/kubernetes/routing-strategies/aigw-resources/base-model.yaml")
300300
return nil
301301
}
302302

test-report.json

Lines changed: 0 additions & 26 deletions
This file was deleted.

test-report.md

Lines changed: 0 additions & 41 deletions
This file was deleted.

0 commit comments

Comments
 (0)