Skip to content

Commit 37e5941

Browse files
authored
feat: add LLM-D profile for E2E testing framework (#705)
1 parent 09b12e0 commit 37e5941

File tree

9 files changed

+783
-5
lines changed

9 files changed

+783
-5
lines changed

.github/workflows/integration-test-k8s.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ on:
1818
jobs:
1919
integration-test:
2020
runs-on: ubuntu-latest
21-
timeout-minutes: 60
21+
timeout-minutes: 75
2222
strategy:
2323
fail-fast: false # Continue testing other profiles even if one fails
2424
matrix:
25-
profile: [ai-gateway, aibrix, routing-strategies]
25+
profile: [ai-gateway, aibrix, routing-strategies, llm-d]
2626

2727
steps:
2828
- name: Check out the repo
@@ -165,4 +165,3 @@ jobs:
165165
if: always()
166166
run: |
167167
make e2e-cleanup || true
168-

e2e/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ The framework follows a **separation of concerns** design:
1616
- **aibrix**: Tests Semantic Router with vLLM AIBrix integration
1717
- **istio**: Tests Semantic Router with Istio Gateway (future)
1818
- **production-stack**: Tests vLLM Production Stack configurations (future)
19-
- **llm-d**: Tests with LLM-D (future)
19+
- **llm-d**: Tests Semantic Router with LLM-D distributed inference
2020
- **dynamo**: Tests with NVIDIA Dynamo (future)
2121

2222
## Directory Structure

e2e/cmd/e2e/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ import (
1212
aigateway "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
1313
aibrix "github.com/vllm-project/semantic-router/e2e/profiles/aibrix"
1414
dynamicconfig "github.com/vllm-project/semantic-router/e2e/profiles/dynamic-config"
15+
llmd "github.com/vllm-project/semantic-router/e2e/profiles/llm-d"
1516
routingstrategies "github.com/vllm-project/semantic-router/e2e/profiles/routing-strategies"
1617

1718
// Import profiles to register test cases
1819
_ "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
1920
_ "github.com/vllm-project/semantic-router/e2e/profiles/aibrix"
21+
_ "github.com/vllm-project/semantic-router/e2e/profiles/llm-d"
2022
_ "github.com/vllm-project/semantic-router/e2e/profiles/routing-strategies"
2123
)
2224

@@ -105,6 +107,8 @@ func getProfile(name string) (framework.Profile, error) {
105107
return dynamicconfig.NewProfile(), nil
106108
case "aibrix":
107109
return aibrix.NewProfile(), nil
110+
case "llm-d":
111+
return llmd.NewProfile(), nil
108112
case "routing-strategies":
109113
return routingstrategies.NewProfile(), nil
110114
// Add more profiles here as they are implemented
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
apiVersion: gateway.networking.k8s.io/v1
2+
kind: HTTPRoute
3+
metadata:
4+
name: vsr-llama8b-svc
5+
namespace: default
6+
spec:
7+
parentRefs:
8+
- group: gateway.networking.k8s.io
9+
kind: Gateway
10+
name: inference-gateway
11+
rules:
12+
- backendRefs:
13+
- group: inference.networking.k8s.io
14+
kind: InferencePool
15+
name: vllm-llama3-8b-instruct
16+
matches:
17+
- path:
18+
type: PathPrefix
19+
value: /
20+
headers:
21+
- type: Exact
22+
name: x-selected-model
23+
value: llama3-8b
24+
timeouts:
25+
request: 300s
26+
---
27+
apiVersion: gateway.networking.k8s.io/v1
28+
kind: HTTPRoute
29+
metadata:
30+
name: vsr-phi4-mini-svc
31+
namespace: default
32+
spec:
33+
parentRefs:
34+
- group: gateway.networking.k8s.io
35+
kind: Gateway
36+
name: inference-gateway
37+
rules:
38+
- backendRefs:
39+
- group: inference.networking.k8s.io
40+
kind: InferencePool
41+
name: vllm-phi4-mini
42+
matches:
43+
- path:
44+
type: PathPrefix
45+
value: /
46+
headers:
47+
- type: Exact
48+
name: x-selected-model
49+
value: phi4-mini
50+
timeouts:
51+
request: 300s
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: vllm-llama3-8b-instruct
5+
namespace: default
6+
spec:
7+
replicas: 2
8+
selector:
9+
matchLabels:
10+
app: vllm-llama3-8b-instruct
11+
template:
12+
metadata:
13+
labels:
14+
app: vllm-llama3-8b-instruct
15+
spec:
16+
containers:
17+
- name: sim
18+
image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.1
19+
env:
20+
- name: POD_NAME
21+
valueFrom:
22+
fieldRef:
23+
fieldPath: metadata.name
24+
- name: POD_NAMESPACE
25+
valueFrom:
26+
fieldRef:
27+
fieldPath: metadata.namespace
28+
args:
29+
- --model
30+
- llama3-8b
31+
- --port
32+
- "8000"
33+
ports:
34+
- containerPort: 8000
35+
---
36+
apiVersion: v1
37+
kind: Service
38+
metadata:
39+
name: vllm-llama3-8b-instruct
40+
namespace: default
41+
labels:
42+
app: vllm-llama3-8b-instruct
43+
spec:
44+
type: ClusterIP
45+
selector:
46+
app: vllm-llama3-8b-instruct
47+
ports:
48+
- port: 8000
49+
targetPort: 8000
50+
protocol: TCP
51+
---
52+
apiVersion: apps/v1
53+
kind: Deployment
54+
metadata:
55+
name: phi4-mini
56+
namespace: default
57+
spec:
58+
replicas: 2
59+
selector:
60+
matchLabels:
61+
app: phi4-mini
62+
template:
63+
metadata:
64+
labels:
65+
app: phi4-mini
66+
spec:
67+
containers:
68+
- name: sim
69+
image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.1
70+
env:
71+
- name: POD_NAME
72+
valueFrom:
73+
fieldRef:
74+
fieldPath: metadata.name
75+
- name: POD_NAMESPACE
76+
valueFrom:
77+
fieldRef:
78+
fieldPath: metadata.namespace
79+
args:
80+
- --model
81+
- phi4-mini
82+
- --port
83+
- "8000"
84+
ports:
85+
- containerPort: 8000
86+
---
87+
apiVersion: v1
88+
kind: Service
89+
metadata:
90+
name: phi4-mini
91+
namespace: default
92+
labels:
93+
app: phi4-mini
94+
spec:
95+
type: ClusterIP
96+
selector:
97+
app: phi4-mini
98+
ports:
99+
- port: 8000
100+
targetPort: 8000
101+
protocol: TCP
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
apiVersion: rbac.authorization.k8s.io/v1
2+
kind: ClusterRole
3+
metadata:
4+
name: llmd-epp-access
5+
rules:
6+
- apiGroups: ["inference.networking.k8s.io", "inference.networking.x-k8s.io"]
7+
resources: ["inferencepools", "inferenceobjectives"]
8+
verbs: ["get", "list", "watch"]
9+
- apiGroups: [""]
10+
resources: ["pods"]
11+
verbs: ["get", "list", "watch"]
12+
---
13+
apiVersion: rbac.authorization.k8s.io/v1
14+
kind: ClusterRoleBinding
15+
metadata:
16+
name: llmd-epp-access-binding
17+
subjects:
18+
- kind: ServiceAccount
19+
name: vllm-llama3-8b-instruct-epp
20+
namespace: default
21+
- kind: ServiceAccount
22+
name: vllm-phi4-mini-epp
23+
namespace: default
24+
roleRef:
25+
apiGroup: rbac.authorization.k8s.io
26+
kind: ClusterRole
27+
name: llmd-epp-access

0 commit comments

Comments
 (0)