Skip to content

Commit ee01b19

Browse files
committed
✨ feat(e2e/llm-d): add LLM-D profile and test cases
Signed-off-by: samzong <[email protected]>
1 parent a149800 commit ee01b19

File tree

13 files changed: 854 additions (+854) and 5 deletions (-5)

.github/workflows/integration-test-k8s.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ on:
1212
jobs:
1313
integration-test:
1414
runs-on: ubuntu-latest
15-
timeout-minutes: 60
15+
timeout-minutes: 75
1616
strategy:
1717
fail-fast: false # Continue testing other profiles even if one fails
1818
matrix:
19-
profile: [ai-gateway, aibrix]
19+
profile: [ai-gateway, aibrix, llm-d]
2020

2121
steps:
2222
- name: Check out the repo
@@ -159,4 +159,3 @@ jobs:
159159
if: always()
160160
run: |
161161
make e2e-cleanup || true
162-

e2e/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ The framework follows a **separation of concerns** design:
1616
- **aibrix**: Tests Semantic Router with vLLM AIBrix integration
1717
- **istio**: Tests Semantic Router with Istio Gateway (future)
1818
- **production-stack**: Tests vLLM Production Stack configurations (future)
19-
- **llm-d**: Tests with LLM-D (future)
19+
- **llm-d**: Tests Semantic Router with LLM-D distributed inference
2020
- **dynamo**: Tests with NVIDIA Dynamo (future)
2121

2222
## Directory Structure

e2e/cmd/e2e/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@ import (
1212
aigateway "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
1313
aibrix "github.com/vllm-project/semantic-router/e2e/profiles/aibrix"
1414
dynamicconfig "github.com/vllm-project/semantic-router/e2e/profiles/dynamic-config"
15+
llmd "github.com/vllm-project/semantic-router/e2e/profiles/llm-d"
1516

1617
// Import profiles to register test cases
1718
_ "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
1819
_ "github.com/vllm-project/semantic-router/e2e/profiles/aibrix"
20+
_ "github.com/vllm-project/semantic-router/e2e/profiles/llm-d"
1921
)
2022

2123
const version = "v1.0.0"
@@ -103,6 +105,8 @@ func getProfile(name string) (framework.Profile, error) {
103105
return dynamicconfig.NewProfile(), nil
104106
case "aibrix":
105107
return aibrix.NewProfile(), nil
108+
case "llm-d":
109+
return llmd.NewProfile(), nil
106110
// Add more profiles here as they are implemented
107111
// case "istio":
108112
// return istio.NewProfile(), nil
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: vllm-llama3-8b-instruct
5+
namespace: default
6+
spec:
7+
replicas: 2
8+
selector:
9+
matchLabels:
10+
app: vllm-llama3-8b-instruct
11+
template:
12+
metadata:
13+
labels:
14+
app: vllm-llama3-8b-instruct
15+
spec:
16+
containers:
17+
- name: sim
18+
image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.1
19+
args:
20+
- --model
21+
- llama3-8b
22+
- --port
23+
- "8000"
24+
ports:
25+
- containerPort: 8000
26+
---
27+
apiVersion: v1
28+
kind: Service
29+
metadata:
30+
name: vllm-llama3-8b-instruct
31+
namespace: default
32+
labels:
33+
app: vllm-llama3-8b-instruct
34+
spec:
35+
type: ClusterIP
36+
selector:
37+
app: vllm-llama3-8b-instruct
38+
ports:
39+
- port: 8000
40+
targetPort: 8000
41+
protocol: TCP
42+
---
43+
apiVersion: apps/v1
44+
kind: Deployment
45+
metadata:
46+
name: phi4-mini
47+
namespace: default
48+
spec:
49+
replicas: 2
50+
selector:
51+
matchLabels:
52+
app: phi4-mini
53+
template:
54+
metadata:
55+
labels:
56+
app: phi4-mini
57+
spec:
58+
containers:
59+
- name: sim
60+
image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.1
61+
args:
62+
- --model
63+
- phi4-mini
64+
- --port
65+
- "8000"
66+
ports:
67+
- containerPort: 8000
68+
---
69+
apiVersion: v1
70+
kind: Service
71+
metadata:
72+
name: phi4-mini
73+
namespace: default
74+
labels:
75+
app: phi4-mini
76+
spec:
77+
type: ClusterIP
78+
selector:
79+
app: phi4-mini
80+
ports:
81+
- port: 8000
82+
targetPort: 8000
83+
protocol: TCP
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
apiVersion: rbac.authorization.k8s.io/v1
2+
kind: ClusterRole
3+
metadata:
4+
name: llmd-epp-access
5+
rules:
6+
- apiGroups: ["inference.networking.k8s.io", "inference.networking.x-k8s.io"]
7+
resources: ["inferencepools", "inferenceobjectives"]
8+
verbs: ["get", "list", "watch"]
9+
- apiGroups: [""]
10+
resources: ["pods"]
11+
verbs: ["get", "list", "watch"]
12+
---
13+
apiVersion: rbac.authorization.k8s.io/v1
14+
kind: ClusterRoleBinding
15+
metadata:
16+
name: llmd-epp-access-binding
17+
subjects:
18+
- kind: ServiceAccount
19+
name: vllm-llama3-8b-instruct-epp
20+
namespace: default
21+
- kind: ServiceAccount
22+
name: vllm-phi4-mini-epp
23+
namespace: default
24+
roleRef:
25+
apiGroup: rbac.authorization.k8s.io
26+
kind: ClusterRole
27+
name: llmd-epp-access

0 commit comments

Comments
 (0)