Skip to content

Commit 137d03f

Browse files
committed
✨ feat(e2e/llm-d): add LLM-D profile and test cases
Signed-off-by: samzong <[email protected]>
1 parent a149800 commit 137d03f

File tree

14 files changed: +957 additions, -5 deletions

.github/workflows/integration-test-k8s.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ on:
1212
jobs:
1313
integration-test:
1414
runs-on: ubuntu-latest
15-
timeout-minutes: 60
15+
timeout-minutes: 75
1616
strategy:
1717
fail-fast: false # Continue testing other profiles even if one fails
1818
matrix:
19-
profile: [ai-gateway, aibrix]
19+
profile: [ai-gateway, aibrix, llm-d]
2020

2121
steps:
2222
- name: Check out the repo
@@ -159,4 +159,3 @@ jobs:
159159
if: always()
160160
run: |
161161
make e2e-cleanup || true
162-

e2e/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ The framework follows a **separation of concerns** design:
1616
- **aibrix**: Tests Semantic Router with vLLM AIBrix integration
1717
- **istio**: Tests Semantic Router with Istio Gateway (future)
1818
- **production-stack**: Tests vLLM Production Stack configurations (future)
19-
- **llm-d**: Tests with LLM-D (future)
19+
- **llm-d**: Tests Semantic Router with LLM-D distributed inference
2020
- **dynamo**: Tests with Nvidia Dynamo (future)
2121

2222
## Directory Structure

e2e/cmd/e2e/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@ import (
1212
aigateway "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
1313
aibrix "github.com/vllm-project/semantic-router/e2e/profiles/aibrix"
1414
dynamicconfig "github.com/vllm-project/semantic-router/e2e/profiles/dynamic-config"
15+
llmd "github.com/vllm-project/semantic-router/e2e/profiles/llm-d"
1516

1617
// Import profiles to register test cases
1718
_ "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway"
1819
_ "github.com/vllm-project/semantic-router/e2e/profiles/aibrix"
20+
_ "github.com/vllm-project/semantic-router/e2e/profiles/llm-d"
1921
)
2022

2123
const version = "v1.0.0"
@@ -103,6 +105,8 @@ func getProfile(name string) (framework.Profile, error) {
103105
return dynamicconfig.NewProfile(), nil
104106
case "aibrix":
105107
return aibrix.NewProfile(), nil
108+
case "llm-d":
109+
return llmd.NewProfile(), nil
106110
// Add more profiles here as they are implemented
107111
// case "istio":
108112
// return istio.NewProfile(), nil
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
apiVersion: gateway.networking.k8s.io/v1
2+
kind: HTTPRoute
3+
metadata:
4+
name: vsr-default-svc
5+
namespace: default
6+
spec:
7+
parentRefs:
8+
- group: gateway.networking.k8s.io
9+
kind: Gateway
10+
name: inference-gateway
11+
rules:
12+
- backendRefs:
13+
- name: vllm-llama3-8b-instruct
14+
port: 8000
15+
matches:
16+
- path:
17+
type: PathPrefix
18+
value: /
19+
timeouts:
20+
request: 300s
21+
---
22+
apiVersion: gateway.networking.k8s.io/v1
23+
kind: HTTPRoute
24+
metadata:
25+
name: vsr-llama8b-svc
26+
namespace: default
27+
spec:
28+
parentRefs:
29+
- group: gateway.networking.k8s.io
30+
kind: Gateway
31+
name: inference-gateway
32+
rules:
33+
- backendRefs:
34+
- name: vllm-llama3-8b-instruct
35+
port: 8000
36+
matches:
37+
- path:
38+
type: PathPrefix
39+
value: /
40+
headers:
41+
- type: Exact
42+
name: x-selected-model
43+
value: llama3-8b
44+
timeouts:
45+
request: 300s
46+
---
47+
apiVersion: gateway.networking.k8s.io/v1
48+
kind: HTTPRoute
49+
metadata:
50+
name: vsr-phi4-mini-svc
51+
namespace: default
52+
spec:
53+
parentRefs:
54+
- group: gateway.networking.k8s.io
55+
kind: Gateway
56+
name: inference-gateway
57+
rules:
58+
- backendRefs:
59+
- name: phi4-mini
60+
port: 8000
61+
matches:
62+
- path:
63+
type: PathPrefix
64+
value: /
65+
headers:
66+
- type: Exact
67+
name: x-selected-model
68+
value: phi4-mini
69+
timeouts:
70+
request: 300s
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: vllm-llama3-8b-instruct
5+
namespace: default
6+
spec:
7+
replicas: 2
8+
selector:
9+
matchLabels:
10+
app: vllm-llama3-8b-instruct
11+
template:
12+
metadata:
13+
labels:
14+
app: vllm-llama3-8b-instruct
15+
spec:
16+
containers:
17+
- name: sim
18+
image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.1
19+
args:
20+
- --model
21+
- llama3-8b
22+
- --port
23+
- "8000"
24+
ports:
25+
- containerPort: 8000
26+
---
27+
apiVersion: v1
28+
kind: Service
29+
metadata:
30+
name: vllm-llama3-8b-instruct
31+
namespace: default
32+
labels:
33+
app: vllm-llama3-8b-instruct
34+
spec:
35+
type: ClusterIP
36+
selector:
37+
app: vllm-llama3-8b-instruct
38+
ports:
39+
- port: 8000
40+
targetPort: 8000
41+
protocol: TCP
42+
---
43+
apiVersion: apps/v1
44+
kind: Deployment
45+
metadata:
46+
name: phi4-mini
47+
namespace: default
48+
spec:
49+
replicas: 2
50+
selector:
51+
matchLabels:
52+
app: phi4-mini
53+
template:
54+
metadata:
55+
labels:
56+
app: phi4-mini
57+
spec:
58+
containers:
59+
- name: sim
60+
image: ghcr.io/llm-d/llm-d-inference-sim:v0.6.1
61+
args:
62+
- --model
63+
- phi4-mini
64+
- --port
65+
- "8000"
66+
ports:
67+
- containerPort: 8000
68+
---
69+
apiVersion: v1
70+
kind: Service
71+
metadata:
72+
name: phi4-mini
73+
namespace: default
74+
labels:
75+
app: phi4-mini
76+
spec:
77+
type: ClusterIP
78+
selector:
79+
app: phi4-mini
80+
ports:
81+
- port: 8000
82+
targetPort: 8000
83+
protocol: TCP
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
apiVersion: rbac.authorization.k8s.io/v1
2+
kind: ClusterRole
3+
metadata:
4+
name: llmd-epp-access
5+
rules:
6+
- apiGroups: ["inference.networking.k8s.io", "inference.networking.x-k8s.io"]
7+
resources: ["inferencepools", "inferenceobjectives"]
8+
verbs: ["get", "list", "watch"]
9+
- apiGroups: [""]
10+
resources: ["pods"]
11+
verbs: ["get", "list", "watch"]
12+
---
13+
apiVersion: rbac.authorization.k8s.io/v1
14+
kind: ClusterRoleBinding
15+
metadata:
16+
name: llmd-epp-access-binding
17+
subjects:
18+
- kind: ServiceAccount
19+
name: vllm-llama3-8b-instruct-epp
20+
namespace: default
21+
- kind: ServiceAccount
22+
name: vllm-phi4-mini-epp
23+
namespace: default
24+
roleRef:
25+
apiGroup: rbac.authorization.k8s.io
26+
kind: ClusterRole
27+
name: llmd-epp-access

0 commit comments

Comments (0)