Skip to content

Commit 6eae511

Browse files
committed
feat: add a new dir called testdata in test/e2e
Signed-off-by: Wangzy <[email protected]>
1 parent bb67eec commit 6eae511

30 files changed

+1435
-0
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# Gateway entry point for the Kthena e2e test environment.
# Exposes a single plain-HTTP listener handled by the kthena-router GatewayClass.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: kthena-gateway
  namespace: kthena-system
spec:
  gatewayClassName: kthena-router
  listeners:
    - name: http
      port: 8081
      protocol: HTTP
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# HTTPRoute that forwards all traffic ("/" prefix) from the Gateway to the
# deepseek-r1-1-5b InferencePool (Gateway API Inference Extension backend).
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: llm-route
spec:
  parentRefs:
    # NOTE(review): the Gateway shipped alongside this file is named
    # "kthena-gateway"; this ref targets "default" — confirm which Gateway
    # this route is meant to attach to.
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: default
      namespace: kthena-system
  rules:
    - backendRefs:
        - group: inference.networking.k8s.io
          kind: InferencePool
          name: deepseek-r1-1-5b
      matches:
        - path:
            type: PathPrefix
            value: /
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# InferencePool grouping the deepseek-r1-1-5b model-server Pods (selected by
# label) behind port 8000 for the Gateway API Inference Extension.
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
  name: deepseek-r1-1-5b
spec:
  targetPorts:
    - number: 8000
  selector:
    matchLabels:
      app: deepseek-r1-1-5b
  # Kthena Router natively supports Gateway Inference Extension and does not
  # require the Endpoint Picker Extension.
  # It's just a placeholder for API validation.
  endpointPickerRef:
    name: deepseek-r1-1-5b
    port:
      number: 8080
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# This example shows how to deploy a DS1.5B model server.
# The DS1.5B server will provide inference services for the DS1.5B model.
# Two Deployments (v1/v2) share the app label so traffic can be split by the
# "version" label.
#
# NOTE: Update the image to the correct DS1.5B model image once it's available.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deepseek-r1-1-5b-v1
spec:
  replicas: 1
  selector:
    matchLabels:
      app: deepseek-r1-1-5b
      version: v1
  template:
    metadata:
      labels:
        app: deepseek-r1-1-5b
        version: v1
    spec:
      containers:
        - name: llm-engine
          image: ghcr.io/yaozengzeng/vllm-mock:latest
          imagePullPolicy: IfNotPresent
          env:
            # specify the model name to mock
            - name: MODEL_NAME
              value: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B-v1"
          command:
            - python3
            - app.py
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deepseek-r1-1-5b-v2
spec:
  replicas: 1
  selector:
    matchLabels:
      app: deepseek-r1-1-5b
      version: v2
  template:
    metadata:
      labels:
        app: deepseek-r1-1-5b
        version: v2
    spec:
      containers:
        - name: llm-engine
          image: ghcr.io/yaozengzeng/vllm-mock:latest
          imagePullPolicy: IfNotPresent
          env:
            # specify the model name to mock
            - name: MODEL_NAME
              value: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B-v2"
          command:
            - python3
            - app.py
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# This example shows how to deploy a DS1.5B model server.
# The DS1.5B server will provide inference services for the DS1.5B model.
#
# NOTE: Update the image to the correct DS1.5B model image once it's available.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deepseek-r1-1-5b
spec:
  replicas: 3
  selector:
    matchLabels:
      app: deepseek-r1-1-5b
  template:
    metadata:
      labels:
        app: deepseek-r1-1-5b
    spec:
      containers:
        - name: llm-engine
          image: ghcr.io/yaozengzeng/vllm-mock:latest
          imagePullPolicy: IfNotPresent
          env:
            # specify the model name to mock
            - name: MODEL_NAME
              value: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
          command:
            - python3
            - app.py
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# This example shows how to deploy a DS7B model server.
# The DS7B server will provide inference services for the DS7B model.
#
# NOTE: Update the image to the correct DS7B model image once it's available.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: deepseek-r1-7b
spec:
  replicas: 3
  selector:
    matchLabels:
      app: deepseek-r1-7b
  template:
    metadata:
      labels:
        app: deepseek-r1-7b
    spec:
      containers:
        - name: llm-engine
          image: ghcr.io/yaozengzeng/vllm-mock:latest
          imagePullPolicy: IfNotPresent
          env:
            # specify the model name to mock
            - name: MODEL_NAME
              value: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
          command:
            - python3
            - app.py
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# This example shows how to deploy a mock vLLM server for testing.
# The mock server will return a fixed response for any input.
# You can use this mock server to test the inference router without deploying
# a real LLM server.
#
# NOTE: `ghcr.io/yaozengzeng/vllm-mock:latest` is built based on
# `https://github.com/YaoZengzeng/aibrix/tree/vllm-mock`.
# Move the image to kthena registry once it's public.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-llama3-8b-instruct
spec:
  replicas: 3
  selector:
    matchLabels:
      app: vllm-llama3-8b-instruct
  template:
    metadata:
      labels:
        app: vllm-llama3-8b-instruct
    spec:
      containers:
        - name: llm-engine
          image: ghcr.io/yaozengzeng/vllm-mock:latest
          imagePullPolicy: IfNotPresent
          env:
            # specify the model name to mock
            - name: MODEL_NAME
              value: "meta-llama/Llama-3.1-8B-Instruct"
          command:
            - python3
            - app.py
---
# Route requests for model "Llama-3.1" to the "llama" ModelServer below.
apiVersion: networking.serving.volcano.sh/v1alpha1
kind: ModelRoute
metadata:
  name: llama
spec:
  modelName: "Llama-3.1"
  rules:
    - name: "default"
      targetModels:
        - modelServerName: "llama"
---
# ModelServer backing the route: selects the mock vLLM Pods by label and
# serves the Llama-3.1-8B-Instruct model on port 8000 with a 10s timeout.
apiVersion: networking.serving.volcano.sh/v1alpha1
kind: ModelServer
metadata:
  name: llama
spec:
  workloadSelector:
    matchLabels:
      app: vllm-llama3-8b-instruct
  workloadPort:
    port: 8000
  model: "meta-llama/Llama-3.1-8B-Instruct"
  inferenceEngine: "vLLM"
  trafficPolicy:
    timeout: 10s
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# ModelRoute bound to a specific Gateway via parentRefs: requests for model
# "deepseek-binding-gateway" are routed to the deepseek-r1-1-5b ModelServer.
apiVersion: networking.serving.volcano.sh/v1alpha1
kind: ModelRoute
metadata:
  name: deepseek-binding-gateway
  namespace: default
spec:
  modelName: "deepseek-binding-gateway"
  parentRefs:
    - name: "default"
      namespace: "kthena-system"
      kind: "Gateway"
  rules:
    - name: "default"
      targetModels:
        - modelServerName: "deepseek-r1-1-5b"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# ModelRoute in the "dev" namespace: routes requests for
# "deepseek-ai/DeepSeekV2" to the deepseek-v2 ModelServer.
apiVersion: networking.serving.volcano.sh/v1alpha1
kind: ModelRoute
metadata:
  name: deepseek-v2
  namespace: dev
spec:
  modelName: "deepseek-ai/DeepSeekV2"
  rules:
    - name: "default"
      targetModels:
        - modelServerName: "deepseek-v2"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
---
# ModelRoute that matches by LoRA adapter names (lora-A / lora-B) instead of
# a base model name, routing to the deepseek-r1-1-5b ModelServer.
apiVersion: networking.serving.volcano.sh/v1alpha1
kind: ModelRoute
metadata:
  name: deepseek-lora
  namespace: default
spec:
  loraAdapters:
    - "lora-A"
    - "lora-B"
  rules:
    - name: "lora-route"
      targetModels:
        - modelServerName: "deepseek-r1-1-5b"

0 commit comments

Comments
 (0)