Skip to content

Commit dc203e2

Browse files
authored
Add DeepSeek example RayService (#3838)
1 parent c6c9522 commit dc203e2

File tree

1 file changed

+86
-0
lines changed

1 file changed

+86
-0
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
# RayService sample that serves the DeepSeek-R1 model through the Ray Serve
# LLM application builder (import_path ray.serve.llm:build_openai_app).
apiVersion: ray.io/v1
kind: RayService
metadata:
  name: deepseek-r1
spec:
  # Ray Serve configuration, embedded as a literal block string.
  # NOTE(review): tensor_parallel_size (8) x pipeline_parallel_size (2) = 16,
  # which lines up with the 2 worker replicas x 8 GPUs declared in
  # workerGroupSpecs below; presumably the two must be kept in sync --
  # confirm before resizing either one.
  serveConfigV2: |
    applications:
      - args:
          llm_configs:
            - model_loading_config:
                model_id: "deepseek"
                model_source: "deepseek-ai/DeepSeek-R1"
              accelerator_type: "H100"
              deployment_config:
                autoscaling_config:
                  min_replicas: 1
                  max_replicas: 1
              runtime_env:
                env_vars:
                  VLLM_USE_V1: "1"
              engine_kwargs:
                tensor_parallel_size: 8
                pipeline_parallel_size: 2
                gpu_memory_utilization: 0.92
                dtype: "auto"
                max_num_seqs: 40
                max_model_len: 16384
                enable_chunked_prefill: true
                enable_prefix_caching: true
                trust_remote_code: true
        import_path: ray.serve.llm:build_openai_app
        name: llm_app
        route_prefix: "/"
  rayClusterConfig:
    headGroupSpec:
      # The head container requests no nvidia.com/gpu resource, and Ray is
      # started with num-gpus "0" to match.
      rayStartParams:
        num-gpus: "0"
      template:
        spec:
          containers:
            - name: ray-head
              # TODO(seiji): change to Ray 2.48 when it's released
              # because https://github.com/ray-project/ray/pull/53815 is needed for DeepSeek
              image: rayproject/ray-llm:nightly-py311-cu128
              # Requests equal limits for both cpu and memory.
              resources:
                limits:
                  cpu: "2"
                  memory: "32Gi"
                requests:
                  cpu: "2"
                  memory: "32Gi"
              ports:
                - containerPort: 6379
                  name: gcs-server
                - containerPort: 8265
                  name: dashboard
                - containerPort: 10001
                  name: client
                - containerPort: 8000
                  name: serve
    workerGroupSpecs:
      # Fixed-size GPU worker group: replicas == minReplicas == maxReplicas.
      - replicas: 2
        minReplicas: 2
        maxReplicas: 2
        groupName: gpu-group
        rayStartParams: {}
        template:
          spec:
            containers:
              - name: ray-worker
                # TODO(seiji): change to Ray 2.48 when it's released
                # because https://github.com/ray-project/ray/pull/53815 is needed for DeepSeek
                image: rayproject/ray-llm:nightly-py311-cu128
                # Requests equal limits; each worker pod asks for 8 GPUs.
                resources:
                  limits:
                    cpu: "24"
                    memory: "500Gi"
                    nvidia.com/gpu: "8"
                  requests:
                    cpu: "24"
                    memory: "500Gi"
                    nvidia.com/gpu: "8"
            # Tolerate the NoSchedule taint keyed on nvidia.com/gpu.
            tolerations:
              - key: "nvidia.com/gpu"
                operator: "Exists"
                effect: "NoSchedule"

0 commit comments

Comments
 (0)