# Sample RayService manifest (from ray-operator/config/samples) that deploys
# deepseek-ai/DeepSeek-R1 using the ray.serve.llm:build_openai_app builder.
#
# Sizing, as declared below: the engine requests tensor_parallel_size 8 x
# pipeline_parallel_size 2 (16 GPUs in total), and the worker group provides
# 2 replicas x 8 nvidia.com/gpu each.
#
# NOTE(review): nesting was reconstructed from a whitespace-stripped diff;
# confirm against the upstream sample before applying.
apiVersion: ray.io/v1
kind: RayService
metadata:
  name: deepseek-r1
spec:
  # Ray Serve application config, embedded as a literal YAML string.
  serveConfigV2: |
    applications:
      - args:
          llm_configs:
            - model_loading_config:
                model_id: "deepseek"
                model_source: "deepseek-ai/DeepSeek-R1"
              accelerator_type: "H100"
              deployment_config:
                autoscaling_config:
                  min_replicas: 1
                  max_replicas: 1
              runtime_env:
                env_vars:
                  VLLM_USE_V1: "1"
              engine_kwargs:
                tensor_parallel_size: 8
                pipeline_parallel_size: 2
                gpu_memory_utilization: 0.92
                dtype: "auto"
                max_num_seqs: 40
                max_model_len: 16384
                enable_chunked_prefill: true
                enable_prefix_caching: true
                trust_remote_code: true
        import_path: ray.serve.llm:build_openai_app
        name: llm_app
        route_prefix: "/"
  rayClusterConfig:
    headGroupSpec:
      rayStartParams:
        # The head node advertises zero GPUs to Ray; the GPUs are declared
        # only on the worker group below.
        num-gpus: "0"
      template:
        spec:
          containers:
            - name: ray-head
              # TODO(seiji): change to Ray 2.48 when it's released
              # because https://github.com/ray-project/ray/pull/53815 is needed for DeepSeek
              image: rayproject/ray-llm:nightly-py311-cu128
              # Requests equal limits for both CPU and memory.
              resources:
                limits:
                  cpu: "2"
                  memory: "32Gi"
                requests:
                  cpu: "2"
                  memory: "32Gi"
              ports:
                - containerPort: 6379
                  name: gcs-server
                - containerPort: 8265
                  name: dashboard
                - containerPort: 10001
                  name: client
                - containerPort: 8000
                  name: serve
    workerGroupSpecs:
      # Fixed-size group: replicas, minReplicas and maxReplicas are all 2.
      - replicas: 2
        minReplicas: 2
        maxReplicas: 2
        groupName: gpu-group
        rayStartParams: {}
        template:
          spec:
            containers:
              - name: ray-worker
                # TODO(seiji): change to Ray 2.48 when it's released
                # because https://github.com/ray-project/ray/pull/53815 is needed for DeepSeek
                image: rayproject/ray-llm:nightly-py311-cu128
                # Requests equal limits; each worker pod asks for 8 GPUs.
                resources:
                  limits:
                    cpu: "24"
                    memory: "500Gi"
                    nvidia.com/gpu: "8"
                  requests:
                    cpu: "24"
                    memory: "500Gi"
                    nvidia.com/gpu: "8"
            # Allow scheduling onto nodes tainted with nvidia.com/gpu:NoSchedule.
            tolerations:
              - key: "nvidia.com/gpu"
                operator: "Exists"
                effect: "NoSchedule"