# Diff view of ray-operator/config/samples: 1 file changed, +98 -0 lines.
# RayService sample: serves the Qwen/Qwen2.5-7B-Instruct model through the
# application built by `ray.serve.llm:build_openai_app`.
#
# Requires the `hf-token` Secret (key `hf_token`), which is injected into the
# worker container as HUGGING_FACE_HUB_TOKEN.
apiVersion: ray.io/v1
kind: RayService
metadata:
  name: ray-serve-llm
spec:
  # Ray Serve config, passed through as a literal string. Comments are kept
  # outside the block so the embedded text stays exactly the Serve config.
  serveConfigV2: |
    applications:
      - name: llms
        import_path: ray.serve.llm:build_openai_app
        route_prefix: "/"
        args:
          llm_configs:
            - model_loading_config:
                model_id: qwen2.5-7b-instruct
                model_source: Qwen/Qwen2.5-7B-Instruct
              engine_kwargs:
                dtype: bfloat16
                max_model_len: 1024
                device: auto
                gpu_memory_utilization: 0.75
              deployment_config:
                autoscaling_config:
                  min_replicas: 1
                  max_replicas: 4
                  target_ongoing_requests: 64
                max_ongoing_requests: 128
  rayClusterConfig:
    # Matches the 2.46.0 tag of the ray-llm image used by head and workers.
    rayVersion: "2.46.0"
    headGroupSpec:
      # The head advertises zero CPUs and GPUs to Ray.
      # NOTE(review): presumably so Serve replicas are scheduled only on the
      # GPU worker group - confirm.
      rayStartParams:
        num-cpus: "0"
        num-gpus: "0"
      template:
        spec:
          containers:
            - name: ray-head
              image: rayproject/ray-llm:2.46.0-py311-cu124
              ports:
                - containerPort: 8000
                  name: serve
                  protocol: TCP
                - containerPort: 8080
                  name: metrics
                  protocol: TCP
                - containerPort: 6379
                  name: gcs
                  protocol: TCP
                - containerPort: 8265
                  name: dashboard
                  protocol: TCP
                - containerPort: 10001
                  name: client
                  protocol: TCP
              resources:
                limits:
                  cpu: 2
                  memory: 4Gi
                requests:
                  cpu: 2
                  memory: 4Gi
    workerGroupSpecs:
      # Fixed-size group: replicas, minReplicas and maxReplicas are all 1.
      - replicas: 1
        minReplicas: 1
        maxReplicas: 1
        numOfHosts: 1
        groupName: gpu-group
        # Same GPU count as the container's nvidia.com/gpu request and limit.
        rayStartParams:
          num-gpus: "4"
        template:
          spec:
            containers:
              - name: ray-worker
                image: rayproject/ray-llm:2.46.0-py311-cu124
                env:
                  # Token is read from the `hf-token` Secret, key `hf_token`.
                  - name: HUGGING_FACE_HUB_TOKEN
                    valueFrom:
                      secretKeyRef:
                        name: hf-token
                        key: hf_token
                resources:
                  limits:
                    cpu: 32
                    memory: 32Gi
                    nvidia.com/gpu: "4"
                  requests:
                    cpu: 32
                    memory: 32Gi
                    nvidia.com/gpu: "4"

---

# Secret holding the Hugging Face access token. The RayService worker
# container reads key `hf_token` from this Secret via secretKeyRef and exposes
# it as HUGGING_FACE_HUB_TOKEN.
apiVersion: v1
kind: Secret
metadata:
  name: hf-token
type: Opaque
stringData:
  # Placeholder: replace with a real token before applying, and do not commit
  # the real value to version control.
  hf_token: <your-hf-access-token-value>
# You can’t perform that action at this time.
# 0 commit comments