Skip to content

Commit 983927d

Browse files
kevin85421, Blaze-DSP, DPatel_7, eicherseiji
authored
Added Ray-Serve Config For LLMs (#3517) (#3767)
Co-authored-by: Blaze-DSP <[email protected]> Co-authored-by: DPatel_7 <[email protected]> Co-authored-by: Seiji Eicher <[email protected]>
1 parent 97664df commit 983927d

File tree

1 file changed

+98
-0
lines changed

1 file changed

+98
-0
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
---
# RayService that serves Qwen/Qwen2.5-7B-Instruct through the
# `ray.serve.llm:build_openai_app` application builder.
apiVersion: ray.io/v1
kind: RayService
metadata:
  name: ray-serve-llm
spec:
  # The Serve config is a literal block scalar: everything below is passed on
  # as one multi-line string, so no comments are placed inside it.
  serveConfigV2: |
    applications:
      - name: llms
        import_path: ray.serve.llm:build_openai_app
        route_prefix: "/"
        args:
          llm_configs:
            - model_loading_config:
                model_id: qwen2.5-7b-instruct
                model_source: Qwen/Qwen2.5-7B-Instruct
              engine_kwargs:
                dtype: bfloat16
                max_model_len: 1024
                device: auto
                gpu_memory_utilization: 0.75
              deployment_config:
                autoscaling_config:
                  min_replicas: 1
                  max_replicas: 4
                  target_ongoing_requests: 64
                max_ongoing_requests: 128
  rayClusterConfig:
    rayVersion: "2.46.0"
    headGroupSpec:
      # The head advertises zero CPUs and zero GPUs to Ray.
      # NOTE(review): presumably this keeps Serve replicas off the head pod;
      # confirm against the intended scheduling.
      rayStartParams:
        num-cpus: "0"
        num-gpus: "0"
      template:
        spec:
          containers:
            - name: ray-head
              image: rayproject/ray-llm:2.46.0-py311-cu124
              ports:
                - containerPort: 8000
                  name: serve
                  protocol: TCP
                - containerPort: 8080
                  name: metrics
                  protocol: TCP
                - containerPort: 6379
                  name: gcs
                  protocol: TCP
                - containerPort: 8265
                  name: dashboard
                  protocol: TCP
                - containerPort: 10001
                  name: client
                  protocol: TCP
              # Requests equal limits for both CPU and memory.
              resources:
                limits:
                  cpu: 2
                  memory: 4Gi
                requests:
                  cpu: 2
                  memory: 4Gi
    workerGroupSpecs:
      # replicas, minReplicas and maxReplicas are all 1, so this group is
      # pinned to a single worker pod.
      - replicas: 1
        minReplicas: 1
        maxReplicas: 1
        numOfHosts: 1
        groupName: gpu-group
        # Matches the four `nvidia.com/gpu` devices requested by the
        # container below.
        rayStartParams:
          num-gpus: "4"
        template:
          spec:
            containers:
              - name: ray-worker
                image: rayproject/ray-llm:2.46.0-py311-cu124
                env:
                  # Token is read from key `hf_token` of the `hf-token`
                  # Secret rather than being written inline.
                  - name: HUGGING_FACE_HUB_TOKEN
                    valueFrom:
                      secretKeyRef:
                        name: hf-token
                        key: hf_token
                # Requests equal limits for CPU, memory and GPU.
                resources:
                  limits:
                    cpu: 32
                    memory: 32Gi
                    nvidia.com/gpu: "4"
                  requests:
                    cpu: 32
                    memory: 32Gi
                    nvidia.com/gpu: "4"

---
# Secret holding the Hugging Face access token. The `ray-worker` container of
# the `ray-serve-llm` RayService reads key `hf_token` from this Secret into
# its HUGGING_FACE_HUB_TOKEN environment variable.
apiVersion: v1
kind: Secret
metadata:
  name: hf-token
type: Opaque
stringData:
  # Placeholder: replace with a real token before applying, and do not
  # commit the real value to version control. Quoted so the value stays a
  # string whatever is substituted.
  hf_token: "<your-hf-access-token-value>"

0 commit comments

Comments
 (0)