Skip to content
This repository was archived by the owner on Oct 15, 2025. It is now read-only.

Commit 4c7de24

Browse files
committed
add runai_streamer support
1 parent c9e16e9 commit 4c7de24

File tree

12 files changed

+676
-7
lines changed

12 files changed

+676
-7
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Values overlay that switches vLLM to the runai_streamer load format and
# exercises the extraArgs / extraEnvVars passthrough.
test:
  enabled: true

sampleApplication:
  enabled: false

redis:
  master:
    persistence:
      enabled: false

modelservice:
  metrics:
    enabled: false
  vllm:
    # Rendered as `--load-format <value>` in the vLLM args
    loadFormat: "runai_streamer"
    # Streamer settings: surfaced as RUNAI_STREAMER_* / AWS_* env vars and,
    # for concurrency / memoryLimit / pattern, as --model-loader-extra-config
    runaiStreamer:
      concurrency: 32
      chunkBytesize: "4194304"  # 4 MiB
      memoryLimit: 1073741824  # 1 GiB
      pattern: "custom-model-rank-{rank}-part-{part}.safetensors"
      s3:
        endpointUrl: "https://test-s3.example.com"
        caBundlePath: "/etc/ssl/certs/ca-bundle.crt"
        useVirtualAddressing: false
    # Appended verbatim (each item quoted) after the load-format args
    extraArgs:
      - "--custom-arg1"
      - "value1"
      - "--custom-arg2"
    # Rendered into the vLLM container env list
    extraEnvVars:
      - name: TEST_ENV_VAR
        value: "test-value"
      - name: ANOTHER_TEST_VAR
        value: "another-value"
  epp:
    defaultEnvVarsOverride:
      - name: PD_ENABLED
        value: "false"
      - name: ENABLE_KVCACHE_AWARE_SCORER
        value: "false"
  prefill:
    tolerations: []
  decode:
    tolerations: []

charts/llm-d/templates/modelservice/_helpers.tpl

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,51 @@ Return the proper Docker Image Registry Secret Names
110110
value: {{ $v }}
111111
{{- end }}
112112
{{- end }}
113+
114+
{{/*
Return the RunAI Streamer environment variables when loadFormat is
runai_streamer or runai_streamer_sharded; renders nothing for any other
load format.

Expects the chart root context (reads .Values.modelservice.vllm). The entries
are emitted at column 0, so callers indent the result themselves
(e.g. `include "modelservice.runaiStreamer.envVars" . | nindent 16`).
*/}}
{{- define "modelservice.runaiStreamer.envVars" -}}
{{- if or (eq .Values.modelservice.vllm.loadFormat "runai_streamer") (eq .Values.modelservice.vllm.loadFormat "runai_streamer_sharded") }}
{{- $rs := .Values.modelservice.vllm.runaiStreamer }}
{{- /*
  Numbers read from a values file arrive as float64, and quoting a float64
  prints large values in scientific notation (1073741824 -> "1.073741824e+09").
  Casting through int64 first keeps the plain decimal form; numeric strings
  such as "4194304" are converted by int64 as well.
*/}}
{{- /* Skip unset (nil) values instead of emitting an empty or zero setting. */}}
{{- if not (kindIs "invalid" $rs.concurrency) }}
- name: RUNAI_STREAMER_CONCURRENCY
  value: {{ $rs.concurrency | int64 | quote }}
{{- end }}
{{- if $rs.chunkBytesize }}
- name: RUNAI_STREAMER_CHUNK_BYTESIZE
  value: {{ $rs.chunkBytesize | int64 | quote }}
{{- end }}
{{- /* A nil check rather than a truthiness check, so an explicit 0 is still passed through. */}}
{{- if not (kindIs "invalid" $rs.memoryLimit) }}
- name: RUNAI_STREAMER_MEMORY_LIMIT
  value: {{ $rs.memoryLimit | int64 | quote }}
{{- end }}
{{- if $rs.s3.endpointUrl }}
- name: AWS_ENDPOINT_URL
  value: {{ $rs.s3.endpointUrl | quote }}
{{- end }}
{{- if $rs.s3.caBundlePath }}
- name: AWS_CA_BUNDLE
  value: {{ $rs.s3.caBundlePath | quote }}
{{- end }}
{{- /* Env var values must be strings; an unquoted 1/0 would be parsed as a YAML integer. */}}
- name: RUNAI_STREAMER_S3_USE_VIRTUAL_ADDRESSING
  value: {{ $rs.s3.useVirtualAddressing | ternary "1" "0" | quote }}
{{- end }}
{{- end }}
139+
140+
{{/*
Build the `--model-loader-extra-config` argument pair for the RunAI Streamer.

Only active when loadFormat is runai_streamer or runai_streamer_sharded.
Collects the non-empty concurrency, memoryLimit and pattern settings into a
JSON object (keys: concurrency, memory_limit, pattern) and emits the flag
followed by that JSON as a quoted string. Renders nothing when none of the
three settings is set. Items are emitted at column 0; callers indent them.
*/}}
{{- define "modelservice.runaiStreamer.extraConfigArgs" -}}
{{- $vllm := .Values.modelservice.vllm }}
{{- if or (eq $vllm.loadFormat "runai_streamer") (eq $vllm.loadFormat "runai_streamer_sharded") }}
{{- $streamer := $vllm.runaiStreamer }}
{{- $loaderConfig := dict }}
{{- with $streamer.concurrency }}
{{- $_ := set $loaderConfig "concurrency" . }}
{{- end }}
{{- with $streamer.memoryLimit }}
{{- $_ := set $loaderConfig "memory_limit" . }}
{{- end }}
{{- with $streamer.pattern }}
{{- $_ := set $loaderConfig "pattern" . }}
{{- end }}
{{- /* An empty dict is falsy, so the flag is omitted when nothing was collected. */}}
{{- if $loaderConfig }}
- "--model-loader-extra-config"
- {{ quote (toJson $loaderConfig) }}
{{- end }}
{{- end }}
{{- end }}

charts/llm-d/templates/modelservice/presets/basic-gpu-preset.yaml

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,18 @@ data:
7070
command:
7171
- vllm
7272
- serve
73-
- {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
73+
- {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
7474
args:
7575
- "--port"
7676
- "8001"
77+
{{- if .Values.modelservice.vllm.loadFormat }}
78+
- "--load-format"
79+
- {{ .Values.modelservice.vllm.loadFormat | quote }}
80+
{{- end }}
81+
{{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
82+
{{- range .Values.modelservice.vllm.extraArgs }}
83+
- {{ . | quote }}
84+
{{- end }}
7785
env:
7886
- name: HOME
7987
value: /home
@@ -87,6 +95,10 @@ data:
8795
- name: HF_HUB_CACHE
8896
value: /models
8997
{{ `{{- end }}` }}
98+
{{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
99+
{{- range .Values.modelservice.vllm.extraEnvVars }}
100+
{{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
101+
{{- end }}
90102
volumeMounts:
91103
- name: home
92104
mountPath: /home
@@ -149,10 +161,18 @@ data:
149161
command:
150162
- vllm
151163
- serve
152-
- {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
164+
- {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
153165
args:
154166
- "--port"
155167
- "8000"
168+
{{- if .Values.modelservice.vllm.loadFormat }}
169+
- "--load-format"
170+
- {{ .Values.modelservice.vllm.loadFormat | quote }}
171+
{{- end }}
172+
{{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
173+
{{- range .Values.modelservice.vllm.extraArgs }}
174+
- {{ . | quote }}
175+
{{- end }}
156176
env:
157177
- name: HOME
158178
value: /home
@@ -166,6 +186,10 @@ data:
166186
- name: HF_HUB_CACHE
167187
value: /models
168188
{{ `{{- end }}` }}
189+
{{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
190+
{{- range .Values.modelservice.vllm.extraEnvVars }}
191+
{{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
192+
{{- end }}
169193
volumeMounts:
170194
- name: home
171195
mountPath: /home

charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-and-redis-lookup-preset.yaml

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,20 @@ data:
7171
command:
7272
- vllm
7373
- serve
74-
- {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
74+
- {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
7575
args:
7676
- "--port"
7777
- "8001"
7878
- "--kv-transfer-config"
7979
- '{"kv_connector":"MultiConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"NixlConnector","kv_role":"kv_both"},{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}]}}'
80+
{{- if .Values.modelservice.vllm.loadFormat }}
81+
- "--load-format"
82+
- {{ .Values.modelservice.vllm.loadFormat | quote }}
83+
{{- end }}
84+
{{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
85+
{{- range .Values.modelservice.vllm.extraArgs }}
86+
- {{ . | quote }}
87+
{{- end }}
8088
env:
8189
- name: HOME
8290
value: /home
@@ -119,6 +127,10 @@ data:
119127
- name: HF_HUB_CACHE
120128
value: /models
121129
{{ `{{- end }}` }}
130+
{{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
131+
{{- range .Values.modelservice.vllm.extraEnvVars }}
132+
{{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
133+
{{- end }}
122134
volumeMounts:
123135
- name: home
124136
mountPath: /home
@@ -186,12 +198,20 @@ data:
186198
command:
187199
- vllm
188200
- serve
189-
- {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
201+
- {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
190202
args:
191203
- "--port"
192204
- "8000"
193205
- "--kv-transfer-config"
194206
- '{"kv_connector":"MultiConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"NixlConnector","kv_role":"kv_both"},{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}]}}'
207+
{{- if .Values.modelservice.vllm.loadFormat }}
208+
- "--load-format"
209+
- {{ .Values.modelservice.vllm.loadFormat | quote }}
210+
{{- end }}
211+
{{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
212+
{{- range .Values.modelservice.vllm.extraArgs }}
213+
- {{ . | quote }}
214+
{{- end }}
195215
env:
196216
- name: HOME
197217
value: /home
@@ -234,6 +254,10 @@ data:
234254
- name: HF_HUB_CACHE
235255
value: /models
236256
{{ `{{- end }}` }}
257+
{{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
258+
{{- range .Values.modelservice.vllm.extraEnvVars }}
259+
{{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
260+
{{- end }}
237261
volumeMounts:
238262
- name: home
239263
mountPath: /home

charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,20 @@ data:
7171
command:
7272
- vllm
7373
- serve
74-
- {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
74+
- {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
7575
args:
7676
- "--port"
7777
- "8001"
7878
- "--kv-transfer-config"
7979
- '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
80+
{{- if .Values.modelservice.vllm.loadFormat }}
81+
- "--load-format"
82+
- {{ .Values.modelservice.vllm.loadFormat | quote }}
83+
{{- end }}
84+
{{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
85+
{{- range .Values.modelservice.vllm.extraArgs }}
86+
- {{ . | quote }}
87+
{{- end }}
8088
env:
8189
- name: HOME
8290
value: /home
@@ -107,6 +115,10 @@ data:
107115
- name: HF_HUB_CACHE
108116
value: /models
109117
{{ `{{- end }}` }}
118+
{{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
119+
{{- range .Values.modelservice.vllm.extraEnvVars }}
120+
{{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
121+
{{- end }}
110122
volumeMounts:
111123
- name: home
112124
mountPath: /home
@@ -172,12 +184,20 @@ data:
172184
command:
173185
- vllm
174186
- serve
175-
- {{ `{{ default (print "/models/" .ModelPath) .HFModelName }}` }}
187+
- {{ `{{ if and (ne .LoadFormat "") (eq .LoadFormat "runai_streamer") }}{{ .ModelArtifactURI }}{{ else }}{{ default (print "/models/" .ModelPath) .HFModelName }}{{ end }}` }}
176188
args:
177189
- "--port"
178190
- "8000"
179191
- "--kv-transfer-config"
180192
- '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
193+
{{- if .Values.modelservice.vllm.loadFormat }}
194+
- "--load-format"
195+
- {{ .Values.modelservice.vllm.loadFormat | quote }}
196+
{{- end }}
197+
{{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 16 }}
198+
{{- range .Values.modelservice.vllm.extraArgs }}
199+
- {{ . | quote }}
200+
{{- end }}
181201
env:
182202
- name: HOME
183203
value: /home
@@ -208,6 +228,10 @@ data:
208228
- name: HF_HUB_CACHE
209229
value: /models
210230
{{ `{{- end }}` }}
231+
{{- include "modelservice.runaiStreamer.envVars" . | nindent 16 }}
232+
{{- range .Values.modelservice.vllm.extraEnvVars }}
233+
{{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 16 }}
234+
{{- end }}
211235
volumeMounts:
212236
- name: home
213237
mountPath: /home

charts/llm-d/templates/sample-application/_helpers.tpl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,10 @@ Define the type of the modelArtifactURI
3535
pvc
3636
{{- else if hasPrefix "hf://" .Values.sampleApplication.model.modelArtifactURI -}}
3737
hf
38+
{{- else if eq .Values.modelservice.vllm.loadFormat "runai_streamer" -}}
39+
objectstorage
3840
{{- else }}
39-
{{- fail "Values.sampleApplication.model.modelArtifactURI supports hf:// and pvc://" }}
41+
{{- fail "Values.sampleApplication.model.modelArtifactURI supports hf:// and pvc://. For other protocols (like s3://), set modelservice.vllm.loadFormat to 'runai_streamer'" }}
4042
{{- end }}
4143
{{- end }}
4244

charts/llm-d/templates/sample-application/modelservice.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@ spec:
2727
args:
2828
- "--served-model-name"
2929
- {{ include "sampleApplication.servedModelNames" .}}
30+
{{- if .Values.modelservice.vllm.loadFormat }}
31+
- "--load-format"
32+
- {{ .Values.modelservice.vllm.loadFormat | quote }}
33+
{{- end }}
34+
{{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 6 }}
35+
{{- range .Values.modelservice.vllm.extraArgs }}
36+
- {{ . | quote }}
37+
{{- end }}
3038
{{- range .Values.sampleApplication.decode.extraArgs }}
3139
- {{ include "common.tplvalues.render" ( dict "value" . "context" $) | quote }}
3240
{{- end }}
@@ -39,13 +47,25 @@ spec:
3947
name: {{ .Values.sampleApplication.model.auth.hfToken.name }}
4048
key: {{ .Values.sampleApplication.model.auth.hfToken.key }}
4149
{{- end }}
50+
{{- include "modelservice.runaiStreamer.envVars" . | nindent 6 }}
51+
{{- range .Values.modelservice.vllm.extraEnvVars }}
52+
{{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 6 }}
53+
{{- end }}
4254
prefill:
4355
replicas: {{ .Values.sampleApplication.prefill.replicas }}
4456
containers:
4557
- name: "vllm"
4658
args:
4759
- "--served-model-name"
4860
- {{ include "sampleApplication.servedModelNames" .}}
61+
{{- if .Values.modelservice.vllm.loadFormat }}
62+
- "--load-format"
63+
- {{ .Values.modelservice.vllm.loadFormat | quote }}
64+
{{- end }}
65+
{{- include "modelservice.runaiStreamer.extraConfigArgs" . | nindent 6 }}
66+
{{- range .Values.modelservice.vllm.extraArgs }}
67+
- {{ . | quote }}
68+
{{- end }}
4969
{{- range .Values.sampleApplication.prefill.extraArgs }}
5070
- {{ include "common.tplvalues.render" ( dict "value" . "context" $) | quote }}
5171
{{- end }}
@@ -58,6 +78,10 @@ spec:
5878
name: {{ .Values.sampleApplication.model.auth.hfToken.name }}
5979
key: {{ .Values.sampleApplication.model.auth.hfToken.key }}
6080
{{- end }}
81+
{{- include "modelservice.runaiStreamer.envVars" . | nindent 6 }}
82+
{{- range .Values.modelservice.vllm.extraEnvVars }}
83+
{{- include "common.tplvalues.render" ( dict "value" . "context" $) | nindent 6 }}
84+
{{- end }}
6185
endpointPicker:
6286
containers:
6387
- name: epp

0 commit comments

Comments
 (0)