Skip to content

Commit 5cb150e

Browse files
[FEAT] Add LoRA helm deployment (#563)
* [CI] Add prefix aware routing test Signed-off-by: Rui Zhang <[email protected]> * [ci] refactor k8s discovery e2e test Signed-off-by: Rui Zhang <[email protected]> * [CI] Refactor static discovery testing so that it can support multiple logic Signed-off-by: Rui Zhang <[email protected]> * [CI] Add static e2e test for prefixaware Signed-off-by: Rui Zhang <[email protected]> * refactor the code Signed-off-by: Rui Zhang <[email protected]> * [CI] refactor Signed-off-by: Rui Zhang <[email protected]> * [CI] Add multiple routing logic test Signed-off-by: Rui Zhang <[email protected]> * [CI] fix bug Signed-off-by: Rui Zhang <[email protected]> * add lora helm deployment Signed-off-by: Rui Zhang <[email protected]> * add lora helm deployment Signed-off-by: Rui Zhang <[email protected]> * merge two tutotials into one Signed-off-by: Rui Zhang <[email protected]> * fix pre-commit check Signed-off-by: Rui Zhang <[email protected]> * modify tutorials and modify shared-storage Signed-off-by: Rui Zhang <[email protected]> * fix github check Signed-off-by: Rui Zhang <[email protected]> --------- Signed-off-by: Rui Zhang <[email protected]> Co-authored-by: Yuhan Liu <[email protected]>
1 parent d8076b9 commit 5cb150e

13 files changed

+1261
-39
lines changed

helm/crds/crd-lora-adapter.yaml

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
---
# CustomResourceDefinition for the namespaced LoraAdapter kind
# (group production-stack.vllm.ai, version v1alpha1).
# The controller-gen annotation below suggests this manifest is generated
# output; prefer regenerating it over hand-editing the schema.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: loraadapters.production-stack.vllm.ai
  annotations:
    controller-gen.kubebuilder.io/version: v0.17.2
  labels:
    app.kubernetes.io/name: lora-adapter-crd
    app.kubernetes.io/component: lora-adapter-crd
spec:
  group: production-stack.vllm.ai
  names:
    kind: LoraAdapter
    listKind: LoraAdapterList
    plural: loraadapters
    singular: loraadapter
  scope: Namespaced
  versions:
    # Extra columns: .status.phase and the creation timestamp (as Age).
    - additionalPrinterColumns:
        - jsonPath: .status.phase
          name: Phase
          type: string
        - jsonPath: .metadata.creationTimestamp
          name: Age
          type: date
      name: v1alpha1
      schema:
        openAPIV3Schema:
          description: LoraAdapter is the Schema for the loraadapters API.
          properties:
            apiVersion:
              description: |-
                APIVersion defines the versioned schema of this representation of an object.
                Servers should convert recognized schemas to the latest internal value, and
                may reject unrecognized values.
                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
              type: string
            kind:
              description: |-
                Kind is a string value representing the REST resource this object represents.
                Servers may infer this from the endpoint the client submits requests to.
                Cannot be updated.
                In CamelCase.
                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
              type: string
            metadata:
              type: object
            # Desired state: only adapterSource and baseModel are required.
            spec:
              description: LoraAdapterSpec defines the desired state of LoraAdapter.
              properties:
                adapterSource:
                  description: AdapterSource defines where to get the LoRA adapter from.
                  properties:
                    adapterName:
                      description: AdapterName is the name of the adapter to apply.
                      type: string
                    adapterPath:
                      # Block scalar keeps the per-source notes on separate
                      # lines instead of folding them into one sentence.
                      description: |-
                        AdapterPath is the path to the LoRA adapter weights.
                        For local sources: required, specifies the path to the adapter
                        For remote sources: optional, will be updated by the controller
                        with the download path
                      type: string
                    credentialsSecretRef:
                      description: CredentialsSecretRef references a secret containing
                        storage credentials.
                      properties:
                        key:
                          description: Key in the secret containing the value
                          type: string
                        name:
                          description: Name of the secret
                          type: string
                      required:
                        - key
                        - name
                      type: object
                      x-kubernetes-map-type: atomic
                    maxAdapters:
                      description: MaxAdapters is the maximum number of adapters to load.
                      format: int32
                      type: integer
                    pattern:
                      description: Pattern is the pattern to use for the adapter name.
                      type: string
                    repository:
                      description: Repository is the repository to get the LoRA adapter
                        from.
                      type: string
                    type:
                      description: Type is the type of the adapter source.
                      enum:
                        - local
                        - s3
                        - http
                        - huggingface
                      type: string
                  required:
                    - adapterName
                    - type
                  type: object
                baseModel:
                  description: BaseModel is the name of the base model this adapter is
                    for.
                  type: string
                loraAdapterDeploymentConfig:
                  description: DeploymentConfig defines how the adapter should be deployed
                  properties:
                    algorithm:
                      default: default
                      description: Algorithm specifies which placement algorithm to use.
                      enum:
                        - default
                        - ordered
                        - equalized
                      type: string
                    replicas:
                      description: Replicas is the number of replicas that should load
                        this adapter.
                      format: int32
                      minimum: 0
                      type: integer
                  required:
                    - algorithm
                  type: object
                vllmApiKey:
                  description: VLLMApiKey defines the configuration for vLLM API key authentication
                  properties:
                    secretKey:
                      description: Key in the secret containing the API key
                      type: string
                    secretName:
                      description: Name of the secret
                      type: string
                  required:
                    - secretKey
                    - secretName
                  type: object
              required:
                - adapterSource
                - baseModel
              type: object
            # Observed state, exposed through the status subresource below.
            status:
              type: object
              description: LoraAdapterStatus defines the observed state of LoraAdapter.
              properties:
                conditions:
                  description: Condition contains details for one aspect of the current
                    state of this API Resource.
                  items:
                    description: Condition contains details for one aspect of the current
                      state of this API Resource.
                    properties:
                      lastTransitionTime:
                        description: LastTransitionTime is the last time the condition
                          transitioned from one status to another.
                        format: date-time
                        type: string
                      message:
                        description: Message is a human-readable message indicating details
                          about why the current state is set.
                        maxLength: 32768
                        type: string
                      reason:
                        description: Reason is a brief reason for the condition's current
                          status.
                        maxLength: 1024
                        minLength: 1
                        type: string
                      status:
                        description: Status is the status of the condition.
                        # "True"/"False" stay quoted so they remain strings.
                        enum:
                          - "True"
                          - "False"
                          - Unknown
                        type: string
                      type:
                        description: type of condition in CamelCase.
                        maxLength: 316
                        pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
                        type: string
                    required:
                      - lastTransitionTime
                      - message
                      - reason
                      - status
                      - type
                    type: object
                  type: array
                loadedAdapters:
                  description: LoadedAdapters tracks the loading status of adapters and
                    their pod assignments.
                  items:
                    description: LoadedAdapter represents an adapter that has been loaded
                      into a pod
                    properties:
                      loadTime:
                        description: LoadTime is when the adapter was loaded
                        format: date-time
                        type: string
                      name:
                        description: Name is the name of the adapter
                        type: string
                      path:
                        description: Path is the path where the adapter is loaded
                        type: string
                      podAssignments:
                        description: PodAssignments represents the pods this adapter has
                          been assigned to
                        properties:
                          namespace:
                            description: Namespace is the namespace of the pod
                            type: string
                          podName:
                            description: Pod represents the pod information
                            type: string
                        required:
                          - namespace
                          - podName
                        type: object
                      status:
                        description: Status is the status of the adapter
                        type: string
                    required:
                      - name
                      - path
                      - podAssignments
                      - status
                    type: object
                  type: array
                message:
                  description: Message provides additional information about the current
                    phase.
                  type: string
                observedGeneration:
                  description: ObservedGeneration represents the .metadata.generation
                    that the condition was set based upon.
                  format: int64
                  minimum: 0
                  type: integer
                phase:
                  description: Phase represents the current phase of the adapter deployment.
                  type: string
          type: object
      served: true
      storage: true
      subresources:
        status: {}
# Placeholder CRD status shipped with the manifest; every field is empty.
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
{{- /*
Renders one LoraAdapter custom resource per entry of .Values.loraAdapters.

Per-entry keys read by this template:
  name, baseModel               - copied to metadata.name / spec.baseModel
  vllmApiKey                    - optional {secretName, secretKey}; when absent
                                  and .Values.servingEngineSpec.vllmApiKey is
                                  set, "<release>-secrets" / "vllmApiKey" is used
  adapterSource                 - required map (type, adapterName, and optional
                                  adapterPath, repository, pattern, maxAdapters,
                                  credentials)
  adapterSource.credentials     - a string selects the key
                                  "lora_adapter_credentials_<adapterName>" in
                                  "<release>-secrets"; otherwise a
                                  {secretName, secretKey} map is expected
  loraAdapterDeploymentConfig   - optional map (algorithm, replicas)
*/ -}}
{{- if .Values.loraAdapters }}
{{- range .Values.loraAdapters }}
{{- /* Fail with a readable message instead of a nil-pointer template error. */}}
{{- $source := required "each loraAdapters entry must define adapterSource" .adapterSource }}
{{- /* The deployment config is optional; fall back to an empty map. */}}
{{- $deploy := .loraAdapterDeploymentConfig | default dict }}
---
apiVersion: production-stack.vllm.ai/v1alpha1
kind: LoraAdapter
metadata:
  name: {{ .name | quote }}
  namespace: {{ $.Release.Namespace | quote }}
  labels:
    app.kubernetes.io/component: lora-adapter
    helm-release-name: {{ $.Release.Name | quote }}
spec:
  baseModel: {{ .baseModel | quote }}
  {{- /* A per-adapter key reference wins over the chart-wide API key. */}}
  {{- if .vllmApiKey }}
  vllmApiKey:
    secretName: {{ .vllmApiKey.secretName | quote }}
    secretKey: {{ .vllmApiKey.secretKey | quote }}
  {{- else if $.Values.servingEngineSpec.vllmApiKey }}
  vllmApiKey:
    secretName: {{ printf "%s-secrets" $.Release.Name | quote }}
    secretKey: vllmApiKey
  {{- end }}
  adapterSource:
    type: {{ $source.type | quote }}
    adapterName: {{ $source.adapterName | quote }}
    {{- if $source.adapterPath }}
    adapterPath: {{ $source.adapterPath | quote }}
    {{- end }}
    {{- if $source.repository }}
    repository: {{ $source.repository | quote }}
    {{- end }}
    {{- if $source.pattern }}
    pattern: {{ $source.pattern | quote }}
    {{- end }}
    {{- if $source.maxAdapters }}
    maxAdapters: {{ $source.maxAdapters | int }}
    {{- end }}
    {{- if $source.credentials }}
    credentialsSecretRef:
      {{- if kindIs "string" $source.credentials }}
      name: {{ printf "%s-secrets" $.Release.Name | quote }}
      key: {{ printf "lora_adapter_credentials_%v" $source.adapterName | quote }}
      {{- else }}
      name: {{ $source.credentials.secretName | quote }}
      key: {{ $source.credentials.secretKey | quote }}
      {{- end }}
    {{- end }}
  loraAdapterDeploymentConfig:
    algorithm: {{ $deploy.algorithm | default "default" | quote }}
    {{- /* Test for presence, not truthiness, so an explicit 0 is rendered. */}}
    {{- if not (kindIs "invalid" $deploy.replicas) }}
    replicas: {{ $deploy.replicas | int }}
    {{- end }}
{{- end }}
{{- end }}

0 commit comments

Comments
 (0)