Skip to content

Commit 3edc18f

Browse files
Fix LoRA operator compatibility with other CRs
Signed-off-by: Rui Zhang <[email protected]>
1 parent 1d3d70f commit 3edc18f

10 files changed

+182
-23
lines changed

operator/config/manager/deployment.yaml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,12 @@ spec:
8686
requests:
8787
cpu: 10m
8888
memory: 64Mi
89-
volumeMounts: []
90-
volumes: []
89+
volumeMounts:
90+
- name: shared-pvc-storage
91+
mountPath: /data/shared-pvc-storage
92+
volumes:
93+
- name: shared-pvc-storage
94+
persistentVolumeClaim:
95+
claimName: production-stack-shared-pvc-storage-claim
9196
serviceAccountName: production-stack-controller-manager
9297
terminationGracePeriodSeconds: 10
Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
11
resources:
2-
- namespace.yaml
3-
- deployment.yaml
2+
- namespace.yaml
3+
- deployment.yaml
4+
- pvc.yaml
5+
apiVersion: kustomize.config.k8s.io/v1beta1
6+
kind: Kustomization
7+
images:
8+
- name: controller
9+
newName: controller
10+
newTag: latest

operator/config/manager/pvc.yaml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
---
2+
apiVersion: v1
3+
kind: PersistentVolume
4+
metadata:
5+
name: shared-pvc-storage
6+
labels:
7+
type: local
8+
app: production-stack
9+
component: shared-pvc-storage
10+
spec:
11+
storageClassName: ""
12+
capacity:
13+
storage: 100Gi
14+
accessModes:
15+
- ReadWriteMany
16+
hostPath:
17+
path: /data/shared-pvc-storage
18+
---
19+
apiVersion: v1
20+
kind: PersistentVolumeClaim
21+
metadata:
22+
name: shared-pvc-storage-claim
23+
namespace: production-stack-system
24+
labels:
25+
app: production-stack
26+
component: shared-pvc-storage
27+
spec:
28+
storageClassName: ""
29+
accessModes:
30+
- ReadWriteMany
31+
resources:
32+
requests:
33+
storage: 100Gi

operator/config/rbac/role.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ rules:
88
- ""
99
resources:
1010
- configmaps
11+
- persistentvolumeclaims
12+
- persistentvolumes
1113
- secrets
1214
- serviceaccounts
1315
- services
@@ -39,6 +41,18 @@ rules:
3941
- patch
4042
- update
4143
- watch
44+
- apiGroups:
45+
- networking.k8s.io
46+
resources:
47+
- ingresses
48+
verbs:
49+
- create
50+
- delete
51+
- get
52+
- list
53+
- patch
54+
- update
55+
- watch
4256
- apiGroups:
4357
- production-stack.vllm.ai
4458
resources:

operator/config/samples/production-stack_v1alpha1_loraadapter.yaml

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,17 @@ metadata:
66
app.kubernetes.io/managed-by: kustomize
77
name: loraadapter-sample
88
spec:
9-
baseModel: "llama3-8b-instr" # Use the model name with your specified model name in engineSpec
10-
# If you want to use vllm api key, uncomment the following section, you can either use secret or directly set the value
11-
# Option 1: Secret reference
9+
baseModel: "llama-3.1-8b-instruct" # Must match the model label specified on your VLLMRuntime
1210
# vllmApiKey:
1311
# secretName: "vllm-api-key"
1412
# secretKey: "VLLM_API_KEY"
15-
16-
# Option 2: Direct value
17-
# vllmApiKey:
18-
# value: "abc123"
1913
adapterSource:
20-
type: "local" # (local, huggingface, s3) for now we only support local
14+
type: "huggingface" # (local, huggingface)
2115
adapterName: "llama-3.1-nemoguard-8b-topic-control" # This will be the adapter ID
22-
adapterPath: "/data/lora-adapters/llama-3.1-nemoguard-8b-topic-control" # This will be the path to the adapter in the persistent volume
16+
repository: "nvidia/llama-3.1-nemoguard-8b-topic-control"
17+
credentialsSecretRef:
18+
name: "huggingface-credentials"
19+
key: "hf_token"
2320
loraAdapterDeploymentConfig:
2421
algorithm: "default" # for now we only support default algorithm
2522
replicas: 1 # Optional. If omitted, the default algorithm applies the LoRA adapter to all llama3-8b models; if set, the adapter is applied only to this number of replicas

operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ spec:
1616
serviceDiscovery: k8s
1717

1818
# Label selector for vLLM runtime pods
19-
k8sLabelSelector: "app=vllmruntime-sample"
19+
k8sLabelSelector: "model=llama-3.1-8b-instruct"
2020

2121
# Routing strategy (roundrobin or session)
2222
routingLogic: roundrobin

operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@ metadata:
44
labels:
55
app.kubernetes.io/name: production-stack
66
app.kubernetes.io/managed-by: kustomize
7-
name: vllmruntime-sample
7+
model: "llama-3.1-8b-instruct"
8+
name: llama3
89
spec:
910
# Model configuration
1011
model:
11-
modelURL: "meta-llama/Llama-3.1-8B"
12-
enableLoRA: false
12+
modelURL: "meta-llama/Llama-3.1-8B-Instruct"
13+
enableLoRA: true
1314
enableTool: false
1415
toolCallParser: ""
1516
maxModelLen: 4096
@@ -60,7 +61,7 @@ spec:
6061
pullSecretName: ""
6162

6263
# Number of replicas
63-
replicas: 1
64+
replicas: 2
6465

6566
# Deployment strategy
6667
deploymentStrategy: "Recreate"

operator/internal/controller/loraadapter_controller.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ type LoraAdapterReconciler struct {
6363
// +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=loraadapters/finalizers,verbs=update
6464
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch
6565
// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch
66+
// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
67+
// +kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses,verbs=get;list;watch;create;update;patch;delete
6668

6769
// Reconcile is part of the main kubernetes reconciliation loop which aims to
6870
// move the current state of the cluster closer to the desired state.

operator/internal/controller/vllmrouter_controller.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,9 @@ func (r *VLLMRouterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
194194

195195
// deploymentForVLLMRouter returns a VLLMRouter Deployment object
196196
func (r *VLLMRouterReconciler) deploymentForVLLMRouter(router *servingv1alpha1.VLLMRouter) *appsv1.Deployment {
197-
labels := map[string]string{
198-
"app": router.Name,
197+
labels := map[string]string{"app": router.Name}
198+
for k, v := range router.Labels {
199+
labels[k] = v
199200
}
200201

201202
// Add user-defined environment variables

operator/internal/controller/vllmruntime_controller.go

Lines changed: 102 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ type VLLMRuntimeReconciler struct {
5050
// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete
5151
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete
5252
// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete
53+
// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
54+
// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;create;update;patch;delete
5355

5456
// Reconcile is part of the main kubernetes reconciliation loop which aims to
5557
// move the current state of the cluster closer to the desired state.
@@ -105,6 +107,40 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request)
105107
return ctrl.Result{Requeue: true}, nil
106108
}
107109

110+
// Check if the pv already exists, if not create a new one
111+
foundPV := &corev1.PersistentVolume{}
112+
err = r.Get(ctx, types.NamespacedName{Name: "shared-pvc-storage", Namespace: vllmRuntime.Namespace}, foundPV)
113+
if err != nil && errors.IsNotFound(err) {
114+
// Define a new pv
115+
pv := r.pvForVLLMRuntime(vllmRuntime)
116+
log.Info("Creating a new PV", "PV.Namespace", pv.Namespace, "PV.Name", pv.Name)
117+
err = r.Create(ctx, pv)
118+
if err != nil {
119+
log.Error(err, "Failed to create new PV", "PV.Namespace", pv.Namespace, "PV.Name", pv.Name)
120+
return ctrl.Result{}, err
121+
}
122+
} else if err != nil {
123+
log.Error(err, "Failed to get PV")
124+
return ctrl.Result{}, err
125+
}
126+
127+
// Check if the pvc already exists, if not create a new one
128+
foundPVC := &corev1.PersistentVolumeClaim{}
129+
err = r.Get(ctx, types.NamespacedName{Name: "shared-pvc-storage-claim", Namespace: vllmRuntime.Namespace}, foundPVC)
130+
if err != nil && errors.IsNotFound(err) {
131+
// Define a new pvc
132+
pvc := r.pvcForVLLMRuntime(vllmRuntime)
133+
log.Info("Creating a new PVC", "PVC.Namespace", pvc.Namespace, "PVC.Name", pvc.Name)
134+
err = r.Create(ctx, pvc)
135+
if err != nil {
136+
log.Error(err, "Failed to create new PVC", "PVC.Namespace", pvc.Namespace, "PVC.Name", pvc.Name)
137+
return ctrl.Result{}, err
138+
}
139+
} else if err != nil {
140+
log.Error(err, "Failed to get PVC")
141+
return ctrl.Result{}, err
142+
}
143+
108144
// Check if the deployment already exists, if not create a new one
109145
found := &appsv1.Deployment{}
110146
err = r.Get(ctx, types.NamespacedName{Name: vllmRuntime.Name, Namespace: vllmRuntime.Namespace}, found)
@@ -148,10 +184,48 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request)
148184
return ctrl.Result{}, nil
149185
}
150186

187+
// pvForVLLMRuntime builds (but does not create) a hostPath-backed
// PersistentVolume for the given VLLMRuntime.
//
// The volume name, capacity (100Gi), access mode (ReadWriteMany) and host
// path are fixed constants; only the namespace and the "app" label are taken
// from vllmRuntime. Because the name is constant, every VLLMRuntime yields a
// PersistentVolume with the same name.
func (r *VLLMRuntimeReconciler) pvForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *corev1.PersistentVolume {
188+
return &corev1.PersistentVolume{
189+
ObjectMeta: metav1.ObjectMeta{
190+
Name: "shared-pvc-storage",
191+
// NOTE(review): PersistentVolumes are cluster-scoped in Kubernetes, so
// this namespace is presumably ignored by the API server — confirm.
Namespace: vllmRuntime.Namespace,
192+
Labels: map[string]string{"app": vllmRuntime.Name},
193+
},
194+
Spec: corev1.PersistentVolumeSpec{
195+
AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany},
196+
// Empty storage class: presumably so the volume is matched only by
// claims that also request the empty class — TODO confirm.
StorageClassName: "",
197+
Capacity: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")},
198+
PersistentVolumeSource: corev1.PersistentVolumeSource{
199+
// The data lives in a directory on the node's filesystem.
HostPath: &corev1.HostPathVolumeSource{
200+
Path: "/data/shared-pvc-storage",
201+
},
202+
},
203+
},
204+
}
205+
}
206+
207+
// pvcForVLLMRuntime builds (but does not create) a PersistentVolumeClaim in
// the namespace of the given VLLMRuntime.
//
// The claim name, access mode (ReadWriteMany) and requested size (100Gi) are
// fixed constants; only the namespace and the "app" label are taken from
// vllmRuntime.
func (r *VLLMRuntimeReconciler) pvcForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *corev1.PersistentVolumeClaim {
208+
return &corev1.PersistentVolumeClaim{
209+
ObjectMeta: metav1.ObjectMeta{
210+
Name: "shared-pvc-storage-claim",
211+
Namespace: vllmRuntime.Namespace,
212+
Labels: map[string]string{"app": vllmRuntime.Name},
213+
},
214+
Spec: corev1.PersistentVolumeClaimSpec{
215+
AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany},
216+
// StorageClassName is a *string; indexing a one-element slice literal
// yields an addressable "" so the field is set to an explicit empty
// string rather than left nil.
// NOTE(review): an explicit empty class presumably disables dynamic
// provisioning so the claim binds to a pre-created volume — confirm.
StorageClassName: &[]string{""}[0],
217+
Resources: corev1.VolumeResourceRequirements{
218+
Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")},
219+
},
220+
},
221+
}
222+
}
223+
151224
// deploymentForVLLMRuntime returns a VLLMRuntime Deployment object
152225
func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *appsv1.Deployment {
153-
labels := map[string]string{
154-
"app": vllmRuntime.Name,
226+
labels := map[string]string{"app": vllmRuntime.Name}
227+
for k, v := range vllmRuntime.Labels {
228+
labels[k] = v
155229
}
156230

157231
// Define probes
@@ -178,7 +252,7 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
178252
Scheme: corev1.URISchemeHTTP,
179253
},
180254
},
181-
InitialDelaySeconds: 240,
255+
InitialDelaySeconds: 500,
182256
PeriodSeconds: 10,
183257
TimeoutSeconds: 3,
184258
SuccessThreshold: 1,
@@ -260,6 +334,15 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
260334
})
261335
}
262336

337+
if vllmRuntime.Spec.Model.EnableLoRA {
338+
env = append(env,
339+
corev1.EnvVar{
340+
Name: "VLLM_ALLOW_RUNTIME_LORA_UPDATING",
341+
Value: "True",
342+
},
343+
)
344+
}
345+
263346
// LM Cache configuration
264347
if vllmRuntime.Spec.LMCacheConfig.Enabled {
265348
env = append(env,
@@ -424,6 +507,22 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
424507
Resources: resources,
425508
ReadinessProbe: readinessProbe,
426509
LivenessProbe: livenessProbe,
510+
VolumeMounts: []corev1.VolumeMount{
511+
{
512+
Name: "shared-pvc-storage",
513+
MountPath: "/data/shared-pvc-storage",
514+
},
515+
},
516+
},
517+
},
518+
Volumes: []corev1.Volume{
519+
{
520+
Name: "shared-pvc-storage",
521+
VolumeSource: corev1.VolumeSource{
522+
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
523+
ClaimName: "shared-pvc-storage-claim",
524+
},
525+
},
427526
},
428527
},
429528
},

0 commit comments

Comments
 (0)