Fix LoRA operator compatibility with other CRs

zerofishnoodles · zerofishnoodles · commit 3edc18f773db · 2025-07-23T17:09:29.000Z
Signed-off-by: Rui Zhang <zrfishnoodles@gmail.com>
diff --git a/operator/config/manager/deployment.yaml b/operator/config/manager/deployment.yaml
@@ -86,7 +86,12 @@ spec:
             requests:
               cpu: 10m
               memory: 64Mi
-          volumeMounts: []
-      volumes: []
+          volumeMounts:
+            - name: shared-pvc-storage
+              mountPath: /data/shared-pvc-storage
+      volumes:
+        - name: shared-pvc-storage
+          persistentVolumeClaim:
+            claimName: production-stack-shared-pvc-storage-claim
       serviceAccountName: production-stack-controller-manager
       terminationGracePeriodSeconds: 10
diff --git a/operator/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml
@@ -1,3 +1,10 @@
 resources:
-  - namespace.yaml
-  - deployment.yaml
+- namespace.yaml
+- deployment.yaml
+- pvc.yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+images:
+- name: controller
+  newName: controller
+  newTag: latest
diff --git a/operator/config/manager/pvc.yaml b/operator/config/manager/pvc.yaml
@@ -0,0 +1,33 @@
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: shared-pvc-storage # no namespace is set on this object
+  labels:
+    type: local
+    app: production-stack
+    component: shared-pvc-storage
+spec:
+  storageClassName: "" # same empty class as the claim defined below
+  capacity:
+    storage: 100Gi
+  accessModes:
+    - ReadWriteMany
+  hostPath:
+    path: /data/shared-pvc-storage # same path the manager Deployment uses as its mountPath
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: shared-pvc-storage-claim
+  namespace: production-stack-system
+  labels:
+    app: production-stack
+    component: shared-pvc-storage
+spec:
+  storageClassName: "" # same empty class as the PersistentVolume above
+  accessModes:
+    - ReadWriteMany # same access mode the PersistentVolume above offers
+  resources:
+    requests:
+      storage: 100Gi # equal to the capacity declared on the PersistentVolume above
diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml
@@ -8,6 +8,8 @@ rules:
   - ""
   resources:
   - configmaps
+  - persistentvolumeclaims
+  - persistentvolumes
   - secrets
   - serviceaccounts
   - services
@@ -39,6 +41,18 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - networking.k8s.io
+  resources:
+  - ingresses
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
 - apiGroups:
   - production-stack.vllm.ai
   resources:
diff --git a/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml b/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml
@@ -6,20 +6,17 @@ metadata:
     app.kubernetes.io/managed-by: kustomize
   name: loraadapter-sample
 spec:
-  baseModel: "llama3-8b-instr" # Use the model name with your specified model name in engineSpec
-  # If you want to use vllm api key, uncomment the following section, you can either use secret or directly set the value
-  # Option 1: Secret reference
+  baseModel: "llama-3.1-8b-instruct" # Must match the value of the "model" label set on the target VLLMRuntime
   # vllmApiKey:
   #   secretName: "vllm-api-key"
   #   secretKey: "VLLM_API_KEY"
-
-  # Option 2: Direct value
-  # vllmApiKey:
-  #   value: "abc123"
   adapterSource:
-    type: "local" # (local, huggingface, s3) for now we only support local
+    type: "huggingface" # (local, huggingface)
     adapterName: "llama-3.1-nemoguard-8b-topic-control" # This will be the adapter ID
-    adapterPath: "/data/lora-adapters/llama-3.1-nemoguard-8b-topic-control" # This will be the path to the adapter in the persistent volume
+    repository: "nvidia/llama-3.1-nemoguard-8b-topic-control"
+    credentialsSecretRef:
+      name: "huggingface-credentials"
+      key: "hf_token"
   loraAdapterDeploymentConfig:
     algorithm: "default" # for now we only support default algorithm
     replicas: 1 # if not specified, by default algorithm, the lora adapter will be applied to all llama3-8b models, if specified, the lora adapter will only be applied to the specified number of replicas
diff --git a/operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml b/operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml
@@ -16,7 +16,7 @@ spec:
   serviceDiscovery: k8s
 
   # Label selector for vLLM runtime pods
-  k8sLabelSelector: "app=vllmruntime-sample"
+  k8sLabelSelector: "model=llama-3.1-8b-instruct"
 
   # Routing strategy (roundrobin or session)
   routingLogic: roundrobin
diff --git a/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml
@@ -4,12 +4,13 @@ metadata:
   labels:
     app.kubernetes.io/name: production-stack
     app.kubernetes.io/managed-by: kustomize
-  name: vllmruntime-sample
+    model: "llama-3.1-8b-instruct"
+  name: llama3
 spec:
   # Model configuration
   model:
-    modelURL: "meta-llama/Llama-3.1-8B"
-    enableLoRA: false
+    modelURL: "meta-llama/Llama-3.1-8B-Instruct"
+    enableLoRA: true
     enableTool: false
     toolCallParser: ""
     maxModelLen: 4096
@@ -60,7 +61,7 @@ spec:
       pullSecretName: ""
 
     # Number of replicas
-    replicas: 1
+    replicas: 2
 
     # Deployment strategy
     deploymentStrategy: "Recreate"
diff --git a/operator/internal/controller/loraadapter_controller.go b/operator/internal/controller/loraadapter_controller.go
@@ -63,6 +63,8 @@ type LoraAdapterReconciler struct {
 // +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=loraadapters/finalizers,verbs=update
 // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch
 // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch
+// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses,verbs=get;list;watch;create;update;patch;delete
 
 // Reconcile is part of the main kubernetes reconciliation loop which aims to
 // move the current state of the cluster closer to the desired state.
diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go
@@ -194,8 +194,9 @@ func (r *VLLMRouterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 
 // deploymentForVLLMRouter returns a VLLMRouter Deployment object
 func (r *VLLMRouterReconciler) deploymentForVLLMRouter(router *servingv1alpha1.VLLMRouter) *appsv1.Deployment {
-	labels := map[string]string{
-		"app": router.Name,
+	labels := map[string]string{"app": router.Name}
+	for k, v := range router.Labels {
+		labels[k] = v
 	}
 
 	// Add user-defined environment variables
diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go
@@ -50,6 +50,8 @@ type VLLMRuntimeReconciler struct {
 // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;create;update;patch;delete
 
 // Reconcile is part of the main kubernetes reconciliation loop which aims to
 // move the current state of the cluster closer to the desired state.
@@ -105,6 +107,40 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		return ctrl.Result{Requeue: true}, nil
 	}
 
+	// Check if the pv already exists, if not create a new one
+	foundPV := &corev1.PersistentVolume{}
+	err = r.Get(ctx, types.NamespacedName{Name: "shared-pvc-storage", Namespace: vllmRuntime.Namespace}, foundPV)
+	if err != nil && errors.IsNotFound(err) {
+		// Define a new pv
+		pv := r.pvForVLLMRuntime(vllmRuntime)
+		log.Info("Creating a new PV", "PV.Namespace", pv.Namespace, "PV.Name", pv.Name)
+		err = r.Create(ctx, pv)
+		if err != nil {
+			log.Error(err, "Failed to create new PV", "PV.Namespace", pv.Namespace, "PV.Name", pv.Name)
+			return ctrl.Result{}, err
+		}
+	} else if err != nil {
+		log.Error(err, "Failed to get PV")
+		return ctrl.Result{}, err
+	}
+
+	// Check if the pvc already exists, if not create a new one
+	foundPVC := &corev1.PersistentVolumeClaim{}
+	err = r.Get(ctx, types.NamespacedName{Name: "shared-pvc-storage-claim", Namespace: vllmRuntime.Namespace}, foundPVC)
+	if err != nil && errors.IsNotFound(err) {
+		// Define a new pvc
+		pvc := r.pvcForVLLMRuntime(vllmRuntime)
+		log.Info("Creating a new PVC", "PVC.Namespace", pvc.Namespace, "PVC.Name", pvc.Name)
+		err = r.Create(ctx, pvc)
+		if err != nil {
+			log.Error(err, "Failed to create new PVC", "PVC.Namespace", pvc.Namespace, "PVC.Name", pvc.Name)
+			return ctrl.Result{}, err
+		}
+	} else if err != nil {
+		log.Error(err, "Failed to get PVC")
+		return ctrl.Result{}, err
+	}
+
 	// Check if the deployment already exists, if not create a new one
 	found := &appsv1.Deployment{}
 	err = r.Get(ctx, types.NamespacedName{Name: vllmRuntime.Name, Namespace: vllmRuntime.Namespace}, found)
@@ -148,10 +184,48 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	return ctrl.Result{}, nil
 }
 
+// pvForVLLMRuntime builds the "shared-pvc-storage" hostPath PersistentVolume, labelled with the runtime's name.
+func (r *VLLMRuntimeReconciler) pvForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *corev1.PersistentVolume {
+	return &corev1.PersistentVolume{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:   "shared-pvc-storage", // PersistentVolume is a cluster-scoped kind, so no Namespace is set
+			Labels: map[string]string{"app": vllmRuntime.Name},
+		},
+		Spec: corev1.PersistentVolumeSpec{
+			AccessModes:      []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany},
+			StorageClassName: "", // same empty class that pvcForVLLMRuntime requests
+			Capacity:         corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")},
+			PersistentVolumeSource: corev1.PersistentVolumeSource{
+				HostPath: &corev1.HostPathVolumeSource{
+					Path: "/data/shared-pvc-storage", // same path the runtime container mounts the claim at
+				},
+			},
+		},
+	}
+}
+
+// pvcForVLLMRuntime returns the "shared-pvc-storage-claim" PersistentVolumeClaim for the runtime's namespace.
+func (r *VLLMRuntimeReconciler) pvcForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *corev1.PersistentVolumeClaim {
+	noStorageClass := "" // explicit empty class name, handed over by pointer below
+	claim := &corev1.PersistentVolumeClaim{}
+	claim.Name = "shared-pvc-storage-claim"
+	claim.Namespace = vllmRuntime.Namespace
+	claim.Labels = map[string]string{"app": vllmRuntime.Name}
+	claim.Spec = corev1.PersistentVolumeClaimSpec{
+		AccessModes:      []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany},
+		StorageClassName: &noStorageClass,
+		Resources: corev1.VolumeResourceRequirements{
+			Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")},
+		},
+	}
+	return claim
+}
+
 // deploymentForVLLMRuntime returns a VLLMRuntime Deployment object
 func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *appsv1.Deployment {
-	labels := map[string]string{
-		"app": vllmRuntime.Name,
+	labels := map[string]string{"app": vllmRuntime.Name}
+	for k, v := range vllmRuntime.Labels {
+		labels[k] = v
 	}
 
 	// Define probes
@@ -178,7 +252,7 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 				Scheme: corev1.URISchemeHTTP,
 			},
 		},
-		InitialDelaySeconds: 240,
+		InitialDelaySeconds: 500,
 		PeriodSeconds:       10,
 		TimeoutSeconds:      3,
 		SuccessThreshold:    1,
@@ -260,6 +334,15 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 		})
 	}
 
+	if vllmRuntime.Spec.Model.EnableLoRA {
+		env = append(env,
+			corev1.EnvVar{
+				Name:  "VLLM_ALLOW_RUNTIME_LORA_UPDATING",
+				Value: "True",
+			},
+		)
+	}
+
 	// LM Cache configuration
 	if vllmRuntime.Spec.LMCacheConfig.Enabled {
 		env = append(env,
@@ -424,6 +507,22 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production
 							Resources:      resources,
 							ReadinessProbe: readinessProbe,
 							LivenessProbe:  livenessProbe,
+							VolumeMounts: []corev1.VolumeMount{
+								{
+									Name:      "shared-pvc-storage",
+									MountPath: "/data/shared-pvc-storage",
+								},
+							},
+						},
+					},
+					Volumes: []corev1.Volume{
+						{
+							Name: "shared-pvc-storage",
+							VolumeSource: corev1.VolumeSource{
+								PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
+									ClaimName: "shared-pvc-storage-claim",
+								},
+							},
 						},
 					},
 				},