@@ -19,6 +19,7 @@ package inferenceservice
19
19
import (
20
20
"context"
21
21
"fmt"
22
+ "reflect"
22
23
"time"
23
24
24
25
"github.com/kserve/kserve/pkg/apis/serving/v1alpha1"
@@ -3195,6 +3196,143 @@ var _ = Describe("v1beta1 inference service controller", func() {
3195
3196
verifyTensorParallelSizeDeployments (actualDefaultDeployment , actualWorkerDeployment , "3" , constants .NvidiaGPUResourceType )
3196
3197
})
3197
3198
})
3199
+ Context ("When creating an inference service with modelcar and raw deployment" , func () {
3200
+ It ("Should only have the ImagePullSecrets that are specified in the InferenceService" , func () {
3201
+ By ("Updating an InferenceService with a new ImagePullSecret and checking the deployment" )
3202
+ var configMap = & v1.ConfigMap {
3203
+ ObjectMeta : metav1.ObjectMeta {
3204
+ Name : constants .InferenceServiceConfigMapName ,
3205
+ Namespace : constants .KServeNamespace ,
3206
+ },
3207
+ Data : configs ,
3208
+ }
3209
+ Expect (k8sClient .Create (context .TODO (), configMap )).NotTo (HaveOccurred ())
3210
+ defer k8sClient .Delete (context .TODO (), configMap )
3211
+
3212
+ servingRuntime := & v1alpha1.ServingRuntime {
3213
+ ObjectMeta : metav1.ObjectMeta {
3214
+ Name : "vllm-runtime" ,
3215
+ Namespace : constants .KServeNamespace ,
3216
+ },
3217
+ Spec : v1alpha1.ServingRuntimeSpec {
3218
+ SupportedModelFormats : []v1alpha1.SupportedModelFormat {
3219
+ {
3220
+ AutoSelect : proto .Bool (true ),
3221
+ Name : "vLLM" ,
3222
+ },
3223
+ },
3224
+ ServingRuntimePodSpec : v1alpha1.ServingRuntimePodSpec {
3225
+ Containers : []v1.Container {
3226
+ {
3227
+ Name : constants .InferenceServiceContainerName ,
3228
+ Image : "kserve/vllm:latest" ,
3229
+ Command : []string {"bash" , "-c" },
3230
+ Args : []string {
3231
+ "python2 -m vllm --model_name=${MODEL_NAME} --model_dir=${MODEL} --tensor-parallel-size=${TENSOR_PARALLEL_SIZE} --pipeline-parallel-size=${PIPELINE_PARALLEL_SIZE}" ,
3232
+ },
3233
+ Resources : defaultResource ,
3234
+ },
3235
+ },
3236
+ },
3237
+ Disabled : proto .Bool (false ),
3238
+ },
3239
+ }
3240
+
3241
+ k8sClient .Create (context .TODO (), servingRuntime )
3242
+ defer k8sClient .Delete (context .TODO (), servingRuntime )
3243
+ serviceName := "modelcar-raw-deployment"
3244
+ var expectedRequest = reconcile.Request {NamespacedName : types.NamespacedName {Name : serviceName , Namespace : constants .KServeNamespace }}
3245
+ var serviceKey = expectedRequest .NamespacedName
3246
+ var storageUri = "oci://test/mnist/export"
3247
+ ctx := context .Background ()
3248
+ isvc := & v1beta1.InferenceService {
3249
+ ObjectMeta : metav1.ObjectMeta {
3250
+ Name : serviceKey .Name ,
3251
+ Namespace : serviceKey .Namespace ,
3252
+ Annotations : map [string ]string {
3253
+ "serving.kserve.io/deploymentMode" : "RawDeployment" ,
3254
+ "serving.kserve.io/autoscalerClass" : "hpa" ,
3255
+ "serving.kserve.io/metrics" : "cpu" ,
3256
+ "serving.kserve.io/targetUtilizationPercentage" : "75" ,
3257
+ },
3258
+ },
3259
+ Spec : v1beta1.InferenceServiceSpec {
3260
+ Predictor : v1beta1.PredictorSpec {
3261
+ ComponentExtensionSpec : v1beta1.ComponentExtensionSpec {
3262
+ MinReplicas : v1beta1 .GetIntReference (1 ),
3263
+ MaxReplicas : 2 ,
3264
+ },
3265
+ PodSpec : v1beta1.PodSpec {
3266
+ ImagePullSecrets : []v1.LocalObjectReference {
3267
+ {Name : "isvc-image-pull-secret" },
3268
+ },
3269
+ },
3270
+ Model : & v1beta1.ModelSpec {
3271
+ ModelFormat : v1beta1.ModelFormat {
3272
+ Name : "vLLM" ,
3273
+ },
3274
+ PredictorExtensionSpec : v1beta1.PredictorExtensionSpec {
3275
+ StorageURI : & storageUri ,
3276
+ RuntimeVersion : proto .String ("0.14.0" ),
3277
+ Container : v1.Container {
3278
+ Name : constants .InferenceServiceContainerName ,
3279
+ Resources : v1.ResourceRequirements {
3280
+ Limits : v1.ResourceList {
3281
+ constants .NvidiaGPUResourceType : resource .MustParse ("1" ),
3282
+ },
3283
+ Requests : v1.ResourceList {
3284
+ constants .NvidiaGPUResourceType : resource .MustParse ("1" ),
3285
+ },
3286
+ },
3287
+ },
3288
+ },
3289
+ },
3290
+ },
3291
+ },
3292
+ }
3293
+
3294
+ isvc .DefaultInferenceService (nil , nil , & v1beta1.SecurityConfig {AutoMountServiceAccountToken : false }, nil )
3295
+ Expect (k8sClient .Create (ctx , isvc )).Should (Succeed ())
3296
+ defer k8sClient .Delete (ctx , isvc )
3297
+
3298
+ inferenceService := & v1beta1.InferenceService {}
3299
+
3300
+ Eventually (func () bool {
3301
+ return k8sClient .Get (ctx , serviceKey , inferenceService ) == nil
3302
+ }, timeout , interval ).Should (BeTrue ())
3303
+
3304
+ actualDeployment := & appsv1.Deployment {}
3305
+ predictorDeploymentKey := types.NamespacedName {Name : constants .PredictorServiceName (serviceKey .Name ),
3306
+ Namespace : serviceKey .Namespace }
3307
+ Eventually (func () error { return k8sClient .Get (context .TODO (), predictorDeploymentKey , actualDeployment ) }, timeout , interval ).
3308
+ Should (Succeed ())
3309
+
3310
+ Expect (actualDeployment .Spec .Template .Spec .ImagePullSecrets ).To (HaveLen (1 ))
3311
+ Expect (actualDeployment .Spec .Template .Spec .ImagePullSecrets [0 ].Name ).To (Equal ("isvc-image-pull-secret" ))
3312
+
3313
+ Expect (k8sClient .Get (ctx , serviceKey , inferenceService )).Should (Succeed ())
3314
+ updateForInferenceService := inferenceService .DeepCopy ()
3315
+ updateForInferenceService .Spec .Predictor .PodSpec .ImagePullSecrets = []v1.LocalObjectReference {
3316
+ {Name : "new-image-pull-secret" },
3317
+ }
3318
+ expectedImagePullSecrets := updateForInferenceService .Spec .Predictor .PodSpec .ImagePullSecrets
3319
+ Eventually (func () error {
3320
+ return k8sClient .Update (ctx , updateForInferenceService )
3321
+ }, timeout , interval ).Should (Succeed ())
3322
+
3323
+ updatedDeployment := & appsv1.Deployment {}
3324
+ Eventually (func () (bool , error ) {
3325
+ if err := k8sClient .Get (ctx , predictorDeploymentKey , updatedDeployment ); err != nil {
3326
+ return false , err
3327
+ }
3328
+ if len (updatedDeployment .Spec .Template .Spec .ImagePullSecrets ) != 1 {
3329
+ return false , nil
3330
+ }
3331
+ return reflect .DeepEqual (updatedDeployment .Spec .Template .Spec .ImagePullSecrets , expectedImagePullSecrets ), nil
3332
+ }, timeout , interval ).Should (BeTrue ())
3333
+
3334
+ })
3335
+ })
3198
3336
})
3199
3337
3200
3338
func verifyPipelineParallelSizeDeployments (actualDefaultDeployment * appsv1.Deployment , actualWorkerDeployment * appsv1.Deployment , pipelineParallelSize string , replicas * int32 ) {
0 commit comments