@@ -30,6 +30,12 @@ import (
3030 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3131)
3232
33+ // Default values used by the test when the corresponding settings are not provided via environment variables.
34+ const (
35+ ILAB_RHELAI_WORKBENCH_IMAGE = "quay.io/opendatahub/workbench-images:jupyter-datascience-ubi9-python-3.11-20241004-609ffb8"
36+ ILAB_RHELAI_STORAGE_CLASS = "nfs-csi"
37+ )
38+
// TestInstructlabTrainingOnRhoai is the test entry point; it delegates to
// instructlabDistributedTrainingOnRhoai with numGpus set to 1.
3339func TestInstructlabTrainingOnRhoai (t * testing.T ) {
3440 instructlabDistributedTrainingOnRhoai (t , 1 )
3541}
@@ -41,8 +47,9 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
4147
4248 rhelaiWorkbenchImage , rhelaiWorkbenchImageExists := GetRhelaiWorkbenchImage ()
4349 if ! rhelaiWorkbenchImageExists {
44- test .T ().Skip ("Rhelai workbench image is not provided as environment variable.." )
45- rhelaiWorkbenchImage = "quay.io/opendatahub/workbench-images:jupyter-datascience-ubi9-python-3.11-20241004-609ffb8"
50+ rhelaiWorkbenchImage = ILAB_RHELAI_WORKBENCH_IMAGE
51+
52+ test .T ().Logf ("RHELAI workbench image is not provided as environment variable. Using workbench image: %s" , ILAB_RHELAI_WORKBENCH_IMAGE )
4653 }
4754
4855 // Get S3 bucket credentials using environment variables
@@ -55,47 +62,41 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
5562 s3BucketVerifyTls , _ := GetStorageBucketVerifyTls ()
5663
5764 if ! s3BucketNameExists {
58- test .T ().Skip ("Please provide storage bucket name to download SDG data from. ." )
65+ test .T ().Skip ("AWS_STORAGE_BUCKET Bucket name is required ." )
5966 }
6067 if ! s3BucketDataKeyExists {
61- test .T ().Skip ("Please provide storage bucket data-key(Name or path of tar archive) to download SDG+model+taxonomy data from. ." )
68+ test .T ().Skip ("SDG_OBJECT_STORE_DATA_KEY is required to download required data to start training ." )
6269 }
6370
64- // Get Judge model server credentials using environment variables
65- judgeServingApiKey , judgeServingApiKeyExists := GetJudgeServingApiKey ()
66- judgeServingModelName , judgeServingModelNameExists := GetJudgeServingModelName ()
67- judgeServingModelEndpoint , judgeServingModelEndpointExists := GetJudgeServingModelEndpoint ()
68- judgeServingCAConfigMapName , judgeServingCAConfigMapNameExists := GetJudeServingCACertConfigMapName ()
69- judgeServingCAConfigMapKey , judgeServingCAConfigMapKeyExists := GetJudeServingCACertCMKeyName ()
71+ // Judge model details (endpoint, API key, model name, CA certs, etc.) must be provided via a Kubernetes secret.
72+ // Only the secret name is needed here, so that the standalone.py script can fetch the details from that secret.
73+ judgeServingModelSecret , judgeServingModelSecretExists := GetJudeServingModelSecret ()
74+ ilabStorageClassName , ilabStorageClassNameExists := GetStorageClassName ()
7075
71- if ! judgeServingApiKeyExists {
72- test .T ().Skip ("Please provide judge serving api key.." )
73- }
74- if ! judgeServingModelNameExists {
75- test .T ().Skip ("Please provide judge serving model name.." )
76- }
77- if ! judgeServingModelEndpointExists {
78- test .T ().Skip ("Please provide judge serving model endpoint.." )
76+ if ! judgeServingModelSecretExists {
77+ test .T ().Skip ("JUDGE_SERVING_MODEL_SECRET judge model details secret is not provided. " )
7978 }
8079
81- if ! judgeServingCAConfigMapNameExists {
82- test .T ().Logf ("ConfigMap contain CA for the judge model is not provided" )
83- }
80+ if ! ilabStorageClassNameExists {
81+ ilabStorageClassName = ILAB_RHELAI_STORAGE_CLASS
8482
85- if ! judgeServingCAConfigMapKeyExists {
86- test .T ().Logf ("ConfigMap Key containing CA for the judge model is not provided" )
83+ test .T ().Logf ("Storage class is not provided. Using default %s" , ilabStorageClassName )
8784 }
8885
8986 // Create a namespace
9087 test_namespace , test_namespace_exists := GetTestNamespace ()
9188 var namespace * corev1.Namespace
89+
9290 if ! test_namespace_exists {
9391 namespace = test .NewTestNamespace ()
9492 } else {
9593 _ , namespace_exists_err := test .Client ().Core ().CoreV1 ().Namespaces ().Get (test .Ctx (), test_namespace , metav1.GetOptions {})
94+
9695 if namespace_exists_err != nil {
97- test .T ().Logf ("The namespace provided using environment variable doesn't exists.." )
96+
97+ test .T ().Logf ("%s namespace doesn't exists. Creating ..." , test_namespace )
9898 namespace = CreateTestNamespaceWithName (test , test_namespace )
99+
99100 } else {
100101 namespace = GetNamespaceWithName (test , test_namespace )
101102 test .T ().Logf ("Using the namespace name which is provided using environment variable.." )
@@ -343,8 +344,8 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
343344 },
344345 },
345346 {
346- Name : "JUDGE_SERVING_MODEL_API_KEY " ,
347- Value : judgeServingApiKey ,
347+ Name : "JUDGE_SERVING_MODEL_SECRET " ,
348+ Value : judgeServingModelSecret ,
348349 },
349350 },
350351 VolumeMounts : []corev1.VolumeMount {
@@ -357,17 +358,13 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
357358 Command : []string {
358359 "python3" , "/home/standalone.py" , "run" ,
359360 "--namespace" , namespace .Name ,
360- "--judge-serving-model-endpoint" , judgeServingModelEndpoint ,
361- "--judge-serving-model-name" , judgeServingModelName ,
362- "--judge-serving-model-ca-cert" , judgeServingCAConfigMapName ,
363- "--judge-serving-model-ca-cert-cm-key" , judgeServingCAConfigMapKey ,
364- "--judge-serving-model-secret" , "judge-serving-details" ,
361+ "--judge-serving-model-secret" , judgeServingModelSecret ,
365362 "--nproc-per-node" , strconv .Itoa (numGpus ),
366- "--storage-class" , "nfs-csi" ,
363+ "--storage-class" , ilabStorageClassName ,
367364 "--sdg-object-store-secret" , createdSecret .Name ,
368365 // "--training-1-epoch-num", strconv.Itoa(1),
369366 // "--training-2-epoch-num", strconv.Itoa(1),
370- "--force-pull" , "--eval-type" , "mt-bench" , "evaluation" ,
367+ "--force-pull" ,
371368 },
372369 },
373370 },
@@ -415,29 +412,15 @@ func GetStorageBucketVerifyTls() (string, bool) {
415412 return data_key , exists
416413}
417414
418- func GetJudgeServingModelEndpoint () (string , bool ) {
419- data_key , exists := os .LookupEnv ("JUDGE_ENDPOINT" )
420- return data_key , exists
421- }
422-
423- func GetJudgeServingModelName () (string , bool ) {
424- data_key , exists := os .LookupEnv ("JUDGE_NAME" )
425- return data_key , exists
426- }
427-
428- func GetJudgeServingApiKey () (string , bool ) {
429- data_key , exists := os .LookupEnv ("JUDGE_SERVING_MODEL_API_KEY" )
430- return data_key , exists
431- }
432-
433- func GetJudeServingCACertConfigMapName () (string , bool ) {
434- data_key , exists := os .LookupEnv ("JUDGE_SERVING_CA_CONFIGMAP_NAME" )
415+ // GetJudeServingModelSecret returns the name of the secret containing the judge model details, read from the JUDGE_SERVING_MODEL_SECRET environment variable, and whether that variable is set.
416+ func GetJudeServingModelSecret () (string , bool ) {
417+ data_key , exists := os .LookupEnv ("JUDGE_SERVING_MODEL_SECRET" )
435418 return data_key , exists
436419}
437420
438- // GetJudeServingCACertCMKeyName the key name of the ca bundle inside the configmap
439- func GetJudeServingCACertCMKeyName () (string , bool ) {
440- data_key , exists := os .LookupEnv ("JUDGE_SERVING_CA_CONFIGMAP_KEY " )
421+ // GetStorageClassName returns the storage class name to use for testing, read from the environment, and whether the variable is set; when it is not set the caller falls back to the nfs-csi default.
422+ func GetStorageClassName () (string , bool ) {
423+ data_key , exists := os .LookupEnv ("TEST_ILAB_STORAGE_CLASS_NAME " )
441424 return data_key , exists
442425}
443426
0 commit comments