Skip to content

Commit a291ccd

Browse files
szaher authored and openshift-merge-bot[bot] committed
Cleanup Judge Model Details
Judge model details are provided via a k8s secret, so we don't need all of the environment variables for the judge model. We only need `JUDGE_SERVING_MODEL_SECRET`, which names the secret that contains all of the details. Signed-off-by: Saad <[email protected]>
1 parent 4a4bb90 commit a291ccd

File tree

1 file changed

+36
-53
lines changed

1 file changed

+36
-53
lines changed

tests/odh/ilab_dw_rhoai_test.go

Lines changed: 36 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,12 @@ import (
3030
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3131
)
3232

33+
// setting some defaults in case not provided.
34+
const (
35+
ILAB_RHELAI_WORKBENCH_IMAGE = "quay.io/opendatahub/workbench-images:jupyter-datascience-ubi9-python-3.11-20241004-609ffb8"
36+
ILAB_RHELAI_STORAGE_CLASS = "nfs-csi"
37+
)
38+
3339
func TestInstructlabTrainingOnRhoai(t *testing.T) {
3440
instructlabDistributedTrainingOnRhoai(t, 1)
3541
}
@@ -41,8 +47,9 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
4147

4248
rhelaiWorkbenchImage, rhelaiWorkbenchImageExists := GetRhelaiWorkbenchImage()
4349
if !rhelaiWorkbenchImageExists {
44-
test.T().Skip("Rhelai workbench image is not provided as environment variable..")
45-
rhelaiWorkbenchImage = "quay.io/opendatahub/workbench-images:jupyter-datascience-ubi9-python-3.11-20241004-609ffb8"
50+
rhelaiWorkbenchImage = ILAB_RHELAI_WORKBENCH_IMAGE
51+
52+
test.T().Logf("RHELAI workbench image is not provided as environment variable. Using workbench image: %s", ILAB_RHELAI_WORKBENCH_IMAGE)
4653
}
4754

4855
// Get S3 bucket credentials using environment variables
@@ -55,47 +62,41 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
5562
s3BucketVerifyTls, _ := GetStorageBucketVerifyTls()
5663

5764
if !s3BucketNameExists {
58-
test.T().Skip("Please provide storage bucket name to download SDG data from..")
65+
test.T().Skip("AWS_STORAGE_BUCKET Bucket name is required.")
5966
}
6067
if !s3BucketDataKeyExists {
61-
test.T().Skip("Please provide storage bucket data-key(Name or path of tar archive) to download SDG+model+taxonomy data from..")
68+
test.T().Skip("SDG_OBJECT_STORE_DATA_KEY is required to download required data to start training.")
6269
}
6370

64-
// Get Judge model server credentials using environment variables
65-
judgeServingApiKey, judgeServingApiKeyExists := GetJudgeServingApiKey()
66-
judgeServingModelName, judgeServingModelNameExists := GetJudgeServingModelName()
67-
judgeServingModelEndpoint, judgeServingModelEndpointExists := GetJudgeServingModelEndpoint()
68-
judgeServingCAConfigMapName, judgeServingCAConfigMapNameExists := GetJudeServingCACertConfigMapName()
69-
judgeServingCAConfigMapKey, judgeServingCAConfigMapKeyExists := GetJudeServingCACertCMKeyName()
71+
// judge model details like endpoint, api-key, model-name, ca certs, ...etc should be provided via k8s secret
72+
// we need the secret name so the standalone.py script can fetch the details from that secret.
73+
judgeServingModelSecret, judgeServingModelSecretExists := GetJudeServingModelSecret()
74+
ilabStorageClassName, ilabStorageClassNameExists := GetStorageClassName()
7075

71-
if !judgeServingApiKeyExists {
72-
test.T().Skip("Please provide judge serving api key..")
73-
}
74-
if !judgeServingModelNameExists {
75-
test.T().Skip("Please provide judge serving model name..")
76-
}
77-
if !judgeServingModelEndpointExists {
78-
test.T().Skip("Please provide judge serving model endpoint..")
76+
if !judgeServingModelSecretExists {
77+
test.T().Skip("JUDGE_SERVING_MODEL_SECRET judge model details secret is not provided. ")
7978
}
8079

81-
if !judgeServingCAConfigMapNameExists {
82-
test.T().Logf("ConfigMap contain CA for the judge model is not provided")
83-
}
80+
if !ilabStorageClassNameExists {
81+
ilabStorageClassName = ILAB_RHELAI_STORAGE_CLASS
8482

85-
if !judgeServingCAConfigMapKeyExists {
86-
test.T().Logf("ConfigMap Key containing CA for the judge model is not provided")
83+
test.T().Logf("Storage class is not provided. Using default %s", ilabStorageClassName)
8784
}
8885

8986
// Create a namespace
9087
test_namespace, test_namespace_exists := GetTestNamespace()
9188
var namespace *corev1.Namespace
89+
9290
if !test_namespace_exists {
9391
namespace = test.NewTestNamespace()
9492
} else {
9593
_, namespace_exists_err := test.Client().Core().CoreV1().Namespaces().Get(test.Ctx(), test_namespace, metav1.GetOptions{})
94+
9695
if namespace_exists_err != nil {
97-
test.T().Logf("The namespace provided using environment variable doesn't exists..")
96+
97+
test.T().Logf("%s namespace doesn't exists. Creating ...", test_namespace)
9898
namespace = CreateTestNamespaceWithName(test, test_namespace)
99+
99100
} else {
100101
namespace = GetNamespaceWithName(test, test_namespace)
101102
test.T().Logf("Using the namespace name which is provided using environment variable..")
@@ -343,8 +344,8 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
343344
},
344345
},
345346
{
346-
Name: "JUDGE_SERVING_MODEL_API_KEY",
347-
Value: judgeServingApiKey,
347+
Name: "JUDGE_SERVING_MODEL_SECRET",
348+
Value: judgeServingModelSecret,
348349
},
349350
},
350351
VolumeMounts: []corev1.VolumeMount{
@@ -357,17 +358,13 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
357358
Command: []string{
358359
"python3", "/home/standalone.py", "run",
359360
"--namespace", namespace.Name,
360-
"--judge-serving-model-endpoint", judgeServingModelEndpoint,
361-
"--judge-serving-model-name", judgeServingModelName,
362-
"--judge-serving-model-ca-cert", judgeServingCAConfigMapName,
363-
"--judge-serving-model-ca-cert-cm-key", judgeServingCAConfigMapKey,
364-
"--judge-serving-model-secret", "judge-serving-details",
361+
"--judge-serving-model-secret", judgeServingModelSecret,
365362
"--nproc-per-node", strconv.Itoa(numGpus),
366-
"--storage-class", "nfs-csi",
363+
"--storage-class", ilabStorageClassName,
367364
"--sdg-object-store-secret", createdSecret.Name,
368365
// "--training-1-epoch-num", strconv.Itoa(1),
369366
// "--training-2-epoch-num", strconv.Itoa(1),
370-
"--force-pull", "--eval-type", "mt-bench", "evaluation",
367+
"--force-pull",
371368
},
372369
},
373370
},
@@ -415,29 +412,15 @@ func GetStorageBucketVerifyTls() (string, bool) {
415412
return data_key, exists
416413
}
417414

418-
func GetJudgeServingModelEndpoint() (string, bool) {
419-
data_key, exists := os.LookupEnv("JUDGE_ENDPOINT")
420-
return data_key, exists
421-
}
422-
423-
func GetJudgeServingModelName() (string, bool) {
424-
data_key, exists := os.LookupEnv("JUDGE_NAME")
425-
return data_key, exists
426-
}
427-
428-
func GetJudgeServingApiKey() (string, bool) {
429-
data_key, exists := os.LookupEnv("JUDGE_SERVING_MODEL_API_KEY")
430-
return data_key, exists
431-
}
432-
433-
func GetJudeServingCACertConfigMapName() (string, bool) {
434-
data_key, exists := os.LookupEnv("JUDGE_SERVING_CA_CONFIGMAP_NAME")
415+
// GetJudeServingModelSecret secret containing the details of the judge model
416+
func GetJudeServingModelSecret() (string, bool) {
417+
data_key, exists := os.LookupEnv("JUDGE_SERVING_MODEL_SECRET")
435418
return data_key, exists
436419
}
437420

438-
// GetJudeServingCACertCMKeyName the key name of the ca bundle inside the configmap
439-
func GetJudeServingCACertCMKeyName() (string, bool) {
440-
data_key, exists := os.LookupEnv("JUDGE_SERVING_CA_CONFIGMAP_KEY")
421+
// GetStorageClassName name of the storage class to use for testing, default is nfs-csi
422+
func GetStorageClassName() (string, bool) {
423+
data_key, exists := os.LookupEnv("TEST_ILAB_STORAGE_CLASS_NAME")
441424
return data_key, exists
442425
}
443426

0 commit comments

Comments
 (0)