@@ -17,6 +17,7 @@ limitations under the License.
17
17
package odh
18
18
19
19
import (
20
+ "fmt"
20
21
"os"
21
22
"os/exec"
22
23
"strconv"
@@ -68,15 +69,7 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
68
69
test .T ().Skip ("SDG_OBJECT_STORE_DATA_KEY is required to download required data to start training." )
69
70
}
70
71
71
- // judge model details like endpoint, api-key, model-name, ca certs, ...etc should be provided via k8s secret
72
- // we need the secret name so the standalone.py script can fetch the details from that secret.
73
- judgeServingModelSecret , judgeServingModelSecretExists := GetJudeServingModelSecret ()
74
72
ilabStorageClassName , ilabStorageClassNameExists := GetStorageClassName ()
75
-
76
- if ! judgeServingModelSecretExists {
77
- test .T ().Skip ("JUDGE_SERVING_MODEL_SECRET judge model details secret is not provided. " )
78
- }
79
-
80
73
if ! ilabStorageClassNameExists {
81
74
ilabStorageClassName = ILAB_RHELAI_STORAGE_CLASS
82
75
@@ -244,6 +237,8 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
244
237
test .Expect (err ).ToNot (HaveOccurred ())
245
238
test .T ().Logf ("Secret '%s' created successfully\n " , createdSecret .Name )
246
239
240
+ judgeServingModelSecret := CreateJudgeServingModelSecret (test , namespace .Name )
241
+
247
242
// Create pod resource using workbench image to run standalone script
248
243
pod := & corev1.Pod {
249
244
ObjectMeta : metav1.ObjectMeta {
@@ -345,7 +340,7 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
345
340
},
346
341
{
347
342
Name : "JUDGE_SERVING_MODEL_SECRET" ,
348
- Value : judgeServingModelSecret ,
343
+ Value : judgeServingModelSecret . Name ,
349
344
},
350
345
},
351
346
VolumeMounts : []corev1.VolumeMount {
@@ -358,7 +353,7 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
358
353
Command : []string {
359
354
"python3" , "/home/standalone.py" , "run" ,
360
355
"--namespace" , namespace .Name ,
361
- "--judge-serving-model-secret" , judgeServingModelSecret ,
356
+ "--judge-serving-model-secret" , judgeServingModelSecret . Name ,
362
357
"--nproc-per-node" , strconv .Itoa (numGpus ),
363
358
"--storage-class" , ilabStorageClassName ,
364
359
"--sdg-object-store-secret" , createdSecret .Name ,
@@ -397,6 +392,43 @@ func instructlabDistributedTrainingOnRhoai(t *testing.T, numGpus int) {
397
392
test .Expect (err ).ToNot (HaveOccurred ())
398
393
}
399
394
395
+ func CreateJudgeServingModelSecret (test Test , namespace string ) * corev1.Secret {
396
+ // judge model details like endpoint, api-key, model-name, ca certs, ...etc should be provided via k8s secret
397
+ // we need the secret name so the standalone.py script can fetch the details from that secret.
398
+ // Get Judge model server credentials using environment variables
399
+ judgeServingModelApiKeyEnvVar := "JUDGE_API_KEY"
400
+ judgeServingModelNameEnvVar := "JUDGE_NAME"
401
+ judgeServingModelEndpointEnvVar := "JUDGE_ENDPOINT"
402
+ judgeServingCaCertEnvVar := "JUDGE_CA_CERT"
403
+ judgeServingCaCertCmKeyEnvVar := "JUDGE_CA_CERT_CM_KEY"
404
+ judgeServingCaCertFromOpenShiftEnvVar := "JUDGE_CA_CERT_FROM_OPENSHIFT"
405
+ judgeServingModelApiKey , judgeServingModelApiKeyExists := os .LookupEnv (judgeServingModelApiKeyEnvVar )
406
+ judgeServingModelName , judgeServingModelNameExists := os .LookupEnv (judgeServingModelNameEnvVar )
407
+ judgeServingModelEndpoint , judgeServingModelEndpointExists := os .LookupEnv (judgeServingModelEndpointEnvVar )
408
+ judgeServingCaCertFromOpenShift , judgeServingCaCertFromOpenShiftExists := os .LookupEnv (judgeServingCaCertFromOpenShiftEnvVar )
409
+
410
+ test .Expect (judgeServingModelApiKeyExists ).To (BeTrue (), fmt .Sprintf ("please provide judge serving model api key using env variable %s" , judgeServingModelApiKeyEnvVar ))
411
+ test .Expect (judgeServingModelNameExists ).To (BeTrue (), fmt .Sprintf ("please provide judge serving model name using env variable %s" , judgeServingModelNameEnvVar ))
412
+ test .Expect (judgeServingModelEndpointExists ).To (BeTrue (), fmt .Sprintf ("please provide judge serving model endpoint using env variable %s" , judgeServingModelEndpointEnvVar ))
413
+
414
+ judgeServingDetails := map [string ]string {
415
+ judgeServingModelApiKeyEnvVar : judgeServingModelApiKey ,
416
+ judgeServingModelEndpointEnvVar : judgeServingModelEndpoint ,
417
+ judgeServingModelNameEnvVar : judgeServingModelName ,
418
+ }
419
+
420
+ if judgeServingCaCertFromOpenShiftExists && judgeServingCaCertFromOpenShift == "true" {
421
+ test .T ().Logf ("Using OpenShift CA as Judge CA certificate" )
422
+ judgeServingDetails [judgeServingCaCertEnvVar ] = "kube-root-ca.crt"
423
+ judgeServingDetails [judgeServingCaCertCmKeyEnvVar ] = "ca.crt"
424
+ } else {
425
+ test .T ().Logf ("Env variable '%s' not defined or not set to `true`, Judge CA certificate ConfigMap is not provided" , judgeServingCaCertFromOpenShiftEnvVar )
426
+ }
427
+
428
+ judgeServingModelSecret := CreateSecret (test , namespace , judgeServingDetails )
429
+ return judgeServingModelSecret
430
+ }
431
+
400
432
func GetRhelaiWorkbenchImage () (string , bool ) {
401
433
data_key , exists := os .LookupEnv ("RHELAI_WORKBENCH_IMAGE" )
402
434
return data_key , exists
@@ -412,12 +444,6 @@ func GetStorageBucketVerifyTls() (string, bool) {
412
444
return data_key , exists
413
445
}
414
446
415
- // GetJudeServingModelSecret secret containing the details of the judge model
416
- func GetJudeServingModelSecret () (string , bool ) {
417
- data_key , exists := os .LookupEnv ("JUDGE_SERVING_MODEL_SECRET" )
418
- return data_key , exists
419
- }
420
-
421
447
// GetStorageClassName name of the storage class to use for testing, default is nfs-csi
422
448
func GetStorageClassName () (string , bool ) {
423
449
data_key , exists := os .LookupEnv ("TEST_ILAB_STORAGE_CLASS_NAME" )
0 commit comments