@@ -119,7 +119,7 @@ func kftoSftLlm(t *testing.T, image string, gpu Accelerator, modelName string) {
119
119
"eval_strategy: epoch" : "eval_strategy: 'no'" ,
120
120
"logging_steps: 1" : "logging_steps: 10" ,
121
121
"output_dir: /mnt/shared/Meta-Llama-3.1-8B-Instruct" : fmt .Sprintf ("output_dir: /mnt/shared/%s" , modelName ),
122
- "api_server = \\ \" <API_SERVER> \\ \" " : fmt . Sprintf ( "api_server = \\ \" %s \\ \" " , GetOpenShiftApiUrl ( test )) ,
122
+ "%pip install -U kubeflow-training " : "#%pip install -U kubeflow-training " ,
123
123
"token = \\ \" <TOKEN>\\ \" " : fmt .Sprintf ("token = \\ \" %s\\ \" " , userToken ),
124
124
"#configuration.verify_ssl = False" : "configuration.verify_ssl = False" ,
125
125
"name=\\ \" sft\\ \" " : fmt .Sprintf ("name=\\ \" sft-%s\\ \" " , namespace .Name ),
@@ -128,7 +128,7 @@ func kftoSftLlm(t *testing.T, image string, gpu Accelerator, modelName string) {
128
128
"base_image=\\ \" quay.io/modh/training:py311-cuda124-torch251\\ \" " : fmt .Sprintf ("base_image=\\ \" %s\\ \" " , image ),
129
129
"\" HF_TOKEN\\ \" : \\ \" \\ \" " : fmt .Sprintf ("\" HF_TOKEN\\ \" : \\ \" %s\\ \" " , hfToken ),
130
130
"claim_name=\\ \" shared\\ \" " : fmt .Sprintf ("claim_name=\\ \" %s\\ \" " , notebookPVC .Name ),
131
- "\" client.get_job_logs(\\ n\" ," : "\" client.wait_for_job_conditions(\\ n\" ," ,
131
+ "\" _ = client.get_job_logs(\\ n\" ," : "\" client.wait_for_job_conditions(\\ n\" ," ,
132
132
"\" follow=True,\\ n\" ," : "\" wait_timeout=1800,\\ n\" ,\n \t \" polling_interval=60,\\ n\" ," ,
133
133
"os.environ[\\ \" TENSORBOARD_PROXY_URL\\ \" ]" : "#os.environ[\\ \" TENSORBOARD_PROXY_URL\\ \" ]" ,
134
134
"%load_ext tensorboard" : "#%load_ext tensorboard" ,
0 commit comments