Minor fix: per-job Slurm script names, sbatch log path rename, and job-state polling cleanup

yuanjingx87 · yuanjingx87 · commit bf93595ec19f · 2025-12-31T15:30:04.000-08:00
Signed-off-by: Yuanjing Xue <197832395+yuanjingx87@users.noreply.github.com>
diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
@@ -933,15 +933,15 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
             def scriptBashUtilsPathNode = "${jobWorkspace}/${jobUID}-bash_utils.sh"
             def testListPathNode = "${jobWorkspace}/${testList}.txt"
             def waivesListPathNode = "${jobWorkspace}/waives.txt"
-            def outputPath = "${jobWorkspace}/job-output.log"
+            def sbatchLogPath = "${jobWorkspace}/job-output.log"
             def scriptLaunchPathLocal = Utils.createTempLocation(pipeline, "./slurm_launch.sh")
-            def scriptLaunchPathNode = "${jobWorkspace}/slurm_launch.sh"
+            def scriptLaunchPathNode = "${jobWorkspace}/${jobUID}-slurm_launch.sh"
             def scriptSubmitPathLocal = Utils.createTempLocation(pipeline, "./slurm_submit.sh")
-            def scriptSubmitPathNode = "${jobWorkspace}/slurm_submit.sh"
+            def scriptSubmitPathNode = "${jobWorkspace}/${jobUID}-slurm_submit.sh"
             def scriptTrackPathLocal = Utils.createTempLocation(pipeline, "./slurm_track.sh")
-            def scriptTrackPathNode = "${jobWorkspace}/slurm_track.sh"
+            def scriptTrackPathNode = "${jobWorkspace}/${jobUID}-slurm_track.sh"
             def scriptStatusPathLocal = Utils.createTempLocation(pipeline, "./slurm_status.sh")
-            def scriptStatusPathNode = "${jobWorkspace}/slurm_status.sh"
+            def scriptStatusPathNode = "${jobWorkspace}/${jobUID}-slurm_status.sh"
             def isAarch64 = config.contains("aarch64")
             def coverageConfigFile = "${jobWorkspace}/.coveragerc"
             def perfCheckScriptLocal = "${llmSrcLocal}/tests/integration/defs/perf/perf_regression_check.py"
@@ -1150,8 +1150,7 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
                     "export ${varName}=\"${escapedValue}\""
                 }.join('\n')
 
-                // Save job ID in $jobWorkspace/slurm_job_id.txt for later job to retrieve
-                def scriptLaunchPrefix = """#!/bin/bash
+                def scriptContent = """#!/bin/bash
                     #SBATCH ${exemptionComment}
-                    #SBATCH --output=${outputPath}
+                    #SBATCH --output=${sbatchLogPath}
                     ${taskArgs.collect { "#SBATCH $it" }.join('\n')}
@@ -1247,9 +1246,9 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
                     rm -rf "${jobWorkspace}/results.xml"
                     rm -rf "${jobWorkspace}/report.csv"
                     rm -rf "${jobWorkspace}/unfinished_test.txt"
-                    rm -rf "${outputPath}"
+                    rm -rf "${sbatchLogPath}"
 
-                    touch "${outputPath}"
+                    touch "${sbatchLogPath}"
                     jobId=\$(sbatch ${scriptLaunchPathNode} | awk '{print \$4}')
                     if [ -z "\$jobId" ]; then
                         echo "Error: Slurm job submission failed, no job ID returned."
@@ -1281,14 +1280,13 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
                 )
                 def scriptTrack = """#!/bin/bash
                     jobId=\$(cat $jobWorkspace/slurm_job_id.txt)
-                    tail -f $outputPath &
+                    tail -f ${sbatchLogPath} &
                     tailPid=\$!
                     # Wait until sbatch job is done.
                     while true; do
-                        state=\$(sacct -j \$jobId --format=JobIDRaw,State --noheader | \
-                            awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}')
-                        if [[ -z "\$state" || "\$state" == "RUNNING" || \
-                            "\$state" == "PENDING"]]; then
+                        state=\$(sacct -j \$jobId --format=JobIDRaw,State --noheader | awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}')
+                        if [[ -z \$state || \$state == "RUNNING" || \$state == "PENDING" ]]; then
+                            echo "job is still running"
                             sleep 300
                         else
                             echo "Job \$jobId finished with state: \$state"
@@ -1340,8 +1338,7 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
                 )
                 def scriptStatus = """#!/bin/bash
                     jobId=\$(cat $jobWorkspace/slurm_job_id.txt)
-                    sacct -j \$jobId --format=JobIDRaw,State --noheader |\
-                        awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}'
+                    sacct -j \$jobId --format=JobIDRaw,State --noheader | awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}'
                 """
                 pipeline.writeFile(file: scriptStatusPathLocal, text: scriptStatus)
                 Utils.copyFileToRemoteHost(
@@ -1378,13 +1375,10 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
                             remote,
                             scriptStatusPathNode
                         )
-                    )
-                    println(result)
-                    if (result == "") {
-                        echo "Job is done."
-                        break
-                    } else {
+                    ).trim()
+                    if (!result || result == "RUNNING" || result == "PENDING") {
                         echo "Job is still running, pulling the job log."
+                        // Pulling the sbatch output log
                         Utils.exec(
                             pipeline,
                             timeout: false,
@@ -1393,6 +1387,9 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
                                 scriptTrackPathNode
                             )
                         )
+                    } else {
+                        echo "Job is done."
+                        break
                     }
                 }
             }