Skip to content

Commit e566439

Browse files
committed
minor fix
Signed-off-by: Yuanjing Xue <197832395+yuanjingx87@users.noreply.github.com>
1 parent 122e46e commit e566439

File tree

1 file changed

+19
-22
lines changed

1 file changed

+19
-22
lines changed

jenkins/L0_Test.groovy

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -933,15 +933,15 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
933933
def scriptBashUtilsPathNode = "${jobWorkspace}/${jobUID}-bash_utils.sh"
934934
def testListPathNode = "${jobWorkspace}/${testList}.txt"
935935
def waivesListPathNode = "${jobWorkspace}/waives.txt"
936-
def outputPath = "${jobWorkspace}/job-output.log"
936+
def sbatchLogPath = "${jobWorkspace}/job-output.log"
937937
def scriptLaunchPathLocal = Utils.createTempLocation(pipeline, "./slurm_launch.sh")
938-
def scriptLaunchPathNode = "${jobWorkspace}/slurm_launch.sh"
938+
def scriptLaunchPathNode = "${jobWorkspace}/${jobUID}-slurm_launch.sh"
939939
def scriptSubmitPathLocal = Utils.createTempLocation(pipeline, "./slurm_submit.sh")
940-
def scriptSubmitPathNode = "${jobWorkspace}/slurm_submit.sh"
940+
def scriptSubmitPathNode = "${jobWorkspace}/${jobUID}-slurm_submit.sh"
941941
def scriptTrackPathLocal = Utils.createTempLocation(pipeline, "./slurm_track.sh")
942-
def scriptTrackPathNode = "${jobWorkspace}/slurm_track.sh"
942+
def scriptTrackPathNode = "${jobWorkspace}/${jobUID}-slurm_track.sh"
943943
def scriptStatusPathLocal = Utils.createTempLocation(pipeline, "./slurm_status.sh")
944-
def scriptStatusPathNode = "${jobWorkspace}/slurm_status.sh"
944+
def scriptStatusPathNode = "${jobWorkspace}/${jobUID}-slurm_status.sh"
945945
def isAarch64 = config.contains("aarch64")
946946
def coverageConfigFile = "${jobWorkspace}/.coveragerc"
947947

@@ -1138,8 +1138,7 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
11381138
"export ${varName}=\"${escapedValue}\""
11391139
}.join('\n')
11401140

1141-
// Save job ID in $jobWorkspace/slurm_job_id.txt for later job to retrieve
1142-
def scriptLaunchPrefix = """#!/bin/bash
1141+
def scriptContent = """#!/bin/bash
11431142
#SBATCH ${exemptionComment}
11441143
#SBATCH --output=${outputPath}
11451144
${taskArgs.collect { "#SBATCH $it" }.join('\n')}
@@ -1235,9 +1234,9 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
12351234
rm -rf "${jobWorkspace}/results.xml"
12361235
rm -rf "${jobWorkspace}/report.csv"
12371236
rm -rf "${jobWorkspace}/unfinished_test.txt"
1238-
rm -rf "${outputPath}"
1237+
rm -rf "${sbatchLogPath}"
12391238
1240-
touch "${outputPath}"
1239+
touch ${sbatchLogPath}
12411240
jobId=\$(sbatch ${scriptLaunchPathNode} | awk '{print \$4}')
12421241
if [ -z "\$jobId" ]; then
12431242
echo "Error: Slurm job submission failed, no job ID returned."
@@ -1269,14 +1268,13 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
12691268
)
12701269
def scriptTrack = """#!/bin/bash
12711270
jobId=\$(cat $jobWorkspace/slurm_job_id.txt)
1272-
tail -f $outputPath &
1271+
tail -f ${sbatchLogPath} &
12731272
tailPid=\$!
12741273
# Wait until sbatch job is done.
12751274
while true; do
1276-
state=\$(sacct -j \$jobId --format=JobIDRaw,State --noheader | \
1277-
awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}')
1278-
if [[ -z "\$state" || "\$state" == "RUNNING" || \
1279-
"\$state" == "PENDING"]]; then
1275+
state=\$(sacct -j \$jobId --format=JobIDRaw,State --noheader | awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}')
1276+
if [[ -z \$state || \$state == "RUNNING" || \$state == "PENDING" ]]; then
1277+
echo "job is still running"
12801278
sleep 300
12811279
else
12821280
echo "Job \$jobId finished with state: \$state"
@@ -1328,8 +1326,7 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
13281326
)
13291327
def scriptStatus = """#!/bin/bash
13301328
jobId=\$(cat $jobWorkspace/slurm_job_id.txt)
1331-
sacct -j \$jobId --format=JobIDRaw,State --noheader |\
1332-
awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}'
1329+
sacct -j \$jobId --format=JobIDRaw,State --noheader | awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}'
13331330
"""
13341331
pipeline.writeFile(file: scriptStatusPathLocal, text: scriptStatus)
13351332
Utils.copyFileToRemoteHost(
@@ -1366,13 +1363,10 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
13661363
remote,
13671364
scriptStatusPathNode
13681365
)
1369-
)
1370-
println(result)
1371-
if (result == "") {
1372-
echo "Job is done."
1373-
break
1374-
} else {
1366+
).trim()
1367+
if (!result || result == "RUNNING" || result == "PENDING") {
13751368
echo "Job is still running, pulling the job log."
1369+
// Job has not finished yet: run the tracking script on the remote host to pull the sbatch output log
13761370
Utils.exec(
13771371
pipeline,
13781372
timeout: false,
@@ -1381,6 +1375,9 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
13811375
scriptTrackPathNode
13821376
)
13831377
)
1378+
} else {
1379+
echo "Job is done."
1380+
break
13841381
}
13851382
}
13861383
}

0 commit comments

Comments
 (0)