Skip to content

Commit 122e46e

Browse files
committed
Using sacct to check status instead of squeue
Signed-off-by: Yuanjing Xue <[email protected]>
1 parent bca2190 commit 122e46e

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

jenkins/L0_Test.groovy

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1272,8 +1272,16 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
12721272
tail -f $outputPath &
12731273
tailPid=\$!
12741274
# Wait until sbatch job is done.
1275-
while squeue -j \$jobId -o %T >/dev/null 2>&1; do
1276-
sleep 300
1275+
while true; do
1276+
state=\$(sacct -j \$jobId --format=JobIDRaw,State --noheader | \
1277+
awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}')
1278+
if [[ -z "\$state" || "\$state" == "RUNNING" || \
1279+
"\$state" == "PENDING" ]]; then
1280+
sleep 300
1281+
else
1282+
echo "Job \$jobId finished with state: \$state"
1283+
break
1284+
fi
12771285
done
12781286
# Kill tail -f process
12791287
kill \$tailPid
@@ -1320,7 +1328,8 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
13201328
)
13211329
def scriptStatus = """#!/bin/bash
13221330
jobId=\$(cat $jobWorkspace/slurm_job_id.txt)
1323-
squeue -j \$jobId -h
1331+
sacct -j \$jobId --format=JobIDRaw,State --noheader |\
1332+
awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}'
13241333
"""
13251334
pipeline.writeFile(file: scriptStatusPathLocal, text: scriptStatus)
13261335
Utils.copyFileToRemoteHost(
@@ -1349,7 +1358,7 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
13491358

13501359
sh "cat $scriptStatusPathLocal"
13511360
while (true) {
1352-
// Check if the job is done by running squeue via SSH
1361+
// Check if the job is done by running sacct via SSH
13531362
def result = Utils.exec(
13541363
pipeline,
13551364
returnStdout: true,

0 commit comments

Comments
 (0)