Skip to content

Commit af98b12

Browse files
committed
Using sacct to check status instead of squeue
Signed-off-by: Yuanjing Xue <197832395+yuanjingx87@users.noreply.github.com>
1 parent ef39567 commit af98b12

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

jenkins/L0_Test.groovy

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,8 +1284,16 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
12841284
tail -f $outputPath &
12851285
tailPid=\$!
12861286
# Wait until sbatch job is done.
1287-
while squeue -j \$jobId -o %T >/dev/null 2>&1; do
1288-
sleep 300
1287+
while true; do
1288+
state=\$(sacct -j \$jobId --format=JobIDRaw,State --noheader | \
1289+
awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}')
1290+
# Keep polling while sacct has no row for this job id yet, or the job is still RUNNING/PENDING.
if [[ -z "\$state" || "\$state" == "RUNNING" || \
1291+
"\$state" == "PENDING" ]]; then
1292+
sleep 300
1293+
else
1294+
echo "Job \$jobId finished with state: \$state"
1295+
break
1296+
fi
12891297
done
12901298
# Kill tail -f process
12911299
kill \$tailPid
@@ -1332,7 +1340,8 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
13321340
)
13331341
// Status script text: reads the Slurm job id from slurm_job_id.txt in the job workspace and
// prints the State column of the sacct row whose JobIDRaw equals that id exactly
// (prints nothing when sacct returns no matching row).
def scriptStatus = """#!/bin/bash
13341342
jobId=\$(cat $jobWorkspace/slurm_job_id.txt)
1335-
squeue -j \$jobId -h
1343+
sacct -j \$jobId --format=JobIDRaw,State --noheader |\
1344+
awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}'
13361345
"""
13371346
pipeline.writeFile(file: scriptStatusPathLocal, text: scriptStatus)
13381347
Utils.copyFileToRemoteHost(
@@ -1361,7 +1370,7 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
13611370

13621371
sh "cat $scriptStatusPathLocal"
13631372
while (true) {
1364-
// Check if the job is done by running squeue via SSH
1373+
// Check if the job is done by running sacct via SSH
13651374
def result = Utils.exec(
13661375
pipeline,
13671376
returnStdout: true,

0 commit comments

Comments
 (0)