Skip to content

Commit 8028e4d

Browse files
committed
Address feedbacks
Signed-off-by: Yuanjing Xue <197832395+yuanjingx87@users.noreply.github.com>
1 parent e566439 commit 8028e4d

File tree

1 file changed

+14
-6
lines changed

1 file changed

+14
-6
lines changed

jenkins/L0_Test.groovy

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1138,9 +1138,9 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
11381138
"export ${varName}=\"${escapedValue}\""
11391139
}.join('\n')
11401140

1141-
def scriptContent = """#!/bin/bash
1141+
def scriptLaunchPrefix = """#!/bin/bash
11421142
#SBATCH ${exemptionComment}
1143-
#SBATCH --output=${outputPath}
1143+
#SBATCH --output=${sbatchLogPath}
11441144
${taskArgs.collect { "#SBATCH $it" }.join('\n')}
11451145
#SBATCH ${partition.additionalArgs}
11461146
${partition?.time ? "#SBATCH --time=${partition.time}" : "#SBATCH --time=${SlurmConfig.DEFAULT_TIMEOUT_SHORT}"}
@@ -1266,14 +1266,22 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
12661266
),
12671267
numRetries: 3
12681268
)
1269+
def sbatchJobId = Utils.exec(
1270+
pipeline,
1271+
returnStdout: true,
1272+
script: Utils.sshUserCmd(
1273+
remote,
1274+
"cat $jobWorkspace/slurm_job_id.txt"
1275+
)
1276+
).trim()
12691277
def scriptTrack = """#!/bin/bash
12701278
jobId=\$(cat $jobWorkspace/slurm_job_id.txt)
12711279
tail -f ${sbatchLogPath} &
12721280
tailPid=\$!
12731281
# Wait until sbatch job is done.
12741282
while true; do
12751283
state=\$(sacct -j \$jobId --format=JobIDRaw,State --noheader | awk -v jobId=\$jobId '""\$1"" == jobId {print \$2}')
1276-
if [[ -z \$state || \$state == "RUNNING" || \$state == "PENDING" ]]; then
1284+
if [[ -z \$state || \$state == "RUNNING" || \$state == "PENDING" || \$state == "CONFIGURING" ]]; then
12771285
echo "job is still running"
12781286
sleep 300
12791287
else
@@ -1364,8 +1372,8 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
13641372
scriptStatusPathNode
13651373
)
13661374
).trim()
1367-
if (!result || result == "RUNNING" || result == "PENDING") {
1368-
echo "Job is still running, pulling the job log."
1375+
if (!result || result == "RUNNING" || result == "PENDING" || result == "CONFIGURING") {
1376+
echo "Slurm job $sbatchJobId is still running, pulling the job log."
13691377
// Pulling the sbatch output log
13701378
Utils.exec(
13711379
pipeline,
@@ -1376,7 +1384,7 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
13761384
)
13771385
)
13781386
} else {
1379-
echo "Job is done."
1387+
echo "Slurm job $sbatchJobId is done."
13801388
break
13811389
}
13821390
}

0 commit comments

Comments
 (0)