@@ -1117,6 +1117,9 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
11171117
11181118 // Save the job ID in $jobWorkspace/slurm_job_id.txt for a later job to retrieve
11191119 def scriptLaunchPrefix = """ #!/bin/bash
1120+ set -xEeuo pipefail
1121+ trap 'rc=\$ ?; echo "Error in file \$ {BASH_SOURCE[0]} on line \$ LINENO: \$ BASH_COMMAND (exit \$ rc)"; exit \$ rc' ERR
1122+
11201123 #SBATCH ${ exemptionComment}
11211124 #SBATCH --output=${ outputPath}
11221125 ${ taskArgs.collect { "#SBATCH $it" }.join('\n')}
@@ -1125,8 +1128,6 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
11251128 echo "Starting job \$ SLURM_JOB_ID on \$ SLURM_NODELIST"
11261129 echo "\$ SLURM_JOB_ID > $jobWorkspace /slurm_job_id.txt
11271130
1128- set -xEeuo pipefail
1129- trap 'rc=\$ ?; echo "Error in file \$ {BASH_SOURCE[0]} on line \$ LINENO: \$ BASH_COMMAND (exit \$ rc)"; exit \$ rc' ERR
11301131 export jobWorkspace=$jobWorkspace
11311132 export tarName=$tarName
11321133 export llmTarfile=$llmTarfile
@@ -1198,7 +1199,14 @@ def runLLMTestlistWithSbatch(pipeline, platform, testList, config=VANILLA_CONFIG
11981199 def scriptExec = """ #!/bin/bash
11991200 set -xEeuo pipefail
12001201 trap 'rc=\$ ?; echo "Error in file \$ {BASH_SOURCE[0]} on line \$ LINENO: \$ BASH_COMMAND (exit \$ rc)"; exit \$ rc' ERR
1202+
1203+ # Clean up previous job intermediate files so that retry can work
1204+ rm -rf ${ jobWorkspace} /slurm_job_id.txt
1205+ rm -rf ${ jobWorkspace} /results.xml
1206+ rm -rf ${ jobWorkspace} /report.csv
1207+ rm -rf ${ jobWorkspace} /unfinished_test.txt
12011208 rm -rf ${ outputPath}
1209+
12021210 touch ${ outputPath}
12031211 jobId=\$ (sbatch ${ scriptLaunchPathNode} | awk '{print \$ 4}')
12041212 if [ -z "\$ jobId" ]; then
0 commit comments