Skip to content

Commit 0bf6f6e

Browse files
committed
clean up
1 parent b800154 commit 0bf6f6e

File tree

4 files changed

+15
-19
lines changed

4 files changed

+15
-19
lines changed

src/backends.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ function module_load_string(::Type{DerechoBackend})
9898
module purge
9999
module load climacommon
100100
module list
101+
export TMPDIR=\${SCRATCH}/temp
102+
mkdir -p \${TMPDIR}
103+
echo \$TMPDIR
101104
"""
102105
end
103106

src/pbs.jl

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,11 @@ duplicating the job snippet in tests.
8686
"""
8787
function pbs_trap_block()
8888
return """
89-
# Self-requeue on preemption or near-walltime signals
90-
# - Many PBS deployments send SIGTERM shortly before walltime or on preemption;
91-
# some may send SIGUSR1 as a warning.
92-
# - We trap these signals and call `qrerun` to requeue the same job ID so it can
93-
# continue later with the same submission parameters.
94-
# - Exiting with status 0 prevents the scheduler from marking the job as failed
95-
# due to the trap.
89+
# Self-requeue on preemption or near-walltime signals:
90+
# Trap SIGTERM on job termination and call `qrerun` to requeue the same job ID
91+
# so it can continue later with the same submission parameters.
92+
# Exiting with status 0 prevents the scheduler from marking the job as failed
93+
# due to the trap.
9694
handle_preterminate() {
9795
sig="\$1"
9896
echo "[ClimaCalibrate] Received \$sig on PBS job \${PBS_JOBID:-unknown}, attempting qrerun"
@@ -104,7 +102,6 @@ function pbs_trap_block()
104102
exit 0
105103
}
106104
trap 'handle_preterminate TERM' TERM
107-
trap 'handle_preterminate USR1' USR1
108105
"""
109106
end
110107

src/slurm.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,10 +250,10 @@ function generate_sbatch_script(
250250
# Self-requeue on pre-timeout or termination signals
251251
# `#SBATCH --signal=B:USR1@300` sends SIGUSR1 to the batch script 300 seconds before
252252
# the job time limit (B means send to the batch script). Sites may also deliver TERM.
253-
# We trap USR1/TERM and call `scontrol requeue $SLURM_JOB_ID` so the job returns to
253+
# We trap USR1/TERM and call `scontrol requeue \$SLURM_JOB_ID` so the job returns to
254254
# the queue and can continue later with the same submission parameters.
255255
# Exiting with status 0 prevents a false failure due to the trap itself.
256-
trap 'echo "[ClimaCalibrate] Pre-timeout/TERM on job $SLURM_JOB_ID, requeuing"; scontrol requeue $SLURM_JOB_ID; exit 0' USR1 TERM
256+
trap 'echo "[ClimaCalibrate] Pre-timeout/TERM on job \$SLURM_JOB_ID, requeuing"; scontrol requeue \$SLURM_JOB_ID; exit 0' USR1 TERM
257257
258258
$module_load_str
259259
export CLIMACOMMS_DEVICE="$climacomms_device"

test/pbs_unit_tests.jl

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,11 @@ expected_pbs_contents = """
4646
#PBS -l walltime=01:30:00
4747
#PBS -l select=2:ncpus=16:ngpus=2:mpiprocs=2
4848
49-
# Self-requeue on preemption or near-walltime signals
50-
# - Many PBS deployments send SIGTERM shortly before walltime or on preemption;
51-
# some may send SIGUSR1 as a warning.
52-
# - We trap these signals and call `qrerun` to requeue the same job ID so it can
53-
# continue later with the same submission parameters.
54-
# - Exiting with status 0 prevents the scheduler from marking the job as failed
55-
# due to the trap.
49+
# Self-requeue on preemption or near-walltime signals:
50+
# Trap SIGTERM on job termination and call `qrerun` to requeue the same job ID
51+
# so it can continue later with the same submission parameters.
52+
# Exiting with status 0 prevents the scheduler from marking the job as failed
53+
# due to the trap.
5654
handle_preterminate() {
5755
sig="\$1"
5856
echo "[ClimaCalibrate] Received \$sig on PBS job \${PBS_JOBID:-unknown}, attempting qrerun"
@@ -64,8 +62,6 @@ handle_preterminate() {
6462
exit 0
6563
}
6664
trap 'handle_preterminate TERM' TERM
67-
trap 'handle_preterminate USR1' USR1
68-
6965
7066
export MODULEPATH="/glade/campaign/univ/ucit0011/ClimaModules-Derecho:\$MODULEPATH"
7167
module purge

0 commit comments

Comments
 (0)