File tree Expand file tree Collapse file tree 4 files changed +15
-19
lines changed Expand file tree Collapse file tree 4 files changed +15
-19
lines changed Original file line number Diff line number Diff line change @@ -98,6 +98,9 @@ function module_load_string(::Type{DerechoBackend})
98
98
module purge
99
99
module load climacommon
100
100
module list
101
+ export TMPDIR=\${SCRATCH}/temp
102
+ mkdir -p \${TMPDIR}
103
+ echo \$TMPDIR
101
104
"""
102
105
end
103
106
Original file line number Diff line number Diff line change @@ -86,13 +86,11 @@ duplicating the job snippet in tests.
86
86
"""
87
87
function pbs_trap_block()
88
88
return """
89
- # Self-requeue on preemption or near-walltime signals
90
- # - Many PBS deployments send SIGTERM shortly before walltime or on preemption;
91
- # some may send SIGUSR1 as a warning.
92
- # - We trap these signals and call `qrerun` to requeue the same job ID so it can
93
- # continue later with the same submission parameters.
94
- # - Exiting with status 0 prevents the scheduler from marking the job as failed
95
- # due to the trap.
89
+ # Self-requeue on preemption or near-walltime signals:
90
+ # Trap SIGTERM on job termination and call `qrerun` to requeue the same job ID
91
+ # so it can continue later with the same submission parameters.
92
+ # Exiting with status 0 prevents the scheduler from marking the job as failed
93
+ # due to the trap.
96
94
handle_preterminate() {
97
95
sig="\$1"
98
96
echo "[ClimaCalibrate] Received \$sig on PBS job \${PBS_JOBID:-unknown}, attempting qrerun"
@@ -104,7 +102,6 @@ function pbs_trap_block()
104
102
exit 0
105
103
}
106
104
trap 'handle_preterminate TERM' TERM
107
- trap 'handle_preterminate USR1' USR1
108
105
"""
109
106
end
110
107
Original file line number Diff line number Diff line change @@ -250,10 +250,10 @@ function generate_sbatch_script(
250
250
# Self-requeue on pre-timeout or termination signals
251
251
# `#SBATCH --signal=B:USR1@300` sends SIGUSR1 to the batch script 300 seconds before
252
252
# the job time limit (B means send to the batch script). Sites may also deliver TERM.
253
- # We trap USR1/TERM and call `scontrol requeue $SLURM_JOB_ID` so the job returns to
253
+ # We trap USR1/TERM and call `scontrol requeue \$SLURM_JOB_ID` so the job returns to
254
254
# the queue and can continue later with the same submission parameters.
255
255
# Exiting with status 0 prevents a false failure due to the trap itself.
256
- trap 'echo "[ClimaCalibrate] Pre-timeout/TERM on job $SLURM_JOB_ID, requeuing"; scontrol requeue $SLURM_JOB_ID; exit 0' USR1 TERM
256
+ trap 'echo "[ClimaCalibrate] Pre-timeout/TERM on job \$SLURM_JOB_ID, requeuing"; scontrol requeue \$SLURM_JOB_ID; exit 0' USR1 TERM
257
257
258
258
$module_load_str
259
259
export CLIMACOMMS_DEVICE="$climacomms_device"
Original file line number Diff line number Diff line change @@ -46,13 +46,11 @@ expected_pbs_contents = """
46
46
#PBS -l walltime=01:30:00
47
47
#PBS -l select=2:ncpus=16:ngpus=2:mpiprocs=2
48
48
49
- # Self-requeue on preemption or near-walltime signals
50
- # - Many PBS deployments send SIGTERM shortly before walltime or on preemption;
51
- # some may send SIGUSR1 as a warning.
52
- # - We trap these signals and call `qrerun` to requeue the same job ID so it can
53
- # continue later with the same submission parameters.
54
- # - Exiting with status 0 prevents the scheduler from marking the job as failed
55
- # due to the trap.
49
+ # Self-requeue on preemption or near-walltime signals:
50
+ # Trap SIGTERM on job termination and call `qrerun` to requeue the same job ID
51
+ # so it can continue later with the same submission parameters.
52
+ # Exiting with status 0 prevents the scheduler from marking the job as failed
53
+ # due to the trap.
56
54
handle_preterminate() {
57
55
sig="\$1"
58
56
echo "[ClimaCalibrate] Received \$sig on PBS job \${PBS_JOBID:-unknown}, attempting qrerun"
@@ -64,8 +62,6 @@ handle_preterminate() {
64
62
exit 0
65
63
}
66
64
trap 'handle_preterminate TERM' TERM
67
- trap 'handle_preterminate USR1' USR1
68
-
69
65
70
66
export MODULEPATH="/glade/campaign/univ/ucit0011/ClimaModules-Derecho:\$MODULEPATH"
71
67
module purge
You can’t perform that action at this time.
0 commit comments