Skip to content

Commit 9e1a777

Browse files
authored
Merge pull request #409 from ExaWorks/fix_walltimes
Fixed walltimes
2 parents aeddcea + 613ca79 commit 9e1a777

File tree

9 files changed

+54
-15
lines changed

9 files changed

+54
-15
lines changed

src/psij/executors/batch/batch_scheduler_executor.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import time
55
import traceback
66
from abc import abstractmethod
7+
from datetime import timedelta
78
from pathlib import Path
89
from threading import Thread, RLock
910
from typing import Optional, List, Dict, Collection, cast, TextIO, Union
@@ -15,7 +16,6 @@
1516
JobStatus, JobState
1617
from psij.executors.batch.template_function_library import ALL as FUNCTION_LIBRARY
1718

18-
1919
UNKNOWN_ERROR = 'PSIJ: Unknown error'
2020

2121
logger = logging.getLogger(__name__)
@@ -471,8 +471,18 @@ def _create_script_context(self, job: Job) -> Dict[str, object]:
471471
'script_dir': str(self.work_directory)
472472
}
473473
}
474+
assert job.spec is not None
475+
if job.spec.attributes:
476+
duration = job.spec.attributes.duration
477+
if duration is not None:
478+
ctx['formatted_job_duration'] = self._format_duration(duration)
474479
return ctx
475480

481+
def _format_duration(self, d: timedelta) -> str:
482+
# the default is hh:mm:ss, with hh not limited to 24; this is the least ambiguous
483+
# choice
484+
return '%s:%s:%s' % (d.total_seconds() // 3600, (d.seconds // 60) % 60, d.seconds % 60)
485+
476486
def _run_command(self, cmd: List[str]) -> str:
477487
res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
478488
if res.returncode != 0:

src/psij/executors/batch/cobalt.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""Defines a JobExecutor for the Cobalt resource manager."""
2-
2+
from datetime import timedelta
33
from pathlib import Path
44
from typing import Optional, Collection, List, Dict, TextIO
55
import re
@@ -121,3 +121,10 @@ def job_id_from_submit_output(self, out: str) -> str:
121121
if match is None:
122122
raise SubmitException(out)
123123
return match.group(0)
124+
125+
def _format_duration(self, d: timedelta) -> str:
    """Format a job duration for Cobalt's ``--time`` option.

    According to the Cobalt ``qsub`` manual
    (https://trac.mcs.anl.gov/projects/cobalt/wiki/qsub.1.html), the time
    may be specified as either an integer number of minutes or a
    colon-delimited value of the format ``HH:MM:SS``; ``0`` requests the
    maximum allowed walltime. The base class formatter already produces
    ``HH:MM:SS``, so it is reused unchanged.
    """
    formatted = super()._format_duration(d)
    return formatted

src/psij/executors/batch/cobalt/cobalt.mustache

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
#COBALT --proccount={{.}}
1414
{{/process_count}}
1515
{{/job.spec.resources}}
16-
{{#job.spec.attributes}}
17-
{{#duration}}
16+
{{#formatted_job_duration}}
1817
#COBALT --time={{.}}
19-
{{/duration}}
18+
{{/formatted_job_duration}}
19+
{{#job.spec.attributes}}
2020
{{#queue_name}}
2121
#COBALT --queue={{.}}
2222
{{/queue_name}}

src/psij/executors/batch/lsf.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""Defines the LsfJobExecutor class and its config class."""
2-
2+
from datetime import timedelta
33
from pathlib import Path
44
import re
55
import json
@@ -131,3 +131,8 @@ def job_id_from_submit_output(self, out: str) -> str:
131131
def get_list_command(self) -> List[str]:
132132
"""See :meth:`~.BatchSchedulerExecutor.get_list_command`."""
133133
return [_BJOBS_COMMAND, '-a', '-noheader', '-o', 'jobid', '-u', self._current_user()]
134+
135+
def _format_duration(self, d: timedelta) -> str:
136+
# https://www.ibm.com/docs/en/spectrum-lsf/10.1.0?topic=o-w-1:
137+
# bsub -W [hour:]minute[/host_name | /host_model]
138+
return "%s:%s" % (d.total_seconds() // 3600, (d.seconds // 60) % 60)

src/psij/executors/batch/lsf/lsf.mustache

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@
3535
{{/job.spec.resources}}
3636

3737

38-
{{#job_duration}}
38+
{{#formatted_job_duration}}
3939
#BSUB -W {{.}}
40-
{{/job_duration}}
40+
{{/formatted_job_duration}}
4141

4242
{{#job.spec.attributes}}
4343
{{#queue_name}}

src/psij/executors/batch/pbspro.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from datetime import timedelta
12
from pathlib import Path
23
from typing import Optional, Collection, List, Dict, TextIO
34

@@ -145,3 +146,9 @@ def parse_list_output(self, out: str) -> List[str]:
145146
def _get_state(self, state: str) -> JobState:
146147
assert state in _STATE_MAP, f"PBS state {state} is not known to PSI/J"
147148
return _STATE_MAP[state]
149+
150+
def _format_duration(self, d: timedelta) -> str:
    """Format a job duration for the PBS ``walltime`` resource.

    The PBS Professional reference guide
    (https://help.altair.com/2022.1.0/PBS%20Professional/PBSReferenceGuide2022.1.pdf)
    does not give a clear specification for the walltime syntax, but all of
    its examples use ``hh:mm:ss``, which is the base class default, so the
    base class formatter is reused unchanged.
    """
    formatted = super()._format_duration(d)
    return formatted

src/psij/executors/batch/pbspro/pbspro.mustache

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@
1111
#PBS -V
1212
{{/job.spec.inherit_environment}}
1313

14-
{{#job.spec.attributes}}
15-
{{#duration}}
14+
{{#formatted_job_duration}}
1615
#PBS -l walltime={{.}}
17-
# TODO: unclear what the syntax is here for times... its always a mess
18-
{{/duration}}
16+
{{/formatted_job_duration}}
17+
18+
{{#job.spec.attributes}}
1919
{{#projectName}}
2020
#PBS -P {{.}}
2121
{{/projectName}}

src/psij/executors/batch/slurm.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from datetime import timedelta
12
from pathlib import Path
23
from typing import Optional, Collection, List, Dict, TextIO
34

@@ -175,3 +176,12 @@ def _get_message(self, reason: str) -> str:
175176
def job_id_from_submit_output(self, out: str) -> str:
176177
"""See :meth:`~.BatchSchedulerExecutor.job_id_from_submit_output`."""
177178
return out.strip().split()[-1]
179+
180+
def _format_duration(self, d: timedelta) -> str:
181+
# https://slurm.schedmd.com/sbatch.html#OPT_time:
182+
# Acceptable time formats include "minutes", "minutes:seconds", "hours:minutes:seconds",
183+
# "days-hours", "days-hours:minutes" and "days-hours:minutes:seconds".
184+
days = ''
185+
if d.days > 0:
186+
days = str(d.days) + '-'
187+
return days + "%s:%s:%s" % (d.seconds // 3600, (d.seconds // 60) % 60, d.seconds % 60)

src/psij/executors/batch/slurm/slurm.mustache

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@
4545
{{/cpu_cores_per_process}}
4646
{{/job.spec.resources}}
4747

48-
{{#job.spec.attributes}}
49-
{{#duration}}
48+
{{#formatted_job_duration}}
5049
#SBATCH --time={{.}}
51-
{{/duration}}
50+
{{/formatted_job_duration}}
5251

52+
{{#job.spec.attributes}}
5353
{{#queue_name}}
5454
#SBATCH --partition="{{.}}"
5555
{{/queue_name}}

0 commit comments

Comments
 (0)