Skip to content

Commit 7ad8f24

Browse files
committed
Merge branch 'main' into fix-env-type
2 parents 1bf70ca + 707429f commit 7ad8f24

21 files changed

+324
-56
lines changed

docs/user_guide.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ following example shows:
311311
.. literalinclude:: ../tests/user_guide/test_prelaunch.py
312312
:language: python
313313
:dedent: 4
314-
:lines: 10-16
314+
:lines: 12-18
315315

316316
where the contents of ``pre_launch.sh`` are
317317

requirements-docs.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,15 @@ sphinx_rtd_theme
1212
sphinx-tabs==3.2.0
1313
sphinx-multiversion
1414

15+
# These are not requirements for PSI/J, but we must pin their versions
16+
# since Sphinx 4.5.0 does not properly do so and later versions of
17+
# these plugins require Sphinx >= 5.0.0
18+
sphinxcontrib-applehelp==1.0.4
19+
sphinxcontrib-devhelp==1.0.2
20+
sphinxcontrib-htmlhelp==2.0.1
21+
sphinxcontrib-serializinghtml==1.1.5
22+
sphinxcontrib-qthelp==1.0.3
23+
1524
# For the web version of the docs
1625
cloud_sptheme == 1.10.1.post20200504175005
1726
# sphinx-autodoc-typehints
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from distutils.version import StrictVersion
2+
3+
from psij.descriptor import Descriptor
4+
5+
6+
__PSI_J_EXECUTORS__ = [Descriptor(name='pbs', nice_name='PBS Pro', aliases=['pbspro'],
7+
version=StrictVersion('0.0.2'),
8+
cls='psij.executors.batch.pbs.PBSJobExecutor'),
9+
Descriptor(name='pbs_classic', nice_name='PBS Classic', aliases=['torque'],
10+
version=StrictVersion('0.0.2'),
11+
cls='psij.executors.batch.pbs_classic.PBSClassicJobExecutor')]

src/psij-descriptors/pbspro_descriptor.py

Lines changed: 0 additions & 8 deletions
This file was deleted.

src/psij/executors/batch/batch_scheduler_executor.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,11 @@ def submit(self, job: Job) -> None:
228228
self._set_job_status(job, JobStatus(JobState.QUEUED,
229229
metadata={'native_id': job.native_id}))
230230
except subprocess.CalledProcessError as ex:
231+
if logger.isEnabledFor(logging.DEBUG):
232+
with submit_file_path.open('r') as submit_file:
233+
script = submit_file.read()
234+
logger.debug('Job %s: submit script is: %s' % (job.id, script))
235+
231236
raise SubmitException(ex.output) from None
232237

233238
self._queue_poll_thread.register_job(job)
@@ -497,7 +502,7 @@ def _create_script_context(self, job: Job) -> Dict[str, object]:
497502
def _format_duration(self, d: timedelta) -> str:
498503
# the default is hh:mm:ss, with hh not limited to 24; this is the least ambiguous
499504
# choice
500-
return '%s:%s:%s' % (d.total_seconds() // 3600, (d.seconds // 60) % 60, d.seconds % 60)
505+
return '%s:%s:%s' % (int(d.total_seconds()) // 3600, (d.seconds // 60) % 60, d.seconds % 60)
501506

502507
def _run_command(self, cmd: List[str]) -> str:
503508
res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
@@ -562,7 +567,11 @@ def _read_aux_files(self, job: Job, status: JobStatus) -> None:
562567
if status.message is None:
563568
# only read output from submit script if another error message is not
564569
# already present
565-
status.message = self._read_aux_file(job, '.out')
570+
out = self._read_aux_file(job, '.out')
571+
if out:
572+
launcher = self._get_launcher_from_job(job)
573+
if launcher.is_launcher_failure(out):
574+
status.message = launcher.get_launcher_failure_message(out)
566575
logger.debug('Output from launcher: %s', status.message)
567576
else:
568577
self._delete_aux_file(job, '.out')

src/psij/executors/batch/lsf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,4 +141,4 @@ def get_list_command(self) -> List[str]:
141141
def _format_duration(self, d: timedelta) -> str:
142142
# https://www.ibm.com/docs/en/spectrum-lsf/10.1.0?topic=o-w-1:
143143
# bsub -W [hour:]minute[/host_name | /host_model]
144-
return "%s:%s" % (d.total_seconds() // 3600, (d.seconds // 60) % 60)
144+
return "%s:%s" % (int(d.total_seconds()) // 3600, (d.seconds // 60) % 60)

src/psij/executors/batch/pbs.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from pathlib import Path
2+
from typing import Optional
3+
4+
from psij.executors.batch.pbs_base import PBSExecutorConfig, GenericPBSJobExecutor
5+
from psij.executors.batch.script_generator import TemplatedScriptGenerator
6+
7+
8+
class PBSJobExecutor(GenericPBSJobExecutor):
9+
"""A :class:`~psij.JobExecutor` for PBS Pro and friends.
10+
11+
This executor uses resource specifications specific to PBS Pro.
12+
"""
13+
14+
def __init__(self, url: Optional[str] = None, config: Optional[PBSExecutorConfig] = None):
15+
"""
16+
Parameters
17+
----------
18+
url
19+
Not used, but required by the spec for automatic initialization.
20+
config
21+
An optional configuration for this executor.
22+
"""
23+
if not config:
24+
config = PBSExecutorConfig()
25+
generator = TemplatedScriptGenerator(config, Path(__file__).parent / 'pbspro'
26+
/ 'pbspro.mustache')
27+
super().__init__(generator, url=url, config=config)
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/bin/bash
2+
3+
4+
{{#job.name}}
5+
#PBS -N="{{.}}"
6+
{{/job.name}}
7+
8+
{{#job.spec.inherit_environment}}
9+
#PBS -V
10+
{{/job.spec.inherit_environment}}
11+
12+
{{#job.spec.resources}}
13+
{{#process_count}}
14+
#PBS -l nodes={{job.spec.resources.computed_node_count}}:ppn={{.}}{{#job.spec.resources.gpu_cores_per_process}}:gpus={{.}}{{/job.spec.resources.gpu_cores_per_process}}
15+
{{/process_count}}
16+
{{#exclusive_node_use}}
17+
#PBS -n
18+
{{/exclusive_node_use}}
19+
{{/job.spec.resources}}
20+
21+
{{#formatted_job_duration}}
22+
#PBS -l walltime={{.}}
23+
{{/formatted_job_duration}}
24+
25+
{{#job.spec.attributes}}
26+
{{#project_name}}
27+
#PBS -P {{.}}
28+
{{/project_name}}
29+
{{#queue_name}}
30+
#PBS -q {{.}}
31+
{{/queue_name}}
32+
{{!PBS uses specially named queues for reservations, so we send the job to the respective
33+
queue when a reservation ID is specified.}}
34+
{{#reservation_id}}
35+
#PBS -q {{.}}
36+
{{/reservation_id}}
37+
{{/job.spec.attributes}}
38+
39+
{{#custom_attributes}}
40+
{{#pbs}}
41+
#PBS -{{key}} "{{value}}"
42+
{{/pbs}}
43+
{{/custom_attributes}}
44+
45+
46+
{{!since we redirect the output manually, below, tell pbs not to do its own thing, since it
47+
only results in empty files that are not cleaned up}}
48+
#PBS -e /dev/null
49+
#PBS -o /dev/null
50+
51+
{{#job.spec.inherit_environment}}
52+
#PBS -V
53+
{{/job.spec.inherit_environment}}
54+
{{#env}}
55+
#PBS -v {{name}}={{value}}
56+
{{/env}}
57+
58+
PSIJ_NODEFILE="$PBS_NODEFILE"
59+
export PSIJ_NODEFILE
60+
61+
62+
{{#job.spec.directory}}
63+
cd "{{.}}"
64+
{{/job.spec.directory}}
65+
66+
exec &>> "{{psij.script_dir}}/$PBS_JOBID.out"
67+
68+
{{#psij.launch_command}}{{.}} {{/psij.launch_command}}
69+
70+
{{!we redirect to a file tied to the native ID so that we can reach the file with attach().}}
71+
echo "$?" > "{{psij.script_dir}}/$PBS_JOBID.ec"

src/psij/executors/batch/pbspro/pbspro.mustache renamed to src/psij/executors/batch/pbs/pbspro.mustache

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
#!/bin/bash
22

3-
# TODO: job.spec.resources
4-
# TODO: job.spec.attributes
53

64
{{#job.name}}
75
#PBS -N="{{.}}"
@@ -11,6 +9,18 @@
119
#PBS -V
1210
{{/job.spec.inherit_environment}}
1311

12+
{{#job.spec.resources}}
13+
{{#process_count}}
14+
#PBS -l select={{job.spec.resources.computed_node_count}}:ncpus={{.}}
15+
{{/process_count}}
16+
{{#exclusive_node_use}}
17+
#PBS -l place=scatter:exclhost
18+
{{/exclusive_node_use}}
19+
{{^exclusive_node_use}}
20+
#PBS -l place=scatter:shared
21+
{{/exclusive_node_use}}
22+
{{/job.spec.resources}}
23+
1424
{{#formatted_job_duration}}
1525
#PBS -l walltime={{.}}
1626
{{/formatted_job_duration}}
Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
_QSTAT_COMMAND = 'qstat'
1313

14-
# This table maps PBS Pro state codes to the corresponding PSI/J
14+
# This table maps PBS state codes to the corresponding PSI/J
1515
# JobState.
1616
# See https://www.altair.com/pdfs/pbsworks/PBSReferenceGuide2021.1.pdf
1717
# page 361, section 8.1 "Job States"
@@ -39,14 +39,14 @@
3939
}
4040

4141

42-
class PBSProExecutorConfig(BatchSchedulerExecutorConfig):
43-
"""A configuration class for the PBS executor."""
42+
class PBSExecutorConfig(BatchSchedulerExecutorConfig):
43+
"""A generic configuration class for PBS-type executors."""
4444

4545
pass
4646

4747

48-
class PBSProJobExecutor(BatchSchedulerExecutor):
49-
"""A :class:`~psij.JobExecutor` for PBS.
48+
class GenericPBSJobExecutor(BatchSchedulerExecutor):
49+
"""A generic :class:`~psij.JobExecutor` for PBS-type schedulers.
5050
5151
PBS, originally developed by NASA, is one of the oldest resource managers still in use.
5252
A number of variations are available: `PBS Pro <https://www.altair.com/pbs-professional/>`_,
@@ -60,7 +60,8 @@ class PBSProJobExecutor(BatchSchedulerExecutor):
6060
Creates a batch script with #PBS directives when submitting a job.
6161
"""
6262

63-
def __init__(self, url: Optional[str] = None, config: Optional[PBSProExecutorConfig] = None):
63+
def __init__(self, generator: TemplatedScriptGenerator, url: Optional[str] = None,
64+
config: Optional[PBSExecutorConfig] = None) -> None:
6465
"""
6566
Parameters
6667
----------
@@ -69,11 +70,8 @@ def __init__(self, url: Optional[str] = None, config: Optional[PBSProExecutorCon
6970
config
7071
An optional configuration for this executor.
7172
"""
72-
if not config:
73-
config = PBSProExecutorConfig()
7473
super().__init__(url=url, config=config)
75-
self.generator = TemplatedScriptGenerator(config, Path(__file__).parent / 'pbspro'
76-
/ 'pbspro.mustache')
74+
self.generator = generator
7775

7876
# Submit methods
7977

0 commit comments

Comments
 (0)