Skip to content

Commit 9caa317

Browse files
authored
Merge pull request #3158 from vkarak/bugfix/flex-nodes-treat-or-constraints
[bugfix] Treat OR Slurm constraints in flexible node allocation
2 parents ea78747 + 2d1eca4 commit 9caa317

File tree

5 files changed

+75
-12
lines changed

5 files changed

+75
-12
lines changed

docs/config_reference.rst

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -480,9 +480,14 @@ System Partition Configuration
480480

481481
A list of job scheduler options that will be passed to the generated job script for gaining access to that logical partition.
482482

483-
.. note::
484-
For the ``pbs`` and ``torque`` backends, options accepted in the :attr:`~config.systems.partitions.access` and :attr:`~config.systems.partitions.resources` parameters may either refer to actual ``qsub`` options or may just be resources specifications to be passed to the ``-l`` option.
485-
The backend assumes a ``qsub`` option, if the options passed in these attributes start with a ``-``.
483+
.. note::
484+
For the ``pbs`` and ``torque`` backends, options accepted in the :attr:`~config.systems.partitions.access` and :attr:`~config.systems.partitions.resources` parameters may either refer to actual ``qsub`` options or may just be resources specifications to be passed to the ``-l`` option.
485+
The backend assumes a ``qsub`` option, if the options passed in these attributes start with a ``-``.
486+
487+
.. note::
488+
If constraints are specified in :attr:`~config.systems.partitions.access` for the Slurm backends,
489+
these will be AND'ed with any additional constraints passed either through the test job :attr:`~reframe.core.schedulers.Job.options` or the :option:`-J` command-line option.
490+
In other words, any constraint passed in :attr:`~config.systems.partitions.access` will always be present in the generated job script.
486491

487492

488493
.. py:attribute:: systems.partitions.environs

docs/manpage.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,9 @@ Flexible node allocation
728728
ReFrame can automatically set the number of tasks of a test, if its :attr:`num_tasks <reframe.core.pipeline.RegressionTest.num_tasks>` attribute is set to a value less than or equal to zero.
729729
This scheme is conveniently called *flexible node allocation* and is valid only for the Slurm backend.
730730
When allocating nodes automatically, ReFrame will take into account all node limiting factors, such as partition :attr:`~config.systems.partitions.access` options, and any job submission control options described above.
731+
Particularly for Slurm constraints, ReFrame will only recognize simple AND or OR constraints and any parenthesized expression of them.
732+
The full syntax of `Slurm constraints <https://slurm.schedmd.com/sbatch.html#OPT_constraint>`__ is not currently supported.
733+
731734
Nodes from this pool are allocated according to different policies.
732735
If no node can be selected, the test will be marked as a failure with an appropriate message.
733736

@@ -747,6 +750,8 @@ If no node can be selected, the test will be marked as a failure with an appropr
747750
Align the state selection with the :option:`--distribute` option.
748751
See the :option:`--distribute` option for more details.
749752

753+
Slurm OR constraints and parenthesized expressions are supported in flexible node allocation.
754+
750755
---------------------------------------
751756
Options controlling ReFrame environment
752757
---------------------------------------

reframe/core/launchers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class JobLauncher(metaclass=_JobLauncherMeta):
5151
#:
5252
#: If the modifier is empty, these options will be ignored.
5353
#:
54-
#: :type: :clas:`List[str]`
54+
#: :type: :class:`List[str]`
5555
#: :default: ``[]``
5656
#:
5757
#: :versionadded:: 4.6.0

reframe/core/schedulers/slurm.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -213,11 +213,15 @@ def emit_preamble(self, job):
213213
if not opt.strip().startswith(('-C', '--constraint')):
214214
preamble.append('%s %s' % (self._prefix, opt))
215215

216+
# To avoid overriding a constraint that's passed into `sched_access`,
217+
# we AND it with the `--constraint` option passed either in `options`
218+
# or in `cli_options`
216219
constraints = []
217220
constraint_parser = ArgumentParser()
218221
constraint_parser.add_argument('-C', '--constraint')
219222
parsed_options, _ = constraint_parser.parse_known_args(
220-
job.sched_access)
223+
job.sched_access
224+
)
221225
if parsed_options.constraint:
222226
constraints.append(parsed_options.constraint.strip())
223227

@@ -230,9 +234,14 @@ def emit_preamble(self, job):
230234
constraints.append(parsed_options.constraint.strip())
231235

232236
if constraints:
233-
preamble.append(
234-
self._format_option('&'.join(constraints), '--constraint={0}')
235-
)
237+
if len(constraints) == 1:
238+
constr = constraints[0]
239+
else:
240+
# Parenthesize the constraints prior to joining them with `&`
241+
# to make sure that precedence is respected.
242+
constr = '&'.join(f'({c})' for c in constraints)
243+
244+
preamble.append(self._format_option(constr, '--constraint={0}'))
236245

237246
preamble.append(self._format_option(hint, '--hint={0}'))
238247
prefix_patt = re.compile(r'(#\w+)')
@@ -350,8 +359,7 @@ def filternodes(self, job, nodes):
350359
self.log(f'[F] Filtering nodes by partition(s) {partitions}: '
351360
f'available nodes now: {len(nodes)}')
352361
if constraints:
353-
constraints = set(constraints.strip().split('&'))
354-
nodes = {n for n in nodes if n.active_features >= constraints}
362+
nodes = {n for n in nodes if n.satisfies(constraints)}
355363
self.log(f'[F] Filtering nodes by constraint(s) {constraints}: '
356364
f'available nodes now: {len(nodes)}')
357365

@@ -669,6 +677,23 @@ def is_avail(self):
669677
def is_down(self):
670678
return not self.is_avail()
671679

680+
def satisfies(self, slurm_constraint):
    '''Check whether this node satisfies a Slurm constraint expression.

    Only a subset of the Slurm constraint syntax is understood: feature
    names made of word characters, combined with ``&`` (AND), ``|`` (OR)
    and parentheses. ``&`` binds tighter than ``|``, so ``a|b&c`` is read
    as ``a|(b&c)``.

    The expression is parsed directly instead of being handed to
    ``eval()``, so every name is always treated as a feature name, even
    if it happens to look like a Python literal or keyword (e.g., ``1``
    or ``True``).

    :arg slurm_constraint: The constraint expression as a string.
        Surrounding whitespace is ignored.
    :returns: :obj:`True` if the expression holds for the features in
        ``self.active_features``; :obj:`False` if it does not hold or if
        the expression is empty, malformed or uses unsupported syntax.
    '''

    constraint = slurm_constraint.strip()

    # Restrict our syntax to accept only AND or OR constraints and their
    # parenthesized combinations
    if not re.fullmatch(r'[\w()|&]+', constraint):
        return False

    tokens = re.findall(r'\w+|[()|&]', constraint)
    features = self.active_features
    pos = 0

    def parse_or():
        nonlocal pos
        result = parse_and()
        while pos < len(tokens) and tokens[pos] == '|':
            pos += 1

            # Always parse the right operand, so that syntax errors are
            # detected even when the result is already known
            rhs = parse_and()
            result = result or rhs

        return result

    def parse_and():
        nonlocal pos
        result = parse_primary()
        while pos < len(tokens) and tokens[pos] == '&':
            pos += 1
            rhs = parse_primary()
            result = result and rhs

        return result

    def parse_primary():
        nonlocal pos
        if pos >= len(tokens):
            raise ValueError('unexpected end of constraint')

        tok = tokens[pos]
        pos += 1
        if tok == '(':
            result = parse_or()
            if pos >= len(tokens) or tokens[pos] != ')':
                raise ValueError('unbalanced parentheses')

            pos += 1
            return result

        if tok in (')', '|', '&'):
            raise ValueError(f'unexpected token: {tok}')

        return tok in features

    try:
        result = parse_or()
    except (ValueError, RecursionError):
        # Malformed or too deeply nested expression
        return False

    # Any leftover token (e.g., a stray `)` or `name(...)`) makes the
    # whole expression invalid
    return result if pos == len(tokens) else False
696+
672697
@property
673698
def active_features(self):
674699
return self._active_features

unittests/test_schedulers.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ def test_combined_access_constraint(make_job, slurm_only):
617617
with open(job.script_filename) as fp:
618618
script_content = fp.read()
619619

620-
assert re.search(r'(?m)--constraint=c1&c2&c3$', script_content)
620+
assert re.search(r'(?m)--constraint=\(c1\)&\(c2&c3\)$', script_content)
621621
assert re.search(r'(?m)--constraint=(c1|c2&c3)$', script_content) is None
622622

623623

@@ -628,7 +628,7 @@ def test_combined_access_multiple_constraints(make_job, slurm_only):
628628
with open(job.script_filename) as fp:
629629
script_content = fp.read()
630630

631-
assert re.search(r'(?m)--constraint=c1&c3$', script_content)
631+
assert re.search(r'(?m)--constraint=\(c1\)&\(c3\)$', script_content)
632632
assert re.search(r'(?m)--constraint=(c1|c2|c3)$', script_content) is None
633633

634634

@@ -1176,6 +1176,34 @@ def test_flex_alloc_enough_nodes_constraint_partition(make_flexible_job):
11761176
assert job.num_tasks == 4
11771177

11781178

1179+
def test_flex_alloc_enough_nodes_constraint_expr(make_flexible_job):
    '''Flexible allocation accepts a parenthesized AND/OR constraint.'''

    constraint_opt = '-C "(f1|f2)&f3"'
    flex_job = make_flexible_job('all')
    flex_job.options = [constraint_opt]
    prepare_job(flex_job)

    # The expected task count depends on the node set provided by the
    # fixture, which is defined elsewhere in the test module
    assert flex_job.num_tasks == 8
1184+
1185+
1186+
def test_flex_alloc_nodes_unsupported_constraint(make_flexible_job):
    '''A constraint using bracket/count syntax must raise a job error.'''

    constraint_opt = '-C "[f1*2&f2*4]"'
    flex_job = make_flexible_job('all')
    flex_job.options = [constraint_opt]
    with pytest.raises(JobError):
        prepare_job(flex_job)
1191+
1192+
1193+
def test_flex_alloc_nodes_invalid_constraint(make_flexible_job):
    '''A constraint with a dangling ``&`` operator must raise a job error.'''

    constraint_opt = '-C "(f1|f2)&"'
    flex_job = make_flexible_job('all')
    flex_job.options = [constraint_opt]
    with pytest.raises(JobError):
        prepare_job(flex_job)
1198+
1199+
1200+
def test_flex_alloc_not_enough_nodes_constraint_expr(make_flexible_job):
    '''A well-formed constraint expression can still end in a job error.'''

    # NOTE(review): presumably no fixture node offers `f8` or `f9`, so no
    # node can be selected -- confirm against the fixture definition
    constraint_opt = '-C "(f1|f2)&(f8|f9)"'
    flex_job = make_flexible_job('all')
    flex_job.options = [constraint_opt]
    with pytest.raises(JobError):
        prepare_job(flex_job)
1205+
1206+
11791207
@pytest.fixture
11801208
def slurm_node_allocated():
11811209
return _SlurmNode(

0 commit comments

Comments
 (0)