Skip to content

Commit 17a9044

Browse files
authored
Merge pull request #652 from teojgo/feature/min_flex_nodes
[feat] Add support for setting a minimum number of required tasks in tests with flexible task allocation
2 parents 02e0d16 + 69a84d7 commit 17a9044

File tree

5 files changed

+61
-22
lines changed

5 files changed

+61
-22
lines changed

docs/advanced.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,8 +404,11 @@ Flexible Regression Tests
404404

405405
.. versionadded:: 2.15
406406

407-
ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks <reframe.core.pipeline.RegressionTest.num_tasks>` attribute is set to ``0``.
407+
ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks <reframe.core.pipeline.RegressionTest.num_tasks>` attribute is set to a value ``<=0``.
408408
In ReFrame's terminology, such tests are called `flexible`.
409+
Negative values indicate the minimum number of tasks that is acceptable for this test (a value of ``-4`` indicates a minimum acceptable number of ``4`` tasks).
410+
A zero value indicates the default minimum number of tasks, which is equal to :attr:`num_tasks_per_node <reframe.core.pipeline.RegressionTest.num_tasks_per_node>`.
411+
409412
By default, ReFrame will spawn such a test on all the idle nodes of the current system partition, but this behavior can be adjusted from the command-line.
410413
Flexible tests are very useful for diagnostics tests, e.g., tests for checking the health of a whole set of nodes.
411414
In this example, we demonstrate this feature through a simple test that runs ``hostname``.

docs/running.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1006,7 +1006,7 @@ Controlling the Flexible Task Allocation
10061006

10071007
.. versionadded:: 2.15
10081008

1009-
ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks <reframe.core.pipeline.RegressionTest.num_tasks>` attribute is set to ``0``.
1009+
ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks <reframe.core.pipeline.RegressionTest.num_tasks>` attribute is set to a value ``<=0``.
10101010
By default, ReFrame will spawn such a test on all the idle nodes of the current system partition.
10111011
This behavior can be adjusted using the ``--flex-alloc-tasks`` command line option.
10121012
This option accepts three values:

reframe/core/pipeline.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,15 @@ class RegressionTest:
255255

256256
#: Number of tasks required by this test.
257257
#:
258-
#: If the number of tasks is set to ``0``, ReFrame will try to flexibly
259-
#: allocate the number of tasks, based on the command line option
260-
#: ``--flex-alloc-tasks``.
258+
#: If the number of tasks is set to a number ``<=0``, ReFrame will try
259+
#: to flexibly allocate the number of tasks, based on the command line
260+
#: option ``--flex-alloc-tasks``.
261+
#: A negative number is used to indicate the minimum number of tasks
262+
#: required for the test.
263+
#: In this case the minimum number of tasks is the absolute value of
264+
#: the number.
265+
#: Setting ``num_tasks`` to ``0`` is equivalent to setting it to
266+
#: ``-num_tasks_per_node``.
261267
#:
262268
#: :type: integral
263269
#: :default: ``1``
@@ -269,6 +275,9 @@ class RegressionTest:
269275
#: (see `Flexible task allocation
270276
#: <running.html#flexible-task-allocation>`__)
271277
#: if the number of tasks is set to ``0``.
278+
#: .. versionchanged:: 2.16
279+
#: Negative ``num_tasks`` is allowed for specifying the minimum
280+
#: number of tasks required by the test.
272281
num_tasks = fields.TypedField('num_tasks', int)
273282

274283
#: Number of tasks per node required by this test.

reframe/core/schedulers/__init__.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -221,14 +221,27 @@ def sched_exclusive_access(self):
221221

222222
def prepare(self, commands, environs=None, **gen_opts):
223223
environs = environs or []
224-
if self.num_tasks == 0:
224+
if self.num_tasks <= 0:
225+
num_tasks_per_node = self.num_tasks_per_node or 1
226+
min_num_tasks = (-self.num_tasks if self.num_tasks else
227+
num_tasks_per_node)
228+
225229
try:
226-
self._num_tasks = self.guess_num_tasks()
227-
getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' %
228-
self._num_tasks)
230+
guessed_num_tasks = self.guess_num_tasks()
229231
except NotImplementedError as e:
230-
raise JobError('guessing number of tasks is not implemented '
231-
'by the backend') from e
232+
raise JobError('flexible task allocation is not supported by '
233+
'this backend') from e
234+
235+
if guessed_num_tasks < min_num_tasks:
236+
nodes_required = min_num_tasks // num_tasks_per_node
237+
nodes_found = guessed_num_tasks // num_tasks_per_node
238+
raise JobError('could not find enough nodes: '
239+
'required %s, found %s' %
240+
(nodes_required, nodes_found))
241+
242+
self._num_tasks = guessed_num_tasks
243+
getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' %
244+
self._num_tasks)
232245

233246
with shell.generate_script(self.script_filename,
234247
**gen_opts) as builder:
@@ -257,25 +270,16 @@ def guess_num_tasks(self):
257270

258271
# Try to guess the number of tasks now
259272
available_nodes = self.filter_nodes(available_nodes, self.options)
260-
if not available_nodes:
261-
options = ' '.join(self.sched_access + self.options)
262-
raise JobError('could not find any node satisfying the '
263-
'required criteria: %s' % options)
264273

265274
if self.sched_flex_alloc_tasks == 'idle':
266275
available_nodes = {n for n in available_nodes
267276
if n.is_available()}
268-
if not available_nodes:
269-
raise JobError('could not find any idle nodes')
270-
271277
getlogger().debug(
272278
'flex_alloc_tasks: selecting idle nodes: '
273279
'available nodes now: %s' % len(available_nodes))
274280

275281
num_tasks_per_node = self.num_tasks_per_node or 1
276282
num_tasks = len(available_nodes) * num_tasks_per_node
277-
getlogger().debug('flex_alloc_tasks: setting num_tasks to: %s' %
278-
num_tasks)
279283
return num_tasks
280284

281285
@abc.abstractmethod

unittests/test_schedulers.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,7 @@ def test_guess_num_tasks(self):
390390
# monkey patch `get_partition_nodes()` to simulate extraction of
391391
# slurm nodes through the use of `scontrol show`
392392
self.testjob.get_partition_nodes = lambda: set()
393-
with self.assertRaises(JobError):
394-
self.testjob.guess_num_tasks()
393+
self.assertEqual(self.testjob.guess_num_tasks(), 0)
395394

396395

397396
class TestSqueueJob(TestSlurmJob):
@@ -708,6 +707,30 @@ def test_exclude_nodes_opt(self):
708707
self.prepare_job()
709708
self.assertEqual(self.testjob.num_tasks, 8)
710709

710+
def test_no_num_tasks_per_node(self):
711+
self.testjob._num_tasks_per_node = None
712+
self.testjob.options = ['-C f1,f2', '--partition=p1,p2']
713+
self.prepare_job()
714+
self.assertEqual(self.testjob.num_tasks, 1)
715+
716+
def test_not_enough_idle_nodes(self):
717+
self.testjob._sched_flex_alloc_tasks = 'idle'
718+
self.testjob._num_tasks = -12
719+
with self.assertRaises(JobError):
720+
self.prepare_job()
721+
722+
def test_not_enough_nodes_constraint_partition(self):
723+
self.testjob.options = ['-C f1,f2', '--partition=p1,p2']
724+
self.testjob._num_tasks = -8
725+
with self.assertRaises(JobError):
726+
self.prepare_job()
727+
728+
def test_enough_nodes_constraint_partition(self):
729+
self.testjob.options = ['-C f1,f2', '--partition=p1,p2']
730+
self.testjob._num_tasks = -4
731+
self.prepare_job()
732+
self.assertEqual(self.testjob.num_tasks, 4)
733+
711734
def prepare_job(self):
712735
self.testjob.prepare(['hostname'])
713736

0 commit comments

Comments
 (0)