Skip to content

Commit b63c8af

Browse files
author
Theofilos Manitaras
committed
Address PR comments
1 parent cd81b9d commit b63c8af

File tree

5 files changed

+35
-32
lines changed

5 files changed

+35
-32
lines changed

docs/advanced.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,9 @@ Flexible Regression Tests
406406

407407
ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks <reframe.core.pipeline.RegressionTest.num_tasks>` attribute is set to ``<=0``.
408408
In ReFrame's terminology, such tests are called `flexible`.
409+
Negative values indicate the minimum number of tasks that is acceptable for this test (a value of ``-4`` indicates a minimum acceptable number of ``4`` tasks).
410+
A zero value indicates the default minimum number of tasks, which is equal to :attr:`num_tasks_per_node <reframe.core.pipeline.RegressionTest.num_tasks_per_node>`.
411+
409412
By default, ReFrame will spawn such a test on all the idle nodes of the current system partition, but this behavior can be adjusted from the command-line.
410413
Flexible tests are very useful for diagnostics tests, e.g., tests for checking the health of a whole set of nodes.
411414
In this example, we demonstrate this feature through a simple test that runs ``hostname``.

docs/running.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,8 +1007,6 @@ Controlling the Flexible Task Allocation
10071007
.. versionadded:: 2.15
10081008

10091009
ReFrame can automatically set the number of tasks of a particular test, if its :attr:`num_tasks <reframe.core.pipeline.RegressionTest.num_tasks>` attribute is set to a value ``<=0``.
1010-
Negative values indicate the minimum number of tasks that is acceptable for this test (a value of ``-4`` indicates a minimum acceptable number of ``4`` tasks).
1011-
A zero value indicates the default minimum number of tasks which is ``1``.
10121010
By default, ReFrame will spawn such a test on all the idle nodes of the current system partition.
10131011
This behavior can be adjusted using the ``--flex-alloc-tasks`` command line option.
10141012
This option accepts three values:

reframe/core/pipeline.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -257,10 +257,13 @@ class RegressionTest:
257257
#:
258258
#: If the number of tasks is set to a number ``<=0``, ReFrame will try
259259
#: to flexibly allocate the number of tasks, based on the command line
260-
#: option ``--flex-alloc-tasks``. A negative number is used to indicate
261-
#: the minimum number of tasks valid for the test. In this case the
262-
#: minimum number of tasks is the absolute value of the number, while
263-
#: ``0`` is used when the minimum number of tasks is ``1``.
260+
#: option ``--flex-alloc-tasks``.
261+
#: A negative number is used to indicate the minimum number of tasks
262+
#: required for the test.
263+
#: In this case the minimum number of tasks is the absolute value of
264+
#: the number.
265+
#: Setting ``num_tasks`` to ``0`` is equivalent to setting it to
266+
#: ``-num_tasks_per_node``.
264267
#:
265268
#: :type: integral
266269
#: :default: ``1``
@@ -273,8 +276,8 @@ class RegressionTest:
273276
#: <running.html#flexible-task-allocation>`__)
274277
#: if the number of tasks is set to ``0``.
275278
#: .. versionchanged:: 2.16
276-
#: Added support for specifying the minimum number of acceptable
277-
#: tasks when negative numbers are specified.
279+
#: Negative ``num_tasks`` is allowed for specifying the minimum
280+
#: number of tasks required by the test.
278281
num_tasks = fields.TypedField('num_tasks', int)
279282

280283
#: Number of tasks per node required by this test.

reframe/core/schedulers/__init__.py

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -222,12 +222,22 @@ def sched_exclusive_access(self):
222222
def prepare(self, commands, environs=None, **gen_opts):
223223
environs = environs or []
224224
if self.num_tasks <= 0:
225+
num_tasks_per_node = (self.num_tasks_per_node if
226+
self.num_tasks_per_node else 1)
227+
min_num_tasks = (abs(self.num_tasks) if self.num_tasks < 0 else
228+
num_tasks_per_node)
225229
try:
226-
self._num_tasks = (self.guess_num_tasks(abs(self.num_tasks)) if
227-
self.num_tasks != 0 else
228-
self.guess_num_tasks())
229-
getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' %
230-
self._num_tasks)
230+
guessed_num_tasks = self.guess_num_tasks()
231+
if guessed_num_tasks >= min_num_tasks:
232+
self._num_tasks = guessed_num_tasks
233+
getlogger().debug('flex_alloc_tasks: setting num_tasks '
234+
'to %s' % self._num_tasks)
235+
else:
236+
raise JobError(
237+
'not enough nodes satisfying the minimum '
238+
'number of tasks required: %s < %s' %
239+
(guessed_num_tasks, min_num_tasks))
240+
231241
except NotImplementedError as e:
232242
raise JobError('guessing number of tasks is not implemented '
233243
'by the backend') from e
@@ -245,15 +255,11 @@ def prepare(self, commands, environs=None, **gen_opts):
245255
def emit_preamble(self):
246256
pass
247257

248-
def guess_num_tasks(self, min_num_tasks=1):
258+
def guess_num_tasks(self):
249259
if isinstance(self.sched_flex_alloc_tasks, int):
250260
if self.sched_flex_alloc_tasks <= 0:
251261
raise JobError('invalid number of flex_alloc_tasks: %s' %
252262
self.sched_flex_alloc_tasks)
253-
elif self.sched_flex_alloc_tasks < min_num_tasks:
254-
raise JobError('invalid number of flex_alloc_tasks: %s > '
255-
'%s (min number of tasks)' %
256-
self.sched_flex_alloc_tasks, min_num_tasks)
257263

258264
return self.sched_flex_alloc_tasks
259265

@@ -263,28 +269,16 @@ def guess_num_tasks(self, min_num_tasks=1):
263269

264270
# Try to guess the number of tasks now
265271
available_nodes = self.filter_nodes(available_nodes, self.options)
266-
if not available_nodes:
267-
options = ' '.join(self.sched_access + self.options)
268-
raise JobError('could not find any node satisfying the '
269-
'required criteria: %s' % options)
270272

271273
if self.sched_flex_alloc_tasks == 'idle':
272274
available_nodes = {n for n in available_nodes
273275
if n.is_available()}
274-
if not available_nodes:
275-
raise JobError('could not find any idle nodes')
276-
277276
getlogger().debug(
278277
'flex_alloc_tasks: selecting idle nodes: '
279278
'available nodes now: %s' % len(available_nodes))
280279

281280
num_tasks_per_node = self.num_tasks_per_node or 1
282281
num_tasks = len(available_nodes) * num_tasks_per_node
283-
if num_tasks < min_num_tasks:
284-
raise JobError('could not schedule enough tasks')
285-
286-
getlogger().debug('flex_alloc_tasks: setting num_tasks to: %s' %
287-
num_tasks)
288282
return num_tasks
289283

290284
@abc.abstractmethod

unittests/test_schedulers.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,7 @@ def test_guess_num_tasks(self):
390390
# monkey patch `get_partition_nodes()` to simulate extraction of
391391
# slurm nodes through the use of `scontrol show`
392392
self.testjob.get_partition_nodes = lambda: set()
393-
with self.assertRaises(JobError):
394-
self.testjob.guess_num_tasks()
393+
self.assertEqual(self.testjob.guess_num_tasks(), 0)
395394

396395

397396
class TestSqueueJob(TestSlurmJob):
@@ -708,6 +707,12 @@ def test_exclude_nodes_opt(self):
708707
self.prepare_job()
709708
self.assertEqual(self.testjob.num_tasks, 8)
710709

710+
def test_no_num_tasks_per_node(self):
711+
self.testjob._num_tasks_per_node = None
712+
self.testjob.options = ['-C f1,f2', '--partition=p1,p2']
713+
self.prepare_job()
714+
self.assertEqual(self.testjob.num_tasks, 1)
715+
711716
def test_not_enough_idle_nodes(self):
712717
self.testjob._sched_flex_alloc_tasks = 'idle'
713718
self.testjob._num_tasks = -12

0 commit comments

Comments
 (0)