Skip to content

Commit 28d0c63

Browse files
author
Vasileios Karakasis
authored
Merge branch 'master' into test/p2p-fixes
2 parents f64b077 + 34f96d6 commit 28d0c63

File tree

7 files changed

+89
-51
lines changed

7 files changed

+89
-51
lines changed

docs/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ TARGET_DOCS := \
3838
coverage
3939

4040
latest:
41-
@make html man
41+
@make html
42+
@make man && mkdir -p man/man1 man/man8 && \
43+
mv man/reframe.1 man/man1/ && mv man/reframe.settings.8 man/man8/
4244
@touch html/.nojekyll
4345

4446
clean:

docs/manpage.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -361,19 +361,23 @@ When allocating nodes automatically, ReFrame will take into account all node lim
361361
Nodes from this pool are allocated according to different policies.
362362
If no node can be selected, the test will be marked as a failure with an appropriate message.
363363

364-
.. option:: --flex-alloc-nodes[=POLICY]
364+
.. option:: --flex-alloc-nodes=POLICY
365365

366366
Set the flexible node allocation policy.
367367
Available values are the following:
368368

369369
- ``all``: Flexible tests will be assigned as many tasks as needed in order to span over *all* the nodes of the node pool.
370-
- ``idle``: Flexible tests will be assigned as many tasks as needed in order to span over the *idle* nodes of the node pool.
370+
- ``STATE``: Flexible tests will be assigned as many tasks as needed in order to span over the nodes that are currently in state ``STATE``.
371371
Querying of the node state and submission of the test job are two separate steps not executed atomically.
372-
It is therefore possible that the number of tasks assigned does not correspond to the actual idle nodes.
372+
It is therefore possible that the number of tasks assigned does not correspond to the actual nodes in the given state.
373373

374-
This is the default policy.
374+
If this option is not specified, the default allocation policy for flexible tests is ``idle``.
375375
- Any positive integer: Flexible tests will be assigned as many tasks as needed in order to span over the specified number of nodes from the node pool.
376376

377+
.. versionchanged:: 3.1
378+
It is now possible to pass an arbitrary node state as a flexible node allocation parameter.
379+
380+
377381
---------------------------------------
378382
Options controlling ReFrame environment
379383
---------------------------------------

reframe/core/schedulers/__init__.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -349,17 +349,18 @@ def guess_num_tasks(self):
349349
return self.sched_flex_alloc_nodes * num_tasks_per_node
350350

351351
available_nodes = self.scheduler.allnodes()
352-
getlogger().debug('flex_alloc_nodes: total available nodes %s ' %
352+
getlogger().debug('flex_alloc_nodes: total available nodes: %s ' %
353353
len(available_nodes))
354354

355355
# Try to guess the number of tasks now
356356
available_nodes = self.scheduler.filternodes(self, available_nodes)
357-
if self.sched_flex_alloc_nodes == 'idle':
357+
if self.sched_flex_alloc_nodes.casefold() != 'all':
358358
available_nodes = {n for n in available_nodes
359-
if n.is_available()}
359+
if n.in_state(self.sched_flex_alloc_nodes)}
360360
getlogger().debug(
361-
'flex_alloc_nodes: selecting idle nodes: '
362-
'available nodes now: %s' % len(available_nodes)
361+
f'flex_alloc_nodes: selecting nodes in state '
362+
f'{self.sched_flex_alloc_nodes!r}: '
363+
f'available nodes now: {len(available_nodes)}'
363364
)
364365

365366
return len(available_nodes) * num_tasks_per_node
@@ -398,5 +399,10 @@ class Node(abc.ABC):
398399
'''
399400

400401
@abc.abstractmethod
401-
def is_available(self):
402-
'''Return ``True`` if this node is available, ``False`` otherwise.'''
402+
def in_state(self, state):
403+
'''Returns whether the node is in the given state.
404+
405+
:arg state: The node state.
406+
:returns: :class:`True` if the nodes's state matches the given one,
407+
:class:`False` otherwise.
408+
'''

reframe/core/schedulers/local.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,5 +182,5 @@ class _LocalNode(sched.Node):
182182
def __init__(self, name):
183183
self._name = name
184184

185-
def is_available(self):
186-
return True
185+
def in_state(self, state):
186+
return state.casefold() == 'idle'

reframe/core/schedulers/slurm.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -611,9 +611,9 @@ def __eq__(self, other):
611611
def __hash__(self):
612612
return hash(self.name)
613613

614-
def is_available(self):
615-
return all([self._states == {'IDLE'}, self._partitions,
616-
self._active_features, self._states])
614+
def in_state(self, state):
615+
return all([self._states >= set(state.upper().split('+')),
616+
self._partitions, self._active_features, self._states])
617617

618618
def is_down(self):
619619
return bool({'DOWN', 'DRAIN', 'MAINT', 'NO_RESPOND'} & self._states)

reframe/frontend/cli.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def main():
260260
)
261261
run_options.add_argument(
262262
'--flex-alloc-nodes', action='store',
263-
dest='flex_alloc_nodes', metavar='{all|idle|NUM}', default=None,
263+
dest='flex_alloc_nodes', metavar='{all|STATE|NUM}', default=None,
264264
help='Set strategy for the flexible node allocation (default: "idle").'
265265
)
266266
env_options.add_argument(
@@ -656,10 +656,6 @@ def print_infoline(param, value):
656656
if sched_flex_alloc_nodes <= 0:
657657
raise ConfigError(errmsg.format(options.flex_alloc_nodes))
658658
except ValueError:
659-
if not options.flex_alloc_nodes.casefold() in {'idle', 'all'}:
660-
raise ConfigError(
661-
errmsg.format(options.flex_alloc_nodes)) from None
662-
663659
sched_flex_alloc_nodes = options.flex_alloc_nodes
664660

665661
exec_policy.sched_flex_alloc_nodes = sched_flex_alloc_nodes

unittests/test_schedulers.py

Lines changed: 59 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,24 @@ def slurm_nodes():
647647
'ExtSensorsTemp=n/s Reason=Foo/ '
648648
'failed [reframe_user@01 Jan 2018]',
649649

650+
'NodeName=nid00006 Arch=x86_64 CoresPerSocket=12 '
651+
'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 '
652+
'AvailableFeatures=f6 ActiveFeatures=f6 '
653+
'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00006'
654+
'NodeHostName=nid00006 Version=10.00 OS=Linux '
655+
'RealMemory=32220 AllocMem=0 FreeMem=10000 '
656+
'Sockets=1 Boards=1 State=MAINT '
657+
'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A '
658+
'MCS_label=N/A Partitions=p4 '
659+
'BootTime=01 Jan 2018 '
660+
'SlurmdStartTime=01 Jan 2018 '
661+
'CfgTRES=cpu=24,mem=32220M '
662+
'AllocTRES= CapWatts=n/a CurrentWatts=100 '
663+
'LowestJoules=100000000 ConsumedJoules=0 '
664+
'ExtSensorsJoules=n/s ExtSensorsWatts=0 '
665+
'ExtSensorsTemp=n/s Reason=Foo/ '
666+
'failed [reframe_user@01 Jan 2018]',
667+
650668
'Node invalid_node2 not found']
651669

652670

@@ -861,6 +879,13 @@ def test_flex_alloc_not_enough_idle_nodes(make_flexible_job):
861879
prepare_job(job)
862880

863881

882+
def test_flex_alloc_maintenance_nodes(make_flexible_job):
883+
job = make_flexible_job('maint')
884+
job.options = ['--partition=p4']
885+
prepare_job(job)
886+
assert job.num_tasks == 4
887+
888+
864889
def test_flex_alloc_not_enough_nodes_constraint_partition(make_flexible_job):
865890
job = make_flexible_job('all')
866891
job.options = ['-C f1,f2', '--partition=p1,p2']
@@ -968,6 +993,29 @@ def slurm_node_nopart():
968993
)
969994

970995

996+
@pytest.fixture
997+
def slurm_node_maintenance():
998+
return _SlurmNode(
999+
'NodeName=nid00006 Arch=x86_64 CoresPerSocket=12 '
1000+
'CPUAlloc=0 CPUErr=0 CPUTot=24 CPULoad=0.00 '
1001+
'AvailableFeatures=f6 ActiveFeatures=f6 '
1002+
'Gres=gpu_mem:16280,gpu:1 NodeAddr=nid00006'
1003+
'NodeHostName=nid00006 Version=10.00 OS=Linux '
1004+
'RealMemory=32220 AllocMem=0 FreeMem=10000 '
1005+
'Sockets=1 Boards=1 State=MAINT '
1006+
'ThreadsPerCore=2 TmpDisk=0 Weight=1 Owner=N/A '
1007+
'MCS_label=N/A Partitions=p4 '
1008+
'BootTime=01 Jan 2018 '
1009+
'SlurmdStartTime=01 Jan 2018 '
1010+
'CfgTRES=cpu=24,mem=32220M '
1011+
'AllocTRES= CapWatts=n/a CurrentWatts=100 '
1012+
'LowestJoules=100000000 ConsumedJoules=0 '
1013+
'ExtSensorsJoules=n/s ExtSensorsWatts=0 '
1014+
'ExtSensorsTemp=n/s Reason=Foo/ '
1015+
'failed [reframe_user@01 Jan 2018]'
1016+
)
1017+
1018+
9711019
def test_slurm_node_noname():
9721020
with pytest.raises(JobError):
9731021
_SlurmNode(
@@ -1022,14 +1070,17 @@ def test_str(slurm_node_allocated):
10221070
assert 'nid00001' == str(slurm_node_allocated)
10231071

10241072

1025-
def test_slurm_node_is_available(slurm_node_allocated,
1026-
slurm_node_idle,
1027-
slurm_node_drained,
1028-
slurm_node_nopart):
1029-
assert not slurm_node_allocated.is_available()
1030-
assert slurm_node_idle.is_available()
1031-
assert not slurm_node_drained.is_available()
1032-
assert not slurm_node_nopart.is_available()
1073+
def test_slurm_node_in_state(slurm_node_allocated,
1074+
slurm_node_idle,
1075+
slurm_node_drained,
1076+
slurm_node_nopart):
1077+
assert slurm_node_allocated.in_state('allocated')
1078+
assert slurm_node_idle.in_state('Idle')
1079+
assert slurm_node_drained.in_state('IDLE+Drain')
1080+
assert slurm_node_drained.in_state('IDLE')
1081+
assert slurm_node_drained.in_state('idle')
1082+
assert slurm_node_drained.in_state('DRAIN')
1083+
assert not slurm_node_nopart.in_state('IDLE')
10331084

10341085

10351086
def test_slurm_node_is_down(slurm_node_allocated,
@@ -1038,24 +1089,3 @@ def test_slurm_node_is_down(slurm_node_allocated,
10381089
assert not slurm_node_allocated.is_down()
10391090
assert not slurm_node_idle.is_down()
10401091
assert slurm_node_nopart.is_down()
1041-
1042-
1043-
class TestSlurmNode:
1044-
def setUp(self):
1045-
idle_node_description = (
1046-
)
1047-
1048-
idle_drained_node_description = (
1049-
)
1050-
1051-
no_partition_node_description = (
1052-
)
1053-
1054-
self.no_name_node_description = (
1055-
)
1056-
1057-
self.allocated_node = _SlurmNode(allocated_node_description)
1058-
self.allocated_node_copy = _SlurmNode(allocated_node_description)
1059-
self.idle_node = _SlurmNode(idle_node_description)
1060-
self.idle_drained = _SlurmNode(idle_drained_node_description)
1061-
self.no_partition_node = _SlurmNode(no_partition_node_description)

0 commit comments

Comments
 (0)