Skip to content

Commit 17d5bdd

Browse files
author
Vasileios Karakasis
committed
Merge branch 'master' into test/required_version_semver
2 parents 3553530 + 5c7ef82 commit 17d5bdd

File tree

13 files changed

+546
-203
lines changed

13 files changed

+546
-203
lines changed

cscs-checks/prgenv/affinity_check.py

Lines changed: 125 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -41,40 +41,57 @@ class to figure out the processor's topology. The content of this reference
4141
# }
4242
system = variable(dict, value={})
4343

44+
valid_systems = [
45+
'daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc',
46+
'eiger:mc', 'pilatus:mc',
47+
'ault:amdv100'
48+
]
49+
valid_prog_environs = [
50+
'PrgEnv-gnu', 'PrgEnv-cray', 'PrgEnv-intel', 'PrgEnv-pgi'
51+
]
52+
build_system = 'Make'
53+
54+
# The github URL can not be specified as `self.sourcedir` as that
55+
# would prevent the src folder from being copied to stage which is
56+
# necessary since these tests need files from it.
57+
sourcesdir = os.path.join('src/affinity_ref')
58+
prebuild_cmds = ['git clone https://github.com/vkarak/affinity']
59+
60+
# Dict with the partition's topology - output of "lscpu -e"
61+
topology = variable(dict, value={
62+
'dom:gpu': 'topo_dom_gpu.json',
63+
'dom:mc': 'topo_dom_mc.json',
64+
'daint:gpu': 'topo_dom_gpu.json',
65+
'daint:mc': 'topo_dom_mc.json',
66+
'eiger:mc': 'topo_eiger_mc.json',
67+
'pilatus:mc': 'topo_eiger_mc.json',
68+
'ault:amdv100': 'topo_ault_amdv100.json',
69+
})
70+
71+
# Reference topology file as required variable
72+
topo_file = variable(str)
73+
74+
maintainers = ['RS', 'SK']
75+
tags = {'production', 'scs', 'maintenance', 'craype'}
76+
4477
def __init__(self):
45-
self.valid_systems = ['daint:gpu', 'daint:mc',
46-
'dom:gpu', 'dom:mc', 'eiger:mc',
47-
'ault:amdv100']
48-
self.valid_prog_environs = [
49-
'PrgEnv-gnu', 'PrgEnv-cray', 'PrgEnv-intel', 'PrgEnv-pgi'
50-
]
51-
self.build_system = 'Make'
78+
# FIXME: These two right now cannot be set in the class body.
79+
self.executable = './affinity/affinity'
5280
self.build_system.options = ['-C affinity', 'MPI=1']
5381

54-
# The github URL can not be specified as `self.sourcedir` as that
55-
# would prevent the src folder from being copied to stage which is
56-
# necessary since these tests need files from it.
57-
self.sourcesdir = os.path.join('src/affinity_ref')
58-
self.prebuild_cmds = ['git clone https://github.com/vkarak/affinity']
59-
self.executable = './affinity/affinity'
82+
@rfm.run_before('sanity')
83+
def set_sanity(self):
6084
self.sanity_patterns = self.assert_consumed_cpu_set()
61-
self.maintainers = ['RS', 'SK']
62-
self.tags = {'production', 'scs', 'maintenance', 'craype'}
63-
64-
# Dict with the partition's topology - output of "lscpu -e"
65-
self.topology = {
66-
'dom:gpu': 'topo_dom_gpu.json',
67-
'dom:mc': 'topo_dom_mc.json',
68-
'daint:gpu': 'topo_dom_gpu.json',
69-
'daint:mc': 'topo_dom_mc.json',
70-
'eiger:mc': 'topo_eiger_mc.json',
71-
'ault:amdv100': 'topo_ault_amdv100.json',
72-
}
7385

7486
@rfm.run_before('compile')
7587
def set_topo_file(self):
88+
'''Set the topo_file variable.
89+
90+
If not present in the topology dict, leave it as required.
91+
'''
7692
cp = self.current_partition.fullname
77-
self.topo_file = self.topology[cp]
93+
if cp in self.topology:
94+
self.topo_file = self.topology[cp]
7895

7996
# FIXME: Update the hook below once the PR #1773 is merged.
8097
@rfm.run_after('compile')
@@ -220,10 +237,7 @@ class AffinityOpenMPBase(AffinityTestBase):
220237

221238
omp_bind = variable(str)
222239
omp_proc_bind = variable(str, value='spread')
223-
224-
def __init__(self):
225-
super().__init__()
226-
self.num_tasks = 1
240+
num_tasks = 1
227241

228242
@property
229243
def ncpus_per_task(self):
@@ -252,10 +266,7 @@ class OneThreadPerLogicalCoreOpenMP(AffinityOpenMPBase):
252266
'''Pin each OMP thread to a different logical core.'''
253267

254268
omp_bind = 'threads'
255-
256-
def __init__(self):
257-
super().__init__()
258-
self.descr = 'Pin one OMP thread per CPU.'
269+
descr = 'Pin one OMP thread per CPU.'
259270

260271
@property
261272
def num_omp_threads(self):
@@ -281,10 +292,7 @@ class OneThreadPerPhysicalCoreOpenMP(AffinityOpenMPBase):
281292
'''Pin each OMP thread to a different physical core.'''
282293

283294
omp_bind = 'cores'
284-
285-
def __init__(self):
286-
super().__init__()
287-
self.descr = 'Pin one OMP thread per core.'
295+
descr = 'Pin one OMP thread per core.'
288296

289297
@property
290298
def num_omp_threads(self):
@@ -312,10 +320,8 @@ def consume_cpu_set(self):
312320
class OneThreadPerPhysicalCoreOpenMPnomt(OneThreadPerPhysicalCoreOpenMP):
313321
'''Only one cpu per core booked without multithread.'''
314322

315-
def __init__(self):
316-
super().__init__()
317-
self.descr = 'Pin one OMP thread per core wo. multithreading.'
318-
self.use_multithreading = False
323+
use_multithreading = False
324+
descr = 'Pin one OMP thread per core wo. multithreading.'
319325

320326
@property
321327
def ncpus_per_task(self):
@@ -333,10 +339,7 @@ class OneThreadPerSocketOpenMP(AffinityOpenMPBase):
333339
'''Pin each OMP thread to a different socket.'''
334340

335341
omp_bind = 'sockets'
336-
337-
def __init__(self):
338-
super().__init__()
339-
self.descr = 'Pin one OMP thread per socket.'
342+
descr = 'Pin one OMP thread per socket.'
340343

341344
@property
342345
def num_omp_threads(self):
@@ -367,11 +370,8 @@ class OneTaskPerSocketOpenMPnomt(AffinityOpenMPBase):
367370

368371
omp_bind = 'sockets'
369372
omp_proc_bind = 'close'
370-
371-
def __init__(self):
372-
super().__init__()
373-
self.descr = 'One task per socket - wo. multithreading.'
374-
self.use_multithreading = False
373+
descr = 'One task per socket - wo. multithreading.'
374+
use_multithreading = False
375375

376376
@property
377377
def num_omp_threads(self):
@@ -424,29 +424,24 @@ class OneTaskPerSocketOpenMP(OneTaskPerSocketOpenMPnomt):
424424
and the number of OMP threads.
425425
'''
426426

427-
def __init__(self):
428-
super().__init__()
429-
self.descr = 'One task per socket - w. multithreading.'
430-
self.use_multithreading = True
427+
descr = 'One task per socket - w. multithreading.'
428+
use_multithreading = True
431429

432430
@property
433431
def num_omp_threads(self):
434432
return int(self.num_cpus/self.num_sockets)
435433

436434

437435
@rfm.simple_test
438-
class ConsecutiveNumaFilling(AffinityTestBase):
439-
'''Fill the NUMA nodes with the tasks in consecutive order.
436+
class ConsecutiveSocketFilling(AffinityTestBase):
437+
'''Fill the sockets with the tasks in consecutive order.
440438
441439
This test uses as many tasks as physical cores available in a node.
442440
Multithreading is disabled.
443441
'''
444442

445443
cpu_bind = 'rank'
446-
447-
def __init__(self):
448-
super().__init__()
449-
self.use_multithreading = False
444+
use_multithreading = False
450445

451446
@rfm.run_before('run')
452447
def set_tasks(self):
@@ -457,10 +452,10 @@ def set_tasks(self):
457452
def consume_cpu_set(self):
458453
'''Check that all physical cores have been used in the right order.'''
459454
task_count = 0
460-
for numa_number in range(self.num_numa_nodes):
461-
# Keep track of the CPUs present in this NUMA node
455+
for socket_number in range(self.num_sockets):
456+
# Keep track of the CPUs present in this socket
462457
cpus_present = set()
463-
for task_number in range(int(self.num_tasks/self.num_numa_nodes)):
458+
for task_number in range(int(self.num_tasks/self.num_sockets)):
464459
# Get the list of CPUs with affinity
465460
affinity_set = self.aff_cpus[task_count]
466461

@@ -478,14 +473,14 @@ def consume_cpu_set(self):
478473

479474
task_count += 1
480475

481-
# Ensure all CPUs belong to the same NUMA node
482-
cpuset_by_numa = self.get_sibling_cpus(
483-
next(iter(cpus_present)), by='node'
476+
# Ensure all CPUs belong to the same socket
477+
cpuset_by_socket = self.get_sibling_cpus(
478+
next(iter(cpus_present)), by='socket'
484479
)
485-
if (not all(cpu in cpuset_by_numa for cpu in cpus_present) and
486-
len(cpuset_by_numa) == len(cpus_present)):
480+
if (not all(cpu in cpuset_by_socket for cpu in cpus_present) and
481+
len(cpuset_by_socket) == len(cpus_present)):
487482
raise SanityError(
488-
f'numa node {numa_number} not filled in order'
483+
f'socket {socket_number} not filled in order'
489484
)
490485

491486
else:
@@ -494,56 +489,96 @@ def consume_cpu_set(self):
494489

495490

496491
@rfm.simple_test
497-
class AlternateNumaFilling(AffinityTestBase):
498-
'''Numa nodes are filled in a round-robin fashion.
492+
class AlternateSocketFilling(AffinityTestBase):
493+
'''Sockets are filled in a round-robin fashion.
499494
500495
This test uses as many tasks as physical cores available in a node.
501496
Multithreading is disabled.
502497
'''
503498

504-
def __init__(self):
505-
super().__init__()
506-
self.use_multithreading = False
499+
use_multithreading = False
507500

508501
@rfm.run_before('run')
509502
def set_tasks(self):
510503
self.num_tasks = int(self.num_cpus/self.num_cpus_per_core)
511504
self.num_cpus_per_task = 1
512-
self.num_tasks_per_numa = int(self.num_tasks/self.num_numa_nodes)
505+
self.num_tasks_per_socket = int(self.num_tasks/self.num_sockets)
513506

514507
@rfm.run_before('sanity')
515508
def consume_cpu_set(self):
516-
'''Check that consecutive tasks are round-robin pinned to numa nodes.
517-
'''
509+
'''Check that consecutive tasks are round-robin pinned to sockets.'''
518510

519-
# Get a set per numa node to keep track of the CPUs
520-
numa_nodes = [set() for s in range(self.num_numa_nodes)]
511+
# Get a set per socket to keep track of the CPUs
512+
sockets = [set() for s in range(self.num_sockets)]
521513
task_count = 0
522-
for task in range(self.num_tasks_per_numa):
523-
for s in range(self.num_numa_nodes):
514+
for task in range(self.num_tasks_per_socket):
515+
for s in range(self.num_sockets):
524516
# Get the list of CPUs with affinity
525517
affinity_set = self.aff_cpus[task_count]
526518

527519
# Only 1 CPU per affinity set is allowed
528520
if ((len(affinity_set) > 1) or
529-
(any(cpu in numa_nodes[s] for cpu in affinity_set)) or
530-
(any(cpu not in self.numa_nodes[s]
531-
for cpu in affinity_set))):
521+
(any(cpu in sockets[s] for cpu in affinity_set)) or
522+
(any(cpu not in self.sockets[s] for cpu in affinity_set))):
532523
raise SanityError(
533524
f'incorrect affinity set for task {task_count}'
534525
)
535526

536527
else:
537-
numa_nodes[s].update(
528+
sockets[s].update(
538529
self.get_sibling_cpus(affinity_set[0], by='core')
539530
)
540531

541532
task_count += 1
542533

543-
# Check that all numa nodes have the same CPU count
544-
if not all(len(s) == (task+1)*2 for s in numa_nodes):
534+
# Check that all sockets have the same CPU count
535+
if not all(len(s) == (task+1)*2 for s in sockets):
545536
self.cpu_set.add(-1)
546537

547-
# Decrement the NUMA nodes from the CPU set
548-
for s in numa_nodes:
538+
# Decrement the socket set from the CPU set
539+
for s in sockets:
549540
self.cpu_set -= s
541+
542+
543+
@rfm.simple_test
544+
class OneTaskPerNumaNode(AffinityTestBase):
545+
'''Place a task on each NUMA node.
546+
547+
The trick here is to "pad" the tasks with --cpus-per-task.
548+
The same could be done to target any cache level instead.
549+
Multithreading is disabled.
550+
'''
551+
552+
valid_systems = ['eiger:mc', 'pilatus:mc']
553+
use_multithreading = False
554+
num_cpus_per_task = required
555+
556+
@rfm.run_before('compile')
557+
def build_settings(self):
558+
self.build_system.options += ['OPENMP=0']
559+
560+
@rfm.run_before('run')
561+
def set_tasks(self):
562+
self.num_tasks = self.num_numa_nodes
563+
if self.current_partition.fullname in {'eiger:mc', 'pilatus:mc'}:
564+
self.num_cpus_per_task = 16
565+
566+
@rfm.run_before('sanity')
567+
def consume_cpu_set(self):
568+
'''Check that each task lives in a different NUMA node.'''
569+
570+
if len(self.aff_cpus) != self.num_numa_nodes:
571+
raise SanityError(
572+
'number of tasks does not match the number of numa nodes'
573+
)
574+
575+
for numa_node, aff_set in enumerate(self.aff_cpus):
576+
cpuset_by_numa = self.get_sibling_cpus(aff_set[0], by='node')
577+
if (len(aff_set) != self.num_cpus_per_task or
578+
any(cpu not in cpuset_by_numa for cpu in aff_set)):
579+
raise SanityError(
580+
f'incorrect affinity set for numa node {numa_node}'
581+
)
582+
else:
583+
# Decrement the current NUMA node from the available CPU set
584+
self.cpu_set -= cpuset_by_numa

docs/config_reference.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,6 +1276,18 @@ It can either be a simple string or a JSON object with the following attributes:
12761276
A boolean value indicating whether this module refers to a module collection.
12771277
Module collections are treated differently from simple modules when loading.
12781278

1279+
.. js:attribute:: .path
1280+
1281+
:required: No
1282+
:default: ``null``
1283+
1284+
If the module is not present in the default ``MODULEPATH``, the module's location can be specified here.
1285+
ReFrame will make sure to set and restore the ``MODULEPATH`` accordingly for loading the module.
1286+
1287+
1288+
.. versionadded:: 3.5.0
1289+
1290+
12791291
.. seealso::
12801292

12811293
Module collections with `Environment Modules <https://modules.readthedocs.io/en/latest/MIGRATING.html#module-collection>`__ and `Lmod <https://lmod.readthedocs.io/en/latest/010_user.html#user-collections>`__.

0 commit comments

Comments
 (0)