Skip to content

Commit 3a5f03c

Browse files
author
Vasileios Karakasis
committed
Merge branch 'master' into refactor/move-builtins
2 parents 6a8f36a + 2db7e2f commit 3a5f03c

File tree

2 files changed

+86
-52
lines changed

2 files changed

+86
-52
lines changed

cscs-checks/apps/cpmd/cpmd_check.py

Lines changed: 76 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -3,80 +3,106 @@
33
#
44
# SPDX-License-Identifier: BSD-3-Clause
55

6-
import contextlib
76
import reframe as rfm
87
import reframe.utility.sanity as sn
98

109

1110
@rfm.simple_test
1211
class CPMDCheck(rfm.RunOnlyRegressionTest):
13-
scale = parameter(['small', 'large'])
14-
descr = 'CPMD check (C4H6 metadynamics)'
15-
maintainers = ['AJ', 'LM']
16-
tags = {'production'}
17-
valid_systems = ['daint:gpu']
18-
num_tasks_per_node = 1
19-
valid_prog_environs = ['builtin']
2012
modules = ['CPMD']
2113
executable = 'cpmd.x'
22-
executable_opts = ['ana_c4h6.in > stdout.txt']
23-
readonly_files = ['ana_c4h6.in', 'C_MT_BLYP', 'H_MT_BLYP']
24-
use_multithreading = True
25-
strict_check = False
14+
executable_opts = ['ana_c4h6.in']
2615
extra_resources = {
2716
'switches': {
2817
'num_switches': 1
2918
}
3019
}
31-
allref = {
32-
'9': {
33-
'p100': {
34-
'time': (284, None, 0.15, 's')
20+
strict_check = False
21+
use_multithreading = False
22+
tags = {'maintenance', 'production'}
23+
maintainers = ['AJ', 'LM']
24+
25+
num_nodes = parameter([6, 16], loggable=True)
26+
references = {
27+
6: {
28+
'sm_60': {
29+
'dom:gpu': {'time': (120, None, 0.15, 's')},
30+
'daint:gpu': {'time': (120, None, 0.15, 's')},
31+
},
32+
'broadwell': {
33+
'dom:mc': {'time': (150.0, None, 0.15, 's')},
34+
'daint:mc': {'time': (150.0, None, 0.15, 's')},
3535
},
3636
},
37-
'16': {
38-
'p100': {
39-
'time': (226, None, 0.15, 's')
40-
}
37+
16: {
38+
'sm_60': {
39+
'daint:gpu': {'time': (120, None, 0.15, 's')}
40+
},
41+
'broadwell': {
42+
'daint:mc': {'time': (150.0, None, 0.15, 's')},
43+
},
4144
}
4245
}
4346

44-
@run_after('init')
45-
def setup_by_scale(self):
46-
if self.scale == 'small':
47-
self.valid_systems += ['dom:gpu']
48-
self.num_tasks = 9
49-
else:
50-
self.num_tasks = 16
51-
52-
@run_before('performance')
53-
def set_perf_reference(self):
54-
proc = self.current_partition.processor
55-
pname = self.current_partition.fullname
56-
if pname in ('daint:gpu', 'dom:gpu'):
57-
arch = 'p100'
58-
else:
59-
arch = proc.arch
60-
61-
with contextlib.suppress(KeyError):
62-
self.reference = {
63-
pname: {
64-
'perf': self.allref[self.num_tasks][arch][self.benchmark]
65-
}
66-
}
47+
@performance_function('s')
48+
def elapsed_time(self):
49+
return sn.extractsingle(r'^ cpmd(\s+[\d\.]+){3}\s+(?P<time>\S+)',
50+
self.stdout, 'time', float)
6751

6852
@sanity_function
6953
def assert_energy_diff(self):
70-
# OpenMP version of CPMD segfaults
71-
# self.variables = { 'OMP_NUM_THREADS' : '8' }
7254
energy = sn.extractsingle(
7355
r'CLASSICAL ENERGY\s+-(?P<result>\S+)',
74-
'stdout.txt', 'result', float)
56+
self.stdout, 'result', float)
7557
energy_reference = 25.81
7658
energy_diff = sn.abs(energy - energy_reference)
7759
return sn.assert_lt(energy_diff, 0.26)
7860

79-
@performance_function('s')
80-
def time(self):
81-
return sn.extractsingle(r'^ cpmd(\s+[\d\.]+){3}\s+(?P<perf>\S+)',
82-
'stdout.txt', 'perf', float)
61+
@run_after('init')
62+
def setup_system_filtering(self):
63+
self.descr = f'CPMD check ({self.num_nodes} node(s))'
64+
65+
# setup system filter
66+
valid_systems = {
67+
6: ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc'],
68+
16: ['daint:gpu', 'daint:mc']
69+
}
70+
71+
self.skip_if(self.num_nodes not in valid_systems,
72+
f'No valid systems found for {self.num_nodes}(s)')
73+
self.valid_systems = valid_systems[self.num_nodes]
74+
75+
# setup programming environment filter
76+
self.valid_prog_environs = ['builtin']
77+
78+
@run_before('run')
79+
def setup_run(self):
80+
# retrieve processor data
81+
self.skip_if_no_procinfo()
82+
proc = self.current_partition.processor
83+
84+
# set architecture for GPU partition (no auto-detection)
85+
if self.current_partition.fullname in ['daint:gpu', 'dom:gpu']:
86+
arch = 'sm_60'
87+
self.variables = {
88+
'CRAY_CUDA_MPS': '1'
89+
}
90+
else:
91+
arch = proc.arch
92+
93+
# common setup for every architecture
94+
self.job.launcher.options = ['--cpu-bind=cores']
95+
self.job.options = ['--distribution=block:block']
96+
# FIXME: the current test case does not scale beyond 72 MPI tasks,
97+
# so the last node in 16-nodes jobs will be used only partially.
98+
# The test case needs to be updated (warning about XC_DRIVER IN &DFT)
99+
self.num_tasks = 72
100+
101+
try:
102+
found = self.references[self.num_nodes][arch]
103+
except KeyError:
104+
self.skip(f'Configuration with {self.num_nodes} node(s) '
105+
f'is not supported on {arch!r}')
106+
107+
# setup performance references
108+
self.reference = self.references[self.num_nodes][arch]

reframe/frontend/statistics.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -235,7 +235,14 @@ def print_failure_report(self, printer):
235235
f"{r['dependencies_actual']}")
236236
printer.info(f" * Maintainers: {r['maintainers']}")
237237
printer.info(f" * Failing phase: {r['fail_phase']}")
238-
printer.info(f" * Rerun with '-n {r['unique_name']}"
238+
if rt.runtime().get_option('general/0/compact_test_names'):
239+
cls = r['display_name'].split(' ')[0]
240+
variant = r['unique_name'].replace(cls, '').replace('_', '@')
241+
nameoptarg = cls + variant
242+
else:
243+
nameoptarg = r['unique_name']
244+
245+
printer.info(f" * Rerun with '-n {nameoptarg}"
239246
f" -p {r['environment']} --system {r['system']} -r'")
240247
printer.info(f" * Reason: {r['fail_reason']}")
241248

@@ -256,7 +263,8 @@ def print_failure_stats(self, printer):
256263
partfullname = partition.fullname if partition else 'None'
257264
environ_name = (check.current_environ.name
258265
if check.current_environ else 'None')
259-
f = f'[{check.unique_name}, {environ_name}, {partfullname}]'
266+
f = (f'[{check.display_name} (uid: {check.unique_name}), '
267+
f'{environ_name}, {partfullname}]')
260268
if tf.failed_stage not in failures:
261269
failures[tf.failed_stage] = []
262270

0 commit comments

Comments (0)