Skip to content

Commit 3abb6db

Browse files
author
Vasileios Karakasis
authored
Merge pull request #2172 from hurricane642/amber_test
[test] Add new Amber NVE library test and refactor the CSCS Amber check
2 parents c68b119 + d443220 commit 3abb6db

File tree

3 files changed

+250
-171
lines changed

3 files changed

+250
-171
lines changed

cscs-checks/apps/amber/amber_check.py

Lines changed: 96 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -3,197 +3,122 @@
33
#
44
# SPDX-License-Identifier: BSD-3-Clause
55

6+
import contextlib
67
import reframe as rfm
7-
import reframe.utility.sanity as sn
8+
from hpctestlib.apps.amber.nve import amber_nve_check
89

910

10-
# FIXME: Use tuples as dictionary keys as soon as
11-
# https://github.com/eth-cscs/reframe/issues/2022 is in
12-
daint_gpu_performance = {
13-
'Cellulose_production_NVE': (30.0, -0.05, None, 'ns/day'),
14-
'FactorIX_production_NVE': (134.0, -0.05, None, 'ns/day'),
15-
'JAC_production_NVE': (388.0, -0.05, None, 'ns/day'),
16-
'JAC_production_NVE_4fs': (742, -0.05, None, 'ns/day'),
17-
}
18-
19-
REFERENCE_GPU_PERFORMANCE = {
20-
'daint:gpu': daint_gpu_performance,
21-
'dom:gpu': daint_gpu_performance
22-
}
23-
24-
daint_mc_performance_small = {
25-
'Cellulose_production_NVE': (8.0, -0.30, None, 'ns/day'),
26-
'FactorIX_production_NVE': (34.0, -0.30, None, 'ns/day'),
27-
'JAC_production_NVE': (90.0, -0.30, None, 'ns/day'),
28-
'JAC_production_NVE_4fs': (150.0, -0.30, None, 'ns/day'),
29-
}
30-
31-
eiger_mc_performance_small = {
32-
'Cellulose_production_NVE': (3.2, -0.30, None, 'ns/day'),
33-
'FactorIX_production_NVE': (7.0, -0.30, None, 'ns/day'),
34-
'JAC_production_NVE': (30.0, -0.30, None, 'ns/day'),
35-
'JAC_production_NVE_4fs': (45.0, -0.30, None, 'ns/day'),
36-
}
37-
38-
REFERENCE_CPU_PERFORMANCE_SMALL = {
39-
'daint:mc': daint_mc_performance_small,
40-
'dom:mc': daint_mc_performance_small,
41-
'eiger:mc': eiger_mc_performance_small,
42-
'pilatus:mc': eiger_mc_performance_small,
43-
}
44-
45-
REFERENCE_CPU_PERFORMANCE_LARGE = {
46-
'daint:mc': {
47-
'Cellulose_production_NVE': (10.0, -0.30, None, 'ns/day'),
48-
'FactorIX_production_NVE': (36.0, -0.30, None, 'ns/day'),
49-
'JAC_production_NVE': (78.0, -0.30, None, 'ns/day'),
50-
'JAC_production_NVE_4fs': (135.0, -0.30, None, 'ns/day'),
51-
},
52-
'eiger:mc': {
53-
'Cellulose_production_NVE': (1.3, -0.30, None, 'ns/day'),
54-
'FactorIX_production_NVE': (3.5, -0.30, None, 'ns/day'),
55-
'JAC_production_NVE': (17.0, -0.30, None, 'ns/day'),
56-
'JAC_production_NVE_4fs': (30.5, -0.30, None, 'ns/day'),
57-
},
58-
}
59-
60-
61-
class AmberBaseCheck(rfm.RunOnlyRegressionTest):
62-
valid_prog_environs = ['builtin']
63-
strict_check = False
11+
@rfm.simple_test
12+
class cscs_amber_check(amber_nve_check):
6413
modules = ['Amber']
14+
valid_prog_environs = ['builtin']
6515
extra_resources = {
6616
'switches': {
6717
'num_switches': 1
6818
}
6919
}
20+
tags |= {'maintenance', 'production'}
7021
maintainers = ['VH', 'SO']
71-
tags = {'scs', 'external-resources'}
72-
73-
benchmark = parameter([
74-
# NVE simulations
75-
'Cellulose_production_NVE',
76-
'FactorIX_production_NVE',
77-
'JAC_production_NVE_4fs',
78-
'JAC_production_NVE',
79-
])
80-
81-
@run_after('init')
82-
def download_files(self):
83-
self.prerun_cmds = [
84-
# cannot use wget because it is not installed on eiger
85-
f'curl -LJO https://github.com/victorusu/amber_benchmark_suite'
86-
f'/raw/main/amber_16_benchmark_suite/PME/{self.benchmark}.tar.bz2',
87-
f'tar xf {self.benchmark}.tar.bz2'
88-
]
89-
90-
@run_after('init')
91-
def set_energy_and_tolerance_reference(self):
92-
self.ener_ref = {
93-
# every system has a different reference energy and drift
94-
'Cellulose_production_NVE': (-443246, 5.0E-05),
95-
'FactorIX_production_NVE': (-234188, 1.0E-04),
96-
'JAC_production_NVE_4fs': (-44810, 1.0E-03),
97-
'JAC_production_NVE': (-58138, 5.0E-04),
98-
}
99-
100-
@run_after('setup')
101-
def set_executable_opts(self):
102-
self.executable_opts = ['-O',
103-
'-i', self.input_file,
104-
'-o', self.output_file]
105-
self.keep_files = [self.output_file]
106-
107-
@run_after('setup')
108-
def set_sanity_patterns(self):
109-
energy = sn.extractsingle(r' Etot\s+=\s+(?P<energy>\S+)',
110-
self.output_file, 'energy', float, item=-2)
111-
energy_reference = self.ener_ref[self.benchmark][0]
112-
energy_diff = sn.abs(energy - energy_reference)
113-
ref_ener_diff = sn.abs(self.ener_ref[self.benchmark][0] *
114-
self.ener_ref[self.benchmark][1])
115-
self.sanity_patterns = sn.all([
116-
sn.assert_found(r'Final Performance Info:', self.output_file),
117-
sn.assert_lt(energy_diff, ref_ener_diff)
118-
])
119-
120-
@run_after('setup')
121-
def set_generic_perf_references(self):
122-
self.reference.update({'*': {
123-
self.benchmark: (0, None, None, 'ns/day')
124-
}})
125-
126-
@run_after('setup')
127-
def set_perf_patterns(self):
128-
self.perf_patterns = {
129-
self.benchmark: sn.extractsingle(r'ns/day =\s+(?P<perf>\S+)',
130-
self.output_file, 'perf',
131-
float, item=1)
22+
num_nodes = parameter([1, 4, 6, 8, 16])
23+
allref = {
24+
1: {
25+
'p100': {
26+
'Cellulose_production_NVE': (30.0, -0.05, None, 'ns/day'),
27+
'FactorIX_production_NVE': (134.0, -0.05, None, 'ns/day'),
28+
'JAC_production_NVE': (388.0, -0.05, None, 'ns/day'),
29+
'JAC_production_NVE_4fs': (742, -0.05, None, 'ns/day')
30+
}
31+
},
32+
4: {
33+
'zen2': {
34+
'Cellulose_production_NVE': (3.2, -0.30, None, 'ns/day'),
35+
'FactorIX_production_NVE': (7.0, -0.30, None, 'ns/day'),
36+
'JAC_production_NVE': (30.0, -0.30, None, 'ns/day'),
37+
'JAC_production_NVE_4fs': (45.0, -0.30, None, 'ns/day')
38+
}
39+
},
40+
6: {
41+
'broadwell': {
42+
'Cellulose_production_NVE': (8.0, -0.30, None, 'ns/day'),
43+
'FactorIX_production_NVE': (34.0, -0.30, None, 'ns/day'),
44+
'JAC_production_NVE': (90.0, -0.30, None, 'ns/day'),
45+
'JAC_production_NVE_4fs': (150.0, -0.30, None, 'ns/day')
46+
}
47+
},
48+
8: {
49+
'zen2': {
50+
'Cellulose_production_NVE': (1.3, -0.30, None, 'ns/day'),
51+
'FactorIX_production_NVE': (3.5, -0.30, None, 'ns/day'),
52+
'JAC_production_NVE': (17.0, -0.30, None, 'ns/day'),
53+
'JAC_production_NVE_4fs': (30.5, -0.30, None, 'ns/day')
54+
}
55+
},
56+
16: {
57+
'broadwell': {
58+
'Cellulose_production_NVE': (10.0, -0.30, None, 'ns/day'),
59+
'FactorIX_production_NVE': (36.0, -0.30, None, 'ns/day'),
60+
'JAC_production_NVE': (78.0, -0.30, None, 'ns/day'),
61+
'JAC_production_NVE_4fs': (135.0, -0.30, None, 'ns/day')
62+
}
13263
}
133-
134-
135-
@rfm.simple_test
136-
class AmberGPUCheck(AmberBaseCheck):
137-
num_tasks = 1
138-
num_tasks_per_node = 1
139-
num_gpus_per_node = 1
140-
valid_systems = ['daint:gpu', 'dom:gpu']
141-
executable = 'pmemd.cuda.MPI'
142-
input_file = 'mdin.GPU'
143-
output_file = 'amber.out'
144-
descr = f'Amber GPU check'
145-
tags = {'maintenance', 'production', 'health'}
146-
reference = REFERENCE_GPU_PERFORMANCE
147-
148-
149-
@rfm.simple_test
150-
class AmberCPUCheck(AmberBaseCheck):
151-
scale = parameter(['small', 'large'])
152-
valid_systems = ['daint:mc', 'eiger:mc']
153-
executable = 'pmemd.MPI'
154-
input_file = 'mdin.CPU'
155-
output_file = 'amber.out'
156-
tags = {'maintenance', 'production'}
157-
158-
@run_after('init')
159-
def set_description(self):
160-
self.mydescr = f'Amber parallel {self.scale} CPU check'
64+
}
16165

16266
@run_after('init')
163-
def set_additional_systems(self):
164-
if self.scale == 'small':
165-
self.valid_systems += ['dom:mc', 'pilatus:mc']
67+
def scope_systems(self):
68+
valid_systems = {
69+
'cuda': {1: ['daint:gpu', 'dom:gpu']},
70+
'mpi': {
71+
4: ['eiger:mc', 'pilatus:mc'],
72+
6: ['daint:mc', 'dom:mc'],
73+
8: ['pilatus:mc'],
74+
16: ['daint:mc']
75+
}
76+
}
77+
try:
78+
self.valid_systems = valid_systems[self.variant][self.num_nodes]
79+
except KeyError:
80+
self.valid_systems = []
16681

16782
@run_after('init')
16883
def set_hierarchical_prgenvs(self):
16984
if self.current_system.name in ['eiger', 'pilatus']:
17085
self.valid_prog_environs = ['cpeIntel']
17186

87+
@run_after('init')
88+
def set_num_gpus_per_node(self):
89+
if self.variant == 'cuda':
90+
self.num_gpus_per_node = 1
91+
17292
@run_after('setup')
173-
def set_perf_reference(self):
174-
if self.scale == 'small':
175-
self.reference = REFERENCE_CPU_PERFORMANCE_SMALL
93+
def skip_if_no_topo(self):
94+
proc = self.current_partition.processor
95+
pname = self.current_partition.fullname
96+
if not proc.info:
97+
self.skip(f'no topology information found for partition {pname!r}')
98+
99+
@run_after('setup')
100+
def set_num_tasks(self):
101+
if self.variant == 'cuda':
102+
self.num_tasks_per_node = 1
176103
else:
177-
self.reference = REFERENCE_CPU_PERFORMANCE_LARGE
104+
proc = self.current_partition.processor
105+
pname = self.current_partition.fullname
106+
self.num_tasks_per_node = proc.num_cores
178107

179-
@run_after('init')
180-
def set_num_tasks_cray_xc(self):
181-
if self.current_system.name in ['daint', 'dom']:
182-
self.num_tasks_per_node = 36
183-
if self.scale == 'small':
184-
self.num_nodes = 6
185-
else:
186-
self.num_nodes = 16
187-
self.num_tasks = self.num_nodes * self.num_tasks_per_node
108+
self.num_tasks = self.num_nodes * self.num_tasks_per_node
188109

189-
@run_after('init')
190-
def set_num_tasks_cray_shasta(self):
191-
if self.current_system.name in ['eiger', 'pilatus']:
192-
self.num_tasks_per_node = 128
193-
if self.scale == 'small':
194-
self.num_nodes = 4
195-
else:
196-
# there are too many processors, the large jobs cannot start
197-
# need to decrease to just 8 nodes
198-
self.num_nodes = 8
199-
self.num_tasks = self.num_nodes * self.num_tasks_per_node
110+
@run_before('performance')
111+
def set_perf_reference(self):
112+
proc = self.current_partition.processor
113+
pname = self.current_partition.fullname
114+
if pname in ('daint:gpu', 'dom:gpu'):
115+
arch = 'p100'
116+
else:
117+
arch = proc.arch
118+
119+
with contextlib.suppress(KeyError):
120+
self.reference = {
121+
pname: {
122+
'perf': self.allref[self.num_nodes][arch][self.benchmark]
123+
}
124+
}

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,4 @@ Publications
5656
usecases
5757
migration_2_to_3
5858
manuals
59+
hpctestlib

0 commit comments

Comments
 (0)