|
3 | 3 | # |
4 | 4 | # SPDX-License-Identifier: BSD-3-Clause |
5 | 5 |
|
| 6 | +import contextlib |
6 | 7 | import reframe as rfm |
7 | | -import reframe.utility.sanity as sn |
| 8 | +from hpctestlib.apps.amber.nve import amber_nve_check |
8 | 9 |
|
9 | 10 |
|
10 | | -# FIXME: Use tuples as dictionary keys as soon as |
11 | | -# https://github.com/eth-cscs/reframe/issues/2022 is in |
12 | | -daint_gpu_performance = { |
13 | | - 'Cellulose_production_NVE': (30.0, -0.05, None, 'ns/day'), |
14 | | - 'FactorIX_production_NVE': (134.0, -0.05, None, 'ns/day'), |
15 | | - 'JAC_production_NVE': (388.0, -0.05, None, 'ns/day'), |
16 | | - 'JAC_production_NVE_4fs': (742, -0.05, None, 'ns/day'), |
17 | | -} |
18 | | - |
19 | | -REFERENCE_GPU_PERFORMANCE = { |
20 | | - 'daint:gpu': daint_gpu_performance, |
21 | | - 'dom:gpu': daint_gpu_performance |
22 | | -} |
23 | | - |
24 | | -daint_mc_performance_small = { |
25 | | - 'Cellulose_production_NVE': (8.0, -0.30, None, 'ns/day'), |
26 | | - 'FactorIX_production_NVE': (34.0, -0.30, None, 'ns/day'), |
27 | | - 'JAC_production_NVE': (90.0, -0.30, None, 'ns/day'), |
28 | | - 'JAC_production_NVE_4fs': (150.0, -0.30, None, 'ns/day'), |
29 | | -} |
30 | | - |
31 | | -eiger_mc_performance_small = { |
32 | | - 'Cellulose_production_NVE': (3.2, -0.30, None, 'ns/day'), |
33 | | - 'FactorIX_production_NVE': (7.0, -0.30, None, 'ns/day'), |
34 | | - 'JAC_production_NVE': (30.0, -0.30, None, 'ns/day'), |
35 | | - 'JAC_production_NVE_4fs': (45.0, -0.30, None, 'ns/day'), |
36 | | -} |
37 | | - |
38 | | -REFERENCE_CPU_PERFORMANCE_SMALL = { |
39 | | - 'daint:mc': daint_mc_performance_small, |
40 | | - 'dom:mc': daint_mc_performance_small, |
41 | | - 'eiger:mc': eiger_mc_performance_small, |
42 | | - 'pilatus:mc': eiger_mc_performance_small, |
43 | | -} |
44 | | - |
45 | | -REFERENCE_CPU_PERFORMANCE_LARGE = { |
46 | | - 'daint:mc': { |
47 | | - 'Cellulose_production_NVE': (10.0, -0.30, None, 'ns/day'), |
48 | | - 'FactorIX_production_NVE': (36.0, -0.30, None, 'ns/day'), |
49 | | - 'JAC_production_NVE': (78.0, -0.30, None, 'ns/day'), |
50 | | - 'JAC_production_NVE_4fs': (135.0, -0.30, None, 'ns/day'), |
51 | | - }, |
52 | | - 'eiger:mc': { |
53 | | - 'Cellulose_production_NVE': (1.3, -0.30, None, 'ns/day'), |
54 | | - 'FactorIX_production_NVE': (3.5, -0.30, None, 'ns/day'), |
55 | | - 'JAC_production_NVE': (17.0, -0.30, None, 'ns/day'), |
56 | | - 'JAC_production_NVE_4fs': (30.5, -0.30, None, 'ns/day'), |
57 | | - }, |
58 | | -} |
59 | | - |
60 | | - |
61 | | -class AmberBaseCheck(rfm.RunOnlyRegressionTest): |
62 | | - valid_prog_environs = ['builtin'] |
63 | | - strict_check = False |
@rfm.simple_test
class cscs_amber_check(amber_nve_check):
    """CSCS-specific configuration of the library Amber NVE check.

    The class is parameterized over the number of nodes and, for every
    (node count, processor architecture, benchmark) combination listed in
    ``allref``, installs a performance reference for the partition the test
    runs on.  The systems a given test variant may run on are selected in
    ``scope_systems`` from ``self.variant`` and ``self.num_nodes``.

    NOTE(review): ``variant``, ``benchmark`` and ``tags`` are not defined in
    this class; presumably they are provided by ``amber_nve_check`` --
    confirm against the library test.
    """

    modules = ['Amber']
    valid_prog_environs = ['builtin']
    extra_resources = {
        'switches': {
            'num_switches': 1
        }
    }
    tags |= {'maintenance', 'production'}
    maintainers = ['VH', 'SO']
    num_nodes = parameter([1, 4, 6, 8, 16])

    # Performance references, indexed as
    #   allref[num_nodes][processor arch][benchmark name]
    # Each value is assigned verbatim as a ReFrame reference entry in
    # `set_perf_reference()`; all of them are expressed in 'ns/day'.
    allref = {
        1: {
            'p100': {
                'Cellulose_production_NVE': (30.0, -0.05, None, 'ns/day'),
                'FactorIX_production_NVE': (134.0, -0.05, None, 'ns/day'),
                'JAC_production_NVE': (388.0, -0.05, None, 'ns/day'),
                'JAC_production_NVE_4fs': (742, -0.05, None, 'ns/day')
            }
        },
        4: {
            'zen2': {
                'Cellulose_production_NVE': (3.2, -0.30, None, 'ns/day'),
                'FactorIX_production_NVE': (7.0, -0.30, None, 'ns/day'),
                'JAC_production_NVE': (30.0, -0.30, None, 'ns/day'),
                'JAC_production_NVE_4fs': (45.0, -0.30, None, 'ns/day')
            }
        },
        6: {
            'broadwell': {
                'Cellulose_production_NVE': (8.0, -0.30, None, 'ns/day'),
                'FactorIX_production_NVE': (34.0, -0.30, None, 'ns/day'),
                'JAC_production_NVE': (90.0, -0.30, None, 'ns/day'),
                'JAC_production_NVE_4fs': (150.0, -0.30, None, 'ns/day')
            }
        },
        8: {
            'zen2': {
                'Cellulose_production_NVE': (1.3, -0.30, None, 'ns/day'),
                'FactorIX_production_NVE': (3.5, -0.30, None, 'ns/day'),
                'JAC_production_NVE': (17.0, -0.30, None, 'ns/day'),
                'JAC_production_NVE_4fs': (30.5, -0.30, None, 'ns/day')
            }
        },
        16: {
            'broadwell': {
                'Cellulose_production_NVE': (10.0, -0.30, None, 'ns/day'),
                'FactorIX_production_NVE': (36.0, -0.30, None, 'ns/day'),
                'JAC_production_NVE': (78.0, -0.30, None, 'ns/day'),
                'JAC_production_NVE_4fs': (135.0, -0.30, None, 'ns/day')
            }
        }
    }

    @run_after('init')
    def scope_systems(self):
        """Set ``valid_systems`` from the test variant and node count.

        Combinations that are not listed get an empty ``valid_systems``
        list, i.e. no system is selected for them.
        """
        valid_systems = {
            'cuda': {1: ['daint:gpu', 'dom:gpu']},
            'mpi': {
                4: ['eiger:mc', 'pilatus:mc'],
                6: ['daint:mc', 'dom:mc'],
                # NOTE(review): the pre-refactoring check ran its large CPU
                # case on 'eiger:mc', not 'pilatus:mc' -- confirm that this
                # entry is intentional.
                8: ['pilatus:mc'],
                16: ['daint:mc']
            }
        }
        try:
            self.valid_systems = valid_systems[self.variant][self.num_nodes]
        except KeyError:
            # Unsupported (variant, num_nodes) pair
            self.valid_systems = []

    @run_after('init')
    def set_hierarchical_prgenvs(self):
        """Use the ``cpeIntel`` environment on eiger and pilatus."""
        if self.current_system.name in ['eiger', 'pilatus']:
            self.valid_prog_environs = ['cpeIntel']

    @run_after('init')
    def set_num_gpus_per_node(self):
        """Request one GPU per node for the ``cuda`` variant."""
        if self.variant == 'cuda':
            self.num_gpus_per_node = 1

    @run_after('setup')
    def skip_if_no_topo(self):
        """Skip the test if the partition has no processor information.

        This hook is defined before ``set_num_tasks()``, which reads
        ``proc.num_cores`` for the non-cuda variant.
        NOTE(review): this relies on same-stage hooks running in definition
        order -- confirm against the ReFrame version in use.
        """
        proc = self.current_partition.processor
        pname = self.current_partition.fullname
        if not proc.info:
            self.skip(f'no topology information found for partition {pname!r}')

    @run_after('setup')
    def set_num_tasks(self):
        """Set the tasks per node and the total number of tasks.

        The ``cuda`` variant runs a single task per node; every other
        variant runs one task per core of the current partition.
        """
        if self.variant == 'cuda':
            self.num_tasks_per_node = 1
        else:
            proc = self.current_partition.processor
            self.num_tasks_per_node = proc.num_cores

        self.num_tasks = self.num_nodes * self.num_tasks_per_node

    @run_before('performance')
    def set_perf_reference(self):
        """Install the performance reference for the current partition.

        The GPU partitions of daint and dom are looked up under the
        ``'p100'`` key; all other partitions are looked up under the
        architecture reported by their processor information.
        """
        proc = self.current_partition.processor
        pname = self.current_partition.fullname
        if pname in ('daint:gpu', 'dom:gpu'):
            arch = 'p100'
        else:
            arch = proc.arch

        # Best effort: if `allref` has no entry for this combination, the
        # lookup raises before the assignment takes place, so
        # `self.reference` is left exactly as it was.
        with contextlib.suppress(KeyError):
            self.reference = {
                pname: {
                    'perf': self.allref[self.num_nodes][arch][self.benchmark]
                }
            }
0 commit comments