#
# SPDX-License-Identifier: BSD-3-Clause
#

import reframe as rfm
import reframe.utility.sanity as sn


11 | 10 | @rfm.simple_test |
12 | 11 | class CPMDCheck(rfm.RunOnlyRegressionTest): |
13 | | - scale = parameter(['small', 'large']) |
14 | | - descr = 'CPMD check (C4H6 metadynamics)' |
15 | | - maintainers = ['AJ', 'LM'] |
16 | | - tags = {'production'} |
17 | | - valid_systems = ['daint:gpu'] |
18 | | - num_tasks_per_node = 1 |
19 | | - valid_prog_environs = ['builtin'] |
20 | 12 | modules = ['CPMD'] |
21 | 13 | executable = 'cpmd.x' |
22 | | - executable_opts = ['ana_c4h6.in > stdout.txt'] |
23 | | - readonly_files = ['ana_c4h6.in', 'C_MT_BLYP', 'H_MT_BLYP'] |
24 | | - use_multithreading = True |
25 | | - strict_check = False |
| 14 | + executable_opts = ['ana_c4h6.in'] |
26 | 15 | extra_resources = { |
27 | 16 | 'switches': { |
28 | 17 | 'num_switches': 1 |
29 | 18 | } |
30 | 19 | } |
31 | | - allref = { |
32 | | - '9': { |
33 | | - 'p100': { |
34 | | - 'time': (284, None, 0.15, 's') |
| 20 | + strict_check = False |
| 21 | + use_multithreading = False |
| 22 | + tags = {'maintenance', 'production'} |
| 23 | + maintainers = ['AJ', 'LM'] |
| 24 | + |
| 25 | + num_nodes = parameter([6, 16], loggable=True) |
| 26 | + references = { |
| 27 | + 6: { |
| 28 | + 'sm_60': { |
| 29 | + 'dom:gpu': {'time': (120, None, 0.15, 's')}, |
| 30 | + 'daint:gpu': {'time': (120, None, 0.15, 's')}, |
| 31 | + }, |
| 32 | + 'broadwell': { |
| 33 | + 'dom:mc': {'time': (150.0, None, 0.15, 's')}, |
| 34 | + 'daint:mc': {'time': (150.0, None, 0.15, 's')}, |
35 | 35 | }, |
36 | 36 | }, |
37 | | - '16': { |
38 | | - 'p100': { |
39 | | - 'time': (226, None, 0.15, 's') |
40 | | - } |
| 37 | + 16: { |
| 38 | + 'sm_60': { |
| 39 | + 'daint:gpu': {'time': (120, None, 0.15, 's')} |
| 40 | + }, |
| 41 | + 'broadwell': { |
| 42 | + 'daint:mc': {'time': (150.0, None, 0.15, 's')}, |
| 43 | + }, |
41 | 44 | } |
42 | 45 | } |
43 | 46 |
|
44 | | - @run_after('init') |
45 | | - def setup_by_scale(self): |
46 | | - if self.scale == 'small': |
47 | | - self.valid_systems += ['dom:gpu'] |
48 | | - self.num_tasks = 9 |
49 | | - else: |
50 | | - self.num_tasks = 16 |
51 | | - |
52 | | - @run_before('performance') |
53 | | - def set_perf_reference(self): |
54 | | - proc = self.current_partition.processor |
55 | | - pname = self.current_partition.fullname |
56 | | - if pname in ('daint:gpu', 'dom:gpu'): |
57 | | - arch = 'p100' |
58 | | - else: |
59 | | - arch = proc.arch |
60 | | - |
61 | | - with contextlib.suppress(KeyError): |
62 | | - self.reference = { |
63 | | - pname: { |
64 | | - 'perf': self.allref[self.num_tasks][arch][self.benchmark] |
65 | | - } |
66 | | - } |
| 47 | + @performance_function('s') |
| 48 | + def elapsed_time(self): |
| 49 | + return sn.extractsingle(r'^ cpmd(\s+[\d\.]+){3}\s+(?P<time>\S+)', |
| 50 | + self.stdout, 'time', float) |
67 | 51 |
|
68 | 52 | @sanity_function |
69 | 53 | def assert_energy_diff(self): |
70 | | - # OpenMP version of CPMD segfaults |
71 | | - # self.variables = { 'OMP_NUM_THREADS' : '8' } |
72 | 54 | energy = sn.extractsingle( |
73 | 55 | r'CLASSICAL ENERGY\s+-(?P<result>\S+)', |
74 | | - 'stdout.txt', 'result', float) |
| 56 | + self.stdout, 'result', float) |
75 | 57 | energy_reference = 25.81 |
76 | 58 | energy_diff = sn.abs(energy - energy_reference) |
77 | 59 | return sn.assert_lt(energy_diff, 0.26) |
78 | 60 |
|
79 | | - @performance_function('s') |
80 | | - def time(self): |
81 | | - return sn.extractsingle(r'^ cpmd(\s+[\d\.]+){3}\s+(?P<perf>\S+)', |
82 | | - 'stdout.txt', 'perf', float) |
| 61 | + @run_after('init') |
| 62 | + def setup_system_filtering(self): |
| 63 | + self.descr = f'CPMD check ({self.num_nodes} node(s))' |
| 64 | + |
| 65 | + # setup system filter |
| 66 | + valid_systems = { |
| 67 | + 6: ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc'], |
| 68 | + 16: ['daint:gpu', 'daint:mc'] |
| 69 | + } |
| 70 | + |
| 71 | + self.skip_if(self.num_nodes not in valid_systems, |
| 72 | + f'No valid systems found for {self.num_nodes}(s)') |
| 73 | + self.valid_systems = valid_systems[self.num_nodes] |
| 74 | + |
| 75 | + # setup programming environment filter |
| 76 | + self.valid_prog_environs = ['builtin'] |
| 77 | + |
| 78 | + @run_before('run') |
| 79 | + def setup_run(self): |
| 80 | + # retrieve processor data |
| 81 | + self.skip_if_no_procinfo() |
| 82 | + proc = self.current_partition.processor |
| 83 | + |
| 84 | + # set architecture for GPU partition (no auto-detection) |
| 85 | + if self.current_partition.fullname in ['daint:gpu', 'dom:gpu']: |
| 86 | + arch = 'sm_60' |
| 87 | + self.variables = { |
| 88 | + 'CRAY_CUDA_MPS': '1' |
| 89 | + } |
| 90 | + else: |
| 91 | + arch = proc.arch |
| 92 | + |
| 93 | + # common setup for every architecture |
| 94 | + self.job.launcher.options = ['--cpu-bind=cores'] |
| 95 | + self.job.options = ['--distribution=block:block'] |
| 96 | + # FIXME: the current test case does not scale beyond 72 MPI tasks, |
| 97 | + # so the last node in 16-nodes jobs will be used only partially. |
| 98 | + # The test case needs to be updated (warning about XC_DRIVER IN &DFT) |
| 99 | + self.num_tasks = 72 |
| 100 | + |
| 101 | + try: |
| 102 | + found = self.references[self.num_nodes][arch] |
| 103 | + except KeyError: |
| 104 | + self.skip(f'Configuration with {self.num_nodes} node(s) ' |
| 105 | + f'is not supported on {arch!r}') |
| 106 | + |
| 107 | + # setup performance references |
| 108 | + self.reference = self.references[self.num_nodes][arch] |
0 commit comments