|
| 1 | +# Copyright 2016-2021 Swiss National Supercomputing Centre (CSCS/ETH Zurich) |
| 2 | +# ReFrame Project Developers. See the top-level LICENSE file for details. |
| 3 | +# |
| 4 | +# SPDX-License-Identifier: BSD-3-Clause |
| 5 | + |
| 6 | +import reframe as rfm |
| 7 | +import reframe.utility.sanity as sn |
| 8 | + |
| 9 | + |
class MemBandwidthTest(rfm.RunOnlyRegressionTest):
    """Base class for likwid-bench memory-bandwidth checks.

    Launches a single ``likwid-bench`` task per node and passes sanity
    when a non-negative minimum bandwidth figure is found on stdout.
    Subclasses pick the kernel and the workgroup options.
    """

    modules = ['likwid']
    valid_prog_environs = ['PrgEnv-gnu']
    sourcesdir = None
    executable = 'likwid-bench'
    num_tasks = 1
    num_tasks_per_node = 1
    num_tasks_per_core = 2

    # Test each level at half capacity times nthreads per domain
    # FIXME: This should be adapted to use the topology autodetection features
    system_cache_sizes = {
        'daint:mc': {
            'L1': '288kB', 'L2': '2304kB', 'L3': '23MB', 'memory': '1800MB'
        },
        'daint:gpu': {
            'L1': '192kB', 'L2': '1536kB', 'L3': '15MB', 'memory': '1200MB'
        },
        'dom:mc': {
            'L1': '288kB', 'L2': '2304kB', 'L3': '23MB', 'memory': '1800MB'
        },
        'dom:gpu': {
            'L1': '192kB', 'L2': '1536kB', 'L3': '15MB', 'memory': '1200MB'
        },
    }
    maintainers = ['SK', 'CB']
    tags = {'benchmark', 'diagnostic', 'health'}

    @sanity_function
    def validate_test(self):
        # Keep the minimum over all reported workgroup bandwidths; the
        # performance stage reads it back through bandwidth() below.
        measured = sn.extractall(r'MByte/s:\s*(?P<bw>\S+)',
                                 self.stdout, 'bw', float)
        self.bw_pattern = sn.min(measured)
        return sn.assert_ge(self.bw_pattern, 0.0)

    @performance_function('MB/s')
    def bandwidth(self):
        # NOTE(review): relies on validate_test() having run already —
        # sanity precedes the performance stage in the ReFrame pipeline.
        return self.bw_pattern

    def set_processor_properties(self):
        # Derive the per-NUMA-domain thread count from the auto-detected
        # processor topology; skip the test when topology info is missing.
        self.skip_if_no_procinfo()
        proc = self.current_partition.processor
        self.num_cpus_per_task = proc.num_cpus
        numa_nodes = proc.topology['numa_nodes']
        self.numa_domains = [f'S{idx}' for idx in range(len(numa_nodes))]
        threads_per_domain = len(self.numa_domains) * self.num_tasks_per_core
        self.num_cpu_domain = self.num_cpus_per_task // threads_per_domain
| 57 | + |
| 58 | + |
@rfm.simple_test
class CPUBandwidth(MemBandwidthTest):
    """Per-socket bandwidth benchmark for each cache level and main memory."""

    # Every (memory level, likwid kernel) combination under test.
    # FIXME: This should be expressed in a better way
    config = parameter([
        ['L1', 'load_avx'], ['L1', 'store_avx'],
        ['L2', 'load_avx'], ['L2', 'store_avx'],
        ['L3', 'load_avx'], ['L3', 'store_avx'],
        ['memory', 'load_avx'],
        ['memory', 'store_mem_avx'],
    ])
    valid_systems = ['daint:mc', 'daint:gpu', 'dom:gpu', 'dom:mc']

    # the kernel to run in likwid
    kernel_name = variable(str)
    mem_level = variable(str)

    # Expected bandwidth (MB/s) per partition type, kernel and level.
    refs = {
        'mc': {
            'load_avx': {'L1': 5100000, 'L2': 2000000, 'L3': 900000,
                         'memory': 130000},
            'store_avx': {'L1': 2800000, 'L2': 900000, 'L3': 480000},
            'store_mem_avx': {'memory': 85000},
        },
        'gpu': {
            'load_avx': {'L1': 2100000, 'L2': 850000, 'L3': 360000,
                         'memory': 65000},
            'store_avx': {'L1': 1200000, 'L2': 340000, 'L3': 210000},
            'store_mem_avx': {'memory': 42500},
        }
    }

    @run_after('init')
    def setup_descr(self):
        # Unpack the test parameter into its two variables.
        self.mem_level, self.kernel_name = self.config
        self.descr = f'CPU <- {self.mem_level} {self.kernel_name} benchmark'

    @run_before('performance')
    def set_reference(self):
        # Resolve the expected figure for this kernel/level per node type,
        # then expand it to every supported system partition.
        ref_proxy = {part: self.refs[part][self.kernel_name][self.mem_level]
                     for part in self.refs}
        self.reference = {
            f'{system}:{part}': {
                'bandwidth': (ref_proxy[part], -0.1, None, 'MB/s')
            }
            for system in ('daint', 'dom')
            for part in ('gpu', 'mc')
        }

    @run_before('run')
    def set_exec_opts(self):
        self.set_processor_properties()
        partname = self.current_partition.fullname
        data_size = self.system_cache_sizes[partname][self.mem_level]
        # result for daint:mc: '-w S0:100MB:18:1:2 -w S1:100MB:18:1:2'
        # format: -w domain:data_size:nthreads:chunk_size:stride
        # chunk_size and stride affect which cpus from <domain> are selected
        workgroups = []
        for dom in self.numa_domains:
            workgroups.append(
                f'-w {dom}:{data_size}:{self.num_cpu_domain:d}:1:2'
            )
        self.executable_opts = [f'-t {self.kernel_name}'] + workgroups
| 120 | + |
| 121 | + |
@rfm.simple_test
class CPUBandwidthCrossSocket(MemBandwidthTest):
    """Cross-socket memory bandwidth benchmark.

    Each socket's CPUs read from the *other* socket's memory domain to
    measure remote (NUMA) memory bandwidth; only the first two NUMA
    domains take part.
    """

    descr = ('CPU S0 <- main memory S1 read '
             'CPU S1 <- main memory S0 read')
    valid_systems = ['daint:mc', 'dom:mc']
    # the kernel to run in likwid
    kernel_name = 'load_avx'
    reference = {
        'daint:mc': {
            'bandwidth': (56000, -0.1, None, 'MB/s')
        },
        'dom:mc': {
            'bandwidth': (56000, -0.1, None, 'MB/s')
        },
    }

    @run_before('run')
    def set_exec_opts(self):
        self.set_processor_properties()
        # daint:mc: '-w S0:100MB:18:1:2-0:S1 -w S1:100MB:18:1:2-0:S0'
        # format:
        # -w domain:data_size:nthreads:chunk_size:stride-stream_nr:mem_domain
        # chunk_size and stride affect which cpus from <domain> are selected
        workgroups = [
            f'-w {dom_cpu}:100MB:{self.num_cpu_domain:d}:1:2-0:{dom_mem}'
            for dom_cpu, dom_mem in
            zip(self.numa_domains[:2], reversed(self.numa_domains[:2]))
        ]
        # f-string for consistency with CPUBandwidth.set_exec_opts
        # (was dated %-formatting; produces the identical option string)
        self.executable_opts = [f'-t {self.kernel_name}'] + workgroups