reframe-hpc
diff --git a/‎.pep8speaks.yml‎
Lines changed: 6 additions & 2 deletions b/‎.pep8speaks.yml‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎Jenkinsfile‎
Lines changed: 11 additions & 0 deletions b/‎Jenkinsfile‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 18 additions & 0 deletions b/‎README.md‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎cscs-checks/apps/tensorflow/tf_horovod_check.py‎
Lines changed: 54 additions & 0 deletions b/‎cscs-checks/apps/tensorflow/tf_horovod_check.py‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎cscs-checks/libraries/hpx/hpx_hello_world.py‎
Lines changed: 59 additions & 0 deletions b/‎cscs-checks/libraries/hpx/hpx_hello_world.py‎
Lines changed: 59 additions & 0 deletions
diff --git a/‎cscs-checks/libraries/hpx/hpx_stencil.py‎
Lines changed: 183 additions & 0 deletions b/‎cscs-checks/libraries/hpx/hpx_stencil.py‎
Lines changed: 183 additions & 0 deletions
@@ -6,12 +6,12 @@ message:
         header: "Hello @{name}, Thank you for updating!"
         footer: "Do see the ReFrame [Coding Style Guide](https://github.com/eth-cscs/reframe/wiki/coding-style-guide)"
     no_errors: "Cheers! There are no PEP8 issues in this Pull Request!"
-    
+
 only_mention_files_with_errors: True
 
 scanner:
     diff_only: True
-    
+
 pycodestyle:
     max-line-length: 79
     ignore:
@@ -20,3 +20,7 @@ pycodestyle:
         - E226
         - E241
         - E272
+        - E741
+        - E742
+        - E743
+        - W504
@@ -5,6 +5,7 @@ def loginBash = '#!/bin/bash -l'
 def bashScript = 'ci-scripts/ci-runner.bash'
 def machinesList = ['daint', 'dom', 'kesch']
 def machinesToRun = machinesList
+def runTests = true
 def uniqueID
 
 stage('Initialization') {
@@ -37,6 +38,11 @@ stage('Initialization') {
                 currentBuild.result = 'SUCCESS'
                 return
             }
+            else if (splittedComment[2] == 'none') {
+                runTests = false
+                currentBuild.result = 'SUCCESS'
+                return
+            }
 
             machinesRequested = []
             for (i = 2; i < splittedComment.size(); i++) {
@@ -66,6 +72,11 @@ stage('Initialization') {
     }
 }
 
+if (!runTests) {
+    println "Won't execute any test (${currentBuild.result}). Exiting..."
+    return
+}
+
 if (currentBuild.result != 'SUCCESS') {
     println "Initialization failed (${currentBuild.result}). Exiting..."
     return
 
@@ -44,3 +44,21 @@ The documentation is now up on [localhost:8000](http://localhost:8000), where yo
 In the `cscs-checks/` folder, you can find realistic regression tests used for the CSCS systems that you can reuse and adapt to your system.
 Notice that these tests are published as examples and may not run as-is in your system.
 However, they can serve as a very good starting point for implementing your system tests in ReFrame.
+
+
+## Contact
+
+You can get in contact with the ReFrame community in the following ways:
+
+### Mailing list
+
+For keeping up with the latest news about ReFrame, posting questions and, generally, getting in touch with other users and the developers, you may follow the mailing list: [reframe@sympa.cscs.ch](mailto:reframe@sympa.cscs.ch).
+
+Only subscribers may send messages to the list.
+To subscribe, please send an empty message to [reframe-subscribe@sympa.cscs.ch](mailto:reframe-subscribe@sympa.cscs.ch).
+
+For unsubscribing, you may send an empty message to [reframe-unsubscribe@sympa.cscs.ch](mailto:reframe-unsubscribe@sympa.cscs.ch).
+
+### Slack
+
+You may also reach the community through Slack at [reframetalk.slack.com](https://reframetalk.slack.com/join/signup). Currently, you may join the Slack workspace by invitation only, which you will get as soon as you subscribe to the mailing list.
@@ -0,0 +1,54 @@
+import reframe as rfm
+import reframe.utility.sanity as sn
+
+
+@rfm.required_version('>=2.16-dev0')
+@rfm.simple_test
+class TensorFlowHorovodTest(rfm.RunOnlyRegressionTest):
+    def __init__(self):
+        super().__init__()
+        self.descr = 'Distributed training with TensorFlow and Horovod'
+        self.valid_systems = ['daint:gpu', 'dom:gpu']
+        self.valid_prog_environs = ['PrgEnv-gnu']
+        tfshortver = '1.11'
+        self.sourcesdir = 'https://github.com/tensorflow/benchmarks'
+        self.modules = ['Horovod/0.15.0-CrayGNU-18.08-tf-%s.0' % tfshortver]
+        self.reference = {
+            'dom:gpu': {
+                'throughput': (1133.6, None, 0.05, 'images/s'),
+            },
+            'daint:gpu': {
+                'throughput': (4403.0, None, 0.05, 'images/s')
+            },
+        }
+        self.perf_patterns = {
+            'throughput': sn.avg(sn.extractall(
+                r'total images/sec:\s+(?P<throughput>\S+)',
+                self.stdout, 'throughput', float))
+        }
+        self.sanity_patterns = sn.assert_found(
+            r'[\S+\s+] INFO NET\/IB : Using interface ipogif0'
+            r' for sideband communication', self.stdout)
+        self.num_tasks_per_node = 1
+        if self.current_system.name == 'dom':
+            self.num_tasks = 8
+        elif self.current_system.name == 'daint':
+            self.num_tasks = 32
+
+        self.pre_run = ['git checkout cnn_tf_v%s_compatible' % tfshortver]
+        self.variables = {
+            'NCCL_DEBUG': 'INFO',
+            'NCCL_IB_HCA': 'ipogif0',
+            'NCCL_IB_CUDA_SUPPORT': '1',
+            'OMP_NUM_THREADS': '$SLURM_CPUS_PER_TASK',
+        }
+        self.executable = ('python')
+        self.executable_opts = [
+            'scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py',
+            '--model inception3',
+            '--batch_size 64',
+            '--variable_update horovod',
+            '--log_dir ./logs',
+            '--train_dir ./checkpoints']
+        self.tags = {'production'}
+        self.maintainers = ['MS', 'RS']
@@ -0,0 +1,59 @@
+import reframe as rfm
+import reframe.utility.sanity as sn
+
+
+@rfm.simple_test
+class HelloWorldHPXCheck(rfm.RunOnlyRegressionTest):
+    def __init__(self):
+        super().__init__()
+
+        self.descr = 'HPX hello, world check'
+        self.valid_systems = ['daint:gpu, daint:mc', 'dom:gpu', 'dom:mc']
+        self.valid_prog_environs = ['PrgEnv-gnu']
+
+        self.modules = ['HPX']
+        self.executable = 'hello_world'
+        self.sourcesdir = None
+
+        self.use_multithreading = None
+
+        self.tags = {'production'}
+        self.maintainers = ['VH', 'JG']
+
+    def setup(self, partition, environ, **job_opts):
+        hellos = sn.findall(r'hello world from OS-thread \s*(?P<tid>\d+) on '
+                            r'locality (?P<lid>\d+)', self.stdout)
+
+        if partition.fullname == 'daint:gpu':
+            self.num_tasks = 2
+            self.num_tasks_per_node = 1
+            self.num_cpus_per_task = 12
+        elif partition.fullname == 'daint:mc':
+            self.num_tasks = 2
+            self.num_tasks_per_node = 1
+            self.num_cpus_per_task = 36
+        elif partition.fullname == 'dom:gpu':
+            self.num_tasks = 2
+            self.num_tasks_per_node = 1
+            self.num_cpus_per_task = 12
+        elif partition.fullname == 'dom:mc':
+            self.num_tasks = 2
+            self.num_tasks_per_node = 1
+            self.num_cpus_per_task = 36
+
+        self.executable_opts = ['--hpx:threads=%s' % self.num_cpus_per_task]
+
+        # https://stellar-group.github.io/hpx/docs/sphinx/branches/master/html/terminology.html#term-locality
+        num_localities = self.num_tasks // self.num_tasks_per_node
+        assert_num_tasks = sn.assert_eq(sn.count(hellos),
+                                        self.num_tasks*self.num_cpus_per_task)
+        assert_threads = sn.map(lambda x: sn.assert_lt(int(x.group('tid')),
+                                self.num_cpus_per_task), hellos)
+        assert_localities = sn.map(lambda x: sn.assert_lt(int(x.group('lid')),
+                                   num_localities), hellos)
+
+        self.sanity_patterns = sn.all(sn.chain([assert_num_tasks],
+                                               assert_threads,
+                                               assert_localities))
+
+        super().setup(partition, environ, **job_opts)
@@ -0,0 +1,183 @@
+import reframe as rfm
+import reframe.utility.sanity as sn
+
+
+@rfm.simple_test
+class Stencil4HPXCheck(rfm.RunOnlyRegressionTest):
+    def __init__(self):
+        super().__init__()
+
+        self.descr = 'HPX 1d_stencil_4 check'
+        self.valid_systems = ['daint:gpu, daint:mc', 'dom:gpu', 'dom:mc']
+        self.valid_prog_environs = ['PrgEnv-gnu']
+
+        self.modules = ['HPX']
+        self.executable = '1d_stencil_4'
+
+        self.nt_opts = '100'  # number of time steps
+        self.np_opts = '100'  # number of partitions
+        self.nx_opts = '10000000'  # number of points per partition
+        self.executable_opts = ['--nt', self.nt_opts,
+                                '--np', self.np_opts,
+                                '--nx', self.nx_opts]
+        self.sourcesdir = None
+
+        self.use_multithreading = None
+
+        self.perf_patterns = {
+            'time': sn.extractsingle(r'\d+,\s*(?P<time>(\d+)?.?\d+),\s*\d+,'
+                                     r'\s*\d+,\s*\d+',
+                                     self.stdout, 'time', float)
+        }
+        self.reference = {
+            'dom:gpu': {
+                'time': (42, None, 0.1, 's')
+            },
+            'dom:mc': {
+                'time': (30, None, 0.1, 's')
+            },
+            'daint:gpu': {
+                'time': (42, None, 0.1, 's')
+            },
+            'daint:mc': {
+                'time': (30, None, 0.1, 's')
+            },
+        }
+
+        self.tags = {'production'}
+        self.maintainers = ['VH', 'JG']
+
+    def setup(self, partition, environ, **job_opts):
+        result = sn.findall(r'(?P<tid>\d+),\s*(?P<time>(\d+)?.?\d+),'
+                            r'\s*(?P<pts>\d+),\s*(?P<parts>\d+),'
+                            r'\s*(?P<steps>\d+)',
+                            self.stdout)
+
+        if partition.fullname == 'daint:gpu':
+            self.num_tasks = 1
+            self.num_tasks_per_node = 1
+            self.num_cpus_per_task = 12
+        elif partition.fullname == 'daint:mc':
+            self.num_tasks = 1
+            self.num_tasks_per_node = 1
+            self.num_cpus_per_task = 36
+        elif partition.fullname == 'dom:gpu':
+            self.num_tasks = 1
+            self.num_tasks_per_node = 1
+            self.num_cpus_per_task = 12
+        elif partition.fullname == 'dom:mc':
+            self.num_tasks = 1
+            self.num_tasks_per_node = 1
+            self.num_cpus_per_task = 36
+
+        self.executable_opts += ['--hpx:threads=%s' % self.num_cpus_per_task]
+
+        assert_num_threads = sn.map(lambda x: sn.assert_eq(
+            int(x.group('tid')), self.num_cpus_per_task), result)
+        assert_num_points = sn.map(lambda x: sn.assert_eq(
+            x.group('pts'), self.nx_opts), result)
+        assert_num_parts = sn.map(lambda x: sn.assert_eq(x.group('parts'),
+                                                         self.np_opts), result)
+        assert_num_steps = sn.map(lambda x: sn.assert_eq(x.group('steps'),
+                                                         self.nt_opts), result)
+
+        self.sanity_patterns = sn.all(sn.chain(assert_num_threads,
+                                               assert_num_points,
+                                               assert_num_parts,
+                                               assert_num_steps))
+
+        super().setup(partition, environ, **job_opts)
+
+
+@rfm.simple_test
+class Stencil8HPXCheck(rfm.RunOnlyRegressionTest):
+    def __init__(self):
+        super().__init__()
+
+        self.descr = 'HPX 1d_stencil_8 check'
+        self.valid_systems = ['daint:gpu, daint:mc', 'dom:gpu', 'dom:mc']
+        self.valid_prog_environs = ['PrgEnv-gnu']
+
+        self.modules = ['HPX']
+        self.executable = '1d_stencil_8'
+
+        self.nt_opts = '100'  # number of time steps
+        self.np_opts = '100'  # number of partitions
+        self.nx_opts = '10000000'  # number of points per partition
+        self.executable_opts = ['--nt', self.nt_opts,
+                                '--np', self.np_opts,
+                                '--nx', self.nx_opts]
+        self.sourcesdir = None
+
+        self.use_multithreading = None
+
+        self.perf_patterns = {
+            'time': sn.extractsingle(r'\d+,\s*\d+,\s*(?P<time>(\d+)?.?\d+),'
+                                     r'\s*\d+,\s*\d+,\s*\d+',
+                                     self.stdout, 'time', float)
+        }
+        self.reference = {
+            'dom:gpu': {
+                'time': (26, None, 0.1, 's')
+            },
+            'dom:mc': {
+                'time': (19, None, 0.1, 's')
+            },
+            'daint:gpu': {
+                'time': (26, None, 0.1, 's')
+            },
+            'daint:mc': {
+                'time': (19, None, 0.1, 's')
+            },
+        }
+
+        self.tags = {'production'}
+        self.maintainers = ['VH', 'JG']
+
+    def setup(self, partition, environ, **job_opts):
+        result = sn.findall(r'(?P<lid>\d+),\s*(?P<tid>\d+),'
+                            r'\s*(?P<time>(\d+)?.?\d+),'
+                            r'\s*(?P<pts>\d+),'
+                            r'\s*(?P<parts>\d+),'
+                            r'\s*(?P<steps>\d+)', self.stdout)
+
+        if partition.fullname == 'daint:gpu':
+            self.num_tasks = 2
+            self.num_tasks_per_node = 1
+            self.num_cpus_per_task = 12
+        elif partition.fullname == 'daint:mc':
+            self.num_tasks = 4
+            self.num_tasks_per_node = 2
+            self.num_cpus_per_task = 18
+            self.num_tasks_per_socket = 1
+        elif partition.fullname == 'dom:gpu':
+            self.num_tasks = 2
+            self.num_tasks_per_node = 1
+            self.num_cpus_per_task = 12
+        elif partition.fullname == 'dom:mc':
+            self.num_tasks = 4
+            self.num_tasks_per_node = 2
+            self.num_cpus_per_task = 18
+            self.num_tasks_per_socket = 1
+
+        self.executable_opts += ['--hpx:threads=%s' % self.num_cpus_per_task]
+
+        num_threads = self.num_tasks * self.num_cpus_per_task
+        assert_num_tasks = sn.map(lambda x: sn.assert_eq(int(x.group('lid')),
+                                                         self.num_tasks), result)
+        assert_num_threads = sn.map(lambda x: sn.assert_eq(int(x.group('tid')),
+                                                           num_threads), result)
+        assert_num_points = sn.map(lambda x: sn.assert_eq(x.group('pts'),
+                                                          self.nx_opts), result)
+        assert_num_parts = sn.map(lambda x: sn.assert_eq(x.group('parts'),
+                                                         self.np_opts), result)
+        assert_num_steps = sn.map(lambda x: sn.assert_eq(x.group('steps'),
+                                                         self.nt_opts), result)
+
+        self.sanity_patterns = sn.all(sn.chain(assert_num_tasks,
+                                               assert_num_threads,
+                                               assert_num_points,
+                                               assert_num_parts,
+                                               assert_num_steps))
+
+        super().setup(partition, environ, **job_opts)