Skip to content

Commit 1d187c6

Browse files
authored
Merge branch 'master' into enhancement/new_syntax
2 parents ebe0ee8 + 676c076 commit 1d187c6

File tree

25 files changed

+928
-115
lines changed

25 files changed

+928
-115
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,4 @@ For unsubscribing, you may send an empty message to [[email protected]
6161

6262
### Slack
6363

64-
You may also reach the community through Slack at [reframetalk.slack.com](https://reframetalk.slack.com/join/signup). Currently, you may join the Slack workspace by invitation only, which you will get as soon as you subscribe to the mailing list.
64+
You may also reach the community through Slack [here](https://reframe-slack.herokuapp.com).
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
@rfm.simple_test
class Stencil4HPXCheck(rfm.RunOnlyRegressionTest):
    """Run-only check for the HPX ``1d_stencil_4`` example.

    Launches the prebuilt executable (provided by the ``HPX`` module) and
    extracts the reported execution time from its CSV-like output line:
    ``<threads>, <time>, <points>, <parts>, <steps>``.
    """

    def __init__(self):
        super().__init__()

        self.descr = 'HPX 1d_stencil_4 check'
        # FIX: 'daint:gpu, daint:mc' was a single malformed string; every
        # system:partition name must be its own list element, otherwise the
        # daint partitions would never match.
        self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc']
        self.valid_prog_environs = ['PrgEnv-gnu']

        self.modules = ['HPX']
        self.executable = '1d_stencil_4'

        self.nt_opts = '100'        # number of time steps
        self.np_opts = '100'        # number of partitions
        self.nx_opts = '10000000'   # number of points per partition
        self.executable_opts = ['--nt', self.nt_opts,
                                '--np', self.np_opts,
                                '--nx', self.nx_opts]
        self.sourcesdir = None

        self.use_multithreading = None

        # FIX: escape the decimal point; a bare '.' matched any character.
        self.perf_patterns = {
            'time': sn.extractsingle(r'\d+,\s*(?P<time>(\d+)?\.?\d+),\s*\d+,'
                                     r'\s*\d+,\s*\d+',
                                     self.stdout, 'time', float)
        }
        self.reference = {
            'dom:gpu': {
                'time': (42, None, 0.1, 's')
            },
            'dom:mc': {
                'time': (30, None, 0.1, 's')
            },
            'daint:gpu': {
                'time': (42, None, 0.1, 's')
            },
            'daint:mc': {
                'time': (30, None, 0.1, 's')
            },
        }

        self.tags = {'production'}
        self.maintainers = ['VH', 'JG']

    def setup(self, partition, environ, **job_opts):
        # Deferred scan of every output line of the form
        # <tid>, <time>, <pts>, <parts>, <steps>; evaluated during sanity.
        result = sn.findall(r'(?P<tid>\d+),\s*(?P<time>(\d+)?\.?\d+),'
                            r'\s*(?P<pts>\d+),\s*(?P<parts>\d+),'
                            r'\s*(?P<steps>\d+)',
                            self.stdout)

        # Single-node run; thread count matches the partition's core count
        # (12 on the GPU partitions, 36 on the MC partitions).
        if partition.fullname == 'daint:gpu':
            self.num_tasks = 1
            self.num_tasks_per_node = 1
            self.num_cpus_per_task = 12
        elif partition.fullname == 'daint:mc':
            self.num_tasks = 1
            self.num_tasks_per_node = 1
            self.num_cpus_per_task = 36
        elif partition.fullname == 'dom:gpu':
            self.num_tasks = 1
            self.num_tasks_per_node = 1
            self.num_cpus_per_task = 12
        elif partition.fullname == 'dom:mc':
            self.num_tasks = 1
            self.num_tasks_per_node = 1
            self.num_cpus_per_task = 36

        self.executable_opts += ['--hpx:threads=%s' % self.num_cpus_per_task]

        # Every reported line must echo back the requested configuration.
        assert_num_threads = sn.map(lambda x: sn.assert_eq(
            int(x.group('tid')), self.num_cpus_per_task), result)
        assert_num_points = sn.map(lambda x: sn.assert_eq(
            x.group('pts'), self.nx_opts), result)
        assert_num_parts = sn.map(lambda x: sn.assert_eq(x.group('parts'),
                                                         self.np_opts), result)
        assert_num_steps = sn.map(lambda x: sn.assert_eq(x.group('steps'),
                                                         self.nt_opts), result)

        self.sanity_patterns = sn.all(sn.chain(assert_num_threads,
                                               assert_num_points,
                                               assert_num_parts,
                                               assert_num_steps))

        super().setup(partition, environ, **job_opts)
90+
91+
92+
@rfm.simple_test
class Stencil8HPXCheck(rfm.RunOnlyRegressionTest):
    """Run-only check for the distributed HPX ``1d_stencil_8`` example.

    Launches the prebuilt executable (provided by the ``HPX`` module) and
    extracts the reported execution time from its CSV-like output line:
    ``<localities>, <threads>, <time>, <points>, <parts>, <steps>``.
    """

    def __init__(self):
        super().__init__()

        self.descr = 'HPX 1d_stencil_8 check'
        # FIX: 'daint:gpu, daint:mc' was a single malformed string; every
        # system:partition name must be its own list element, otherwise the
        # daint partitions would never match.
        self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc']
        self.valid_prog_environs = ['PrgEnv-gnu']

        self.modules = ['HPX']
        self.executable = '1d_stencil_8'

        self.nt_opts = '100'        # number of time steps
        self.np_opts = '100'        # number of partitions
        self.nx_opts = '10000000'   # number of points per partition
        self.executable_opts = ['--nt', self.nt_opts,
                                '--np', self.np_opts,
                                '--nx', self.nx_opts]
        self.sourcesdir = None

        self.use_multithreading = None

        # FIX: escape the decimal point; a bare '.' matched any character.
        self.perf_patterns = {
            'time': sn.extractsingle(r'\d+,\s*\d+,\s*(?P<time>(\d+)?\.?\d+),'
                                     r'\s*\d+,\s*\d+,\s*\d+',
                                     self.stdout, 'time', float)
        }
        self.reference = {
            'dom:gpu': {
                'time': (26, None, 0.1, 's')
            },
            'dom:mc': {
                'time': (19, None, 0.1, 's')
            },
            'daint:gpu': {
                'time': (26, None, 0.1, 's')
            },
            'daint:mc': {
                'time': (19, None, 0.1, 's')
            },
        }

        self.tags = {'production'}
        self.maintainers = ['VH', 'JG']

    def setup(self, partition, environ, **job_opts):
        # Deferred scan of every output line of the form
        # <lid>, <tid>, <time>, <pts>, <parts>, <steps>.
        result = sn.findall(r'(?P<lid>\d+),\s*(?P<tid>\d+),'
                            r'\s*(?P<time>(\d+)?\.?\d+),'
                            r'\s*(?P<pts>\d+),'
                            r'\s*(?P<parts>\d+),'
                            r'\s*(?P<steps>\d+)', self.stdout)

        # Multi-locality run: 2 tasks on the GPU partitions, 4 tasks
        # (one per socket) on the MC partitions.
        if partition.fullname == 'daint:gpu':
            self.num_tasks = 2
            self.num_tasks_per_node = 1
            self.num_cpus_per_task = 12
        elif partition.fullname == 'daint:mc':
            self.num_tasks = 4
            self.num_tasks_per_node = 2
            self.num_cpus_per_task = 18
            self.num_tasks_per_socket = 1
        elif partition.fullname == 'dom:gpu':
            self.num_tasks = 2
            self.num_tasks_per_node = 1
            self.num_cpus_per_task = 12
        elif partition.fullname == 'dom:mc':
            self.num_tasks = 4
            self.num_tasks_per_node = 2
            self.num_cpus_per_task = 18
            self.num_tasks_per_socket = 1

        self.executable_opts += ['--hpx:threads=%s' % self.num_cpus_per_task]

        # Total worker threads across all localities; every reported line
        # must echo back the requested configuration.
        # NOTE(review): 'lid' is compared for equality with num_tasks —
        # presumably the output reports the locality count; confirm against
        # the 1d_stencil_8 output format.
        num_threads = self.num_tasks * self.num_cpus_per_task
        assert_num_tasks = sn.map(lambda x: sn.assert_eq(int(x.group('lid')),
                                                         self.num_tasks), result)
        assert_num_threads = sn.map(lambda x: sn.assert_eq(int(x.group('tid')),
                                                           num_threads), result)
        assert_num_points = sn.map(lambda x: sn.assert_eq(x.group('pts'),
                                                          self.nx_opts), result)
        assert_num_parts = sn.map(lambda x: sn.assert_eq(x.group('parts'),
                                                         self.np_opts), result)
        assert_num_steps = sn.map(lambda x: sn.assert_eq(x.group('steps'),
                                                         self.nt_opts), result)

        self.sanity_patterns = sn.all(sn.chain(assert_num_tasks,
                                               assert_num_threads,
                                               assert_num_points,
                                               assert_num_parts,
                                               assert_num_steps))

        super().setup(partition, environ, **job_opts)
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
@rfm.required_version('>=2.16-dev0')
@rfm.parameterized_test(['sync'], ['async'])
class KernelLatencyTest(rfm.RegressionTest):
    """Measure CUDA kernel launch latency (in microseconds).

    Compiles ``kernel_latency.cu`` with ``SYNCKERNEL=1`` ('sync' variant:
    device-synchronize after every launch) or ``SYNCKERNEL=0`` ('async'
    variant: single synchronize after all launches) and reports the maximum
    per-GPU latency across all assigned tasks.
    """

    def __init__(self, kernel_version):
        super().__init__()
        self.sourcepath = 'kernel_latency.cu'
        self.build_system = 'SingleSource'
        self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
        self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
        self.num_tasks = 0          # flexible: one task per available node
        self.num_tasks_per_node = 1

        # P100 (sm_60) on daint/dom, K80 (sm_37) on kesch.
        if self.current_system.name in {'dom', 'daint'}:
            self.num_gpus_per_node = 1
            gpu_arch = '60'
            self.modules = ['craype-accel-nvidia60']
            self.valid_prog_environs += ['PrgEnv-gnu']
        else:
            self.num_gpus_per_node = 16
            self.modules = ['craype-accel-nvidia35']
            gpu_arch = '37'

        self.build_system.cxxflags = ['-arch=compute_%s' % gpu_arch,
                                      '-code=sm_%s' % gpu_arch, '-std=c++11']

        if kernel_version == 'sync':
            self.build_system.cppflags = ['-D SYNCKERNEL=1']
        else:
            self.build_system.cppflags = ['-D SYNCKERNEL=0']

        # Every task must report its GPU count and one latency line per GPU.
        self.sanity_patterns = sn.all([
            sn.assert_eq(
                sn.count(sn.findall(r'\[\S+\] Found \d+ gpu\(s\)',
                                    self.stdout)),
                self.num_tasks_assigned),
            sn.assert_eq(
                sn.count(sn.findall(r'\[\S+\] \[gpu \d+\] Kernel launch '
                                    r'latency: \S+ us', self.stdout)),
                self.num_tasks_assigned * self.num_gpus_per_node)
        ])

        self.perf_patterns = {
            'latency': sn.max(sn.extractall(
                r'\[\S+\] \[gpu \d+\] Kernel launch latency: '
                r'(?P<latency>\S+) us', self.stdout, 'latency', float))
        }
        # FIX: the latency is extracted in microseconds ('us'); the
        # 'sync'/dom:gpu and 'async'/daint:gpu entries wrongly used 's'.
        self.sys_reference = {
            'sync': {
                'dom:gpu': {
                    'latency': (6.6, None, 0.10, 'us')
                },
                'daint:gpu': {
                    'latency': (6.6, None, 0.10, 'us')
                },
                'kesch:cn': {
                    'latency': (12.0, None, 0.10, 'us')
                },
            },
            'async': {
                'dom:gpu': {
                    'latency': (2.2, None, 0.10, 'us')
                },
                'daint:gpu': {
                    'latency': (2.2, None, 0.10, 'us')
                },
                'kesch:cn': {
                    'latency': (5.7, None, 0.10, 'us')
                },
            },
        }

        self.reference = self.sys_reference[kernel_version]

        self.maintainers = ['TM']
        self.tags = {'benchmark', 'diagnostic'}

    @property
    @sn.sanity_function
    def num_tasks_assigned(self):
        # Deferred: the actual task count is only known after the job runs
        # (num_tasks == 0 requests a flexible allocation).
        return self.job.num_tasks
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#include <iostream>
2+
#include <chrono>
3+
#include <ratio>
4+
#include <unistd.h>
5+
#include <cuda.h>
6+
7+
// Empty device kernel: launching it measures pure launch overhead.
__global__ void null_kernel() {}
9+
10+
// Times the average launch latency of an empty kernel on every visible GPU.
// Output format (parsed by the ReFrame check):
//   [<hostname>] Found <n> gpu(s)
//   [<hostname>] [gpu <i>] Kernel launch latency: <x> us
int main(int argc, char* argv[]) {

    char hostname[256];
    hostname[255]='\0';
    gethostname(hostname, 255);

    cudaError_t error;
    int gpu_count = 0;

    error = cudaGetDeviceCount(&gpu_count);

    if (error == cudaSuccess) {
        if (gpu_count <= 0) {
            std::cout << "[" << hostname << "] " << "Could not find any gpu\n";
            return 1;
        }
        std::cout << "[" << hostname << "] " << "Found " << gpu_count << " gpu(s)\n";
    }
    else{
        std::cout << "[" << hostname << "] " << "Error getting gpu count, exiting...\n";
        return 1;
    }

    for (int i = 0; i < gpu_count; i++) {

        cudaSetDevice(i);
        // Single kernel launch to initialize cuda runtime
        null_kernel<<<1, 1>>>();

        // FIX: use the monotonic steady_clock for interval timing;
        // system_clock may jump (e.g. NTP adjustments) mid-measurement.
        auto t_start = std::chrono::steady_clock::now();
        const int kernel_count = 1000;

        // FIX: inner loop counter renamed to 'k' — it previously shadowed
        // the outer gpu index 'i'.
        for (int k = 0; k < kernel_count; ++k) {
            null_kernel<<<1, 1>>>();
#if SYNCKERNEL == 1
            // sync variant: wait for each launch to complete
            cudaDeviceSynchronize();
#endif
        }

#if SYNCKERNEL != 1
        // async variant: wait once after all launches
        cudaDeviceSynchronize();
#endif

        auto t_end = std::chrono::steady_clock::now();
        // Average per-launch latency in microseconds.
        std::cout << "[" << hostname << "] " << "[gpu " << i << "] " << "Kernel launch latency: " << std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(t_end - t_start).count() / kernel_count << " us\n";
    }

    return 0;
}
59+

0 commit comments

Comments
 (0)