reframe-hpc
diff --git a/ci-scripts/genrelnotes.py b/ci-scripts/genrelnotes.py
Lines changed: 1 addition & 1 deletion
diff --git a/cscs-checks/microbenchmarks/fft/fftw_benchmark.py b/cscs-checks/microbenchmarks/fft/fftw_benchmark.py
Lines changed: 69 additions & 0 deletions
diff --git a/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c b/cscs-checks/microbenchmarks/fft/src/fftw_benchmark.c
Lines changed: 125 additions & 0 deletions
diff --git a/cscs-checks/microbenchmarks/osu/osu_tests.py b/cscs-checks/microbenchmarks/osu/osu_tests.py
Lines changed: 30 additions & 11 deletions
@@ -22,7 +22,7 @@ def extract_release_notes(git_output, tag):
 
     prev_release, curr_release, *_ = sys.argv[1:]
     try:
-        git_cmd = 'git log --merges v%s..v%s' % (prev_release, curr_release)
+        git_cmd = 'git log --merges %s..%s' % (prev_release, curr_release)
         completed = subprocess.run(git_cmd.split(),
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT,
 
@@ -0,0 +1,69 @@
+import reframe as rfm
+import reframe.utility.sanity as sn
+
+
+# FFTW micro-benchmark check.  Builds fftw_benchmark.c against the cray-fftw
+# module, requires exactly one 'execution time' line on stdout and tracks the
+# reported time as the 'fftw_exec_time' performance variable.  The check is
+# instantiated twice: once for the 'nompi' mode and once for the 'mpi' mode.
+@rfm.required_version('>=2.16-dev0')
+@rfm.parameterized_test(['nompi'], ['mpi'])
+class FFTWTest(rfm.RegressionTest):
+    def __init__(self, exec_mode):
+        super().__init__()
+
+        # Build settings: a single C source compiled with -O2.
+        self.sourcepath = 'fftw_benchmark.c'
+        self.build_system = 'SingleSource'
+        self.build_system.cflags = ['-O2']
+        self.modules = ['cray-fftw']
+
+        # Systems and programming environments.
+        self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
+        system_name = self.current_system.name
+        if system_name in {'daint', 'dom'}:
+            self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi',
+                                        'PrgEnv-gnu']
+        elif system_name == 'kesch':
+            self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
+            # On kesch the FFTW include/library locations are passed
+            # explicitly on the compile line.
+            self.build_system.cflags += ['-I$FFTW_INC', '-L$FFTW_DIR',
+                                         '-lfftw3']
+
+        # Job geometry common to both modes.
+        self.num_tasks_per_node = 12
+        self.num_gpus_per_node = 0
+
+        # Sanity: exactly one timing line must be printed.
+        timing_lines = sn.findall(r'execution time', self.stdout)
+        self.sanity_patterns = sn.assert_eq(sn.count(timing_lines), 1)
+
+        # Performance: the floating-point value following 'execution time:'.
+        exec_time = sn.extractsingle(
+            r'execution time:\s+(?P<exec_time>\S+)', self.stdout,
+            'exec_time', float)
+        self.perf_patterns = {'fftw_exec_time': exec_time}
+
+        # Mode-specific task count, program arguments and reference numbers.
+        # The program arguments are 'npoints nproc niter withmpi' (see the
+        # usage message of the C benchmark).
+        if exec_mode == 'nompi':
+            self.num_tasks = 12
+            self.executable_opts = ['72 12 1000 0']
+            cray_time, kesch_time, upper_thres = 0.55, 0.61, 0.05
+        else:
+            self.num_tasks = 72
+            self.executable_opts = ['144 72 200 1']
+            cray_time, kesch_time, upper_thres = 0.47, 1.58, 0.50
+
+        # Each entry is (reference value, lower threshold, upper threshold,
+        # unit); the '*' entry carries no thresholds.
+        self.reference = {
+            'dom:gpu': {
+                'fftw_exec_time': (cray_time, None, upper_thres, 's'),
+            },
+            'daint:gpu': {
+                'fftw_exec_time': (cray_time, None, upper_thres, 's'),
+            },
+            'kesch:cn': {
+                'fftw_exec_time': (kesch_time, None, upper_thres, 's'),
+            },
+            '*': {
+                'fftw_exec_time': (0, None, None, 's'),
+            }
+        }
+
+        self.maintainers = ['AJ']
+        self.tags = {'benchmark', 'scs'}
@@ -0,0 +1,125 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <complex.h>
+#include <fftw3.h>
+#include <mpi.h>
+
+/* Complex scratch buffers that receive the output of the forward
+ * (real-to-complex) transforms along x, y and z.  Allocated in
+ * init_derivatives() and released in done_derivatives(). */
+fftw_complex *deri_temp_x, *deri_temp_y, *deri_temp_z;
+/* FFTW plans for the forward (r2c) and backward (c2r) transforms along each
+ * axis.  Created in init_derivatives(), destroyed in done_derivatives(). */
+fftw_plan plan_forward_x, plan_backward_x, plan_forward_y, plan_backward_y, plan_forward_z, plan_backward_z;
+
+/*
+ * Allocate the complex scratch buffers and create the six FFTW plans
+ * (forward r2c and backward c2r batched 1-D transforms along x, y and z).
+ *
+ *   func, deri    real input / output arrays the plans are created for;
+ *                 planning with FFTW_MEASURE may overwrite their contents
+ *   npx, npy, npz extents used for the x and y transforms
+ *   npy2, npz2    extents used for the z transform
+ *
+ * Aborts the MPI job if an allocation or a plan creation fails, instead of
+ * letting a NULL pointer reach fftw_execute_*().
+ */
+void init_derivatives(double *func, double *deri, int npx, int npy, int npz, int npy2, int npz2){
+    /* Planner flags are bit flags, so combine them with a bitwise OR. */
+    const unsigned flags = FFTW_MEASURE | FFTW_UNALIGNED;
+    int nnn;
+
+    /* Widen to size_t before multiplying so large grids cannot overflow int. */
+    deri_temp_x = (fftw_complex *) malloc((size_t)npy*npz*(npx/2+1)*sizeof(fftw_complex));
+    deri_temp_y = (fftw_complex *) malloc((size_t)npx*(npy/2+1)*sizeof(fftw_complex));
+    deri_temp_z = (fftw_complex *) malloc((size_t)npx*npy2*(npz2/2+1)*sizeof(fftw_complex));
+    if (!deri_temp_x || !deri_temp_y || !deri_temp_z){
+        fprintf(stderr, "init_derivatives: out of memory\n");
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    /* x: npy*npz contiguous rows of length npx. */
+    nnn = npx;
+    plan_forward_x = fftw_plan_many_dft_r2c(1, &nnn, npy*npz, func, &nnn, 1, npx, deri_temp_x, &nnn, 1, npx/2+1, flags);
+    /* y: npx columns of one x-y slab, element stride npx. */
+    nnn = npy;
+    plan_forward_y = fftw_plan_many_dft_r2c(1, &nnn, npx, func, &nnn, npx, 1, deri_temp_y, &nnn, 1, npy/2+1, flags);
+    /* z: npx*npy2 pencils of length npz2, element stride npx*npy2. */
+    nnn = npz2;
+    plan_forward_z = fftw_plan_many_dft_r2c(1, &nnn, npx*npy2, func, &nnn, npx*npy2, 1, deri_temp_z, &nnn, 1, npz2/2+1, flags);
+
+    /* Backward plans mirror the forward layouts. */
+    nnn = npx;
+    plan_backward_x = fftw_plan_many_dft_c2r(1, &nnn, npy*npz, deri_temp_x, &nnn, 1, npx/2+1, deri, &nnn, 1, npx, flags);
+    nnn = npy;
+    plan_backward_y = fftw_plan_many_dft_c2r(1, &nnn, npx, deri_temp_y, &nnn, 1, npy/2+1, deri, &nnn, npx, 1, flags);
+    nnn = npz2;
+    plan_backward_z = fftw_plan_many_dft_c2r(1, &nnn, npx*npy2, deri_temp_z, &nnn, 1, npz2/2+1, deri, &nnn, npx*npy2, 1, flags);
+
+    if (!plan_forward_x || !plan_forward_y || !plan_forward_z ||
+        !plan_backward_x || !plan_backward_y || !plan_backward_z){
+        fprintf(stderr, "init_derivatives: FFTW plan creation failed\n");
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+}
+
+/*
+ * Tear down everything init_derivatives() set up: destroy the six FFTW plans
+ * and release the three complex scratch buffers.
+ */
+void done_derivatives(){
+    /* Same teardown order as before: backward plans first, z to x. */
+    fftw_plan all_plans[] = {
+        plan_backward_z, plan_backward_y, plan_backward_x,
+        plan_forward_z, plan_forward_y, plan_forward_x
+    };
+    fftw_complex *all_buffers[] = {deri_temp_z, deri_temp_y, deri_temp_x};
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(all_plans)/sizeof(all_plans[0]); idx++){
+        fftw_destroy_plan(all_plans[idx]);
+    }
+    for (idx = 0; idx < sizeof(all_buffers)/sizeof(all_buffers[0]); idx++){
+        free(all_buffers[idx]);
+    }
+}
+
+/*
+ * Run the x-direction forward (r2c) transform of func into deri_temp_x and
+ * the backward (c2r) transform of deri_temp_x into deri.  No spectral
+ * operation or rescaling is applied in between.  The extent parameters are
+ * unused here; the geometry is baked into the plans.
+ */
+void derivative_x1(double *func, double *deri, int npx, int npy, int npz){
+    fftw_execute_dft_r2c(plan_forward_x, func, deri_temp_x);
+    fftw_execute_dft_c2r(plan_backward_x, deri_temp_x, deri);
+}
+
+/*
+ * Run the y-direction forward (r2c) and backward (c2r) transforms, one x-y
+ * slab at a time: for each of the npz slabs the plans are applied at offset
+ * k*npy*npx into func and deri.  No spectral operation or rescaling is
+ * applied in between.
+ */
+void derivative_y1(double *func, double *deri, int npx, int npy, int npz){
+    int k;
+    for (k = 0; k<npz; k++){
+        fftw_execute_dft_r2c(plan_forward_y, func+k*npy*npx, deri_temp_y);
+        fftw_execute_dft_c2r(plan_backward_y, deri_temp_y, deri+k*npy*npx);
+    }
+}
+
+/*
+ * Run the z-direction forward (r2c) transform of func into deri_temp_z and
+ * the backward (c2r) transform of deri_temp_z into deri.  No spectral
+ * operation or rescaling is applied in between.  The extent parameters are
+ * unused here; the geometry is baked into the plans.
+ */
+void derivative_z1(double *func, double *deri, int npx, int npy, int npz){
+    fftw_execute_dft_r2c(plan_forward_z, func, deri_temp_z);
+    fftw_execute_dft_c2r(plan_backward_z, deri_temp_z, deri);
+}
+
+/*
+ * Benchmark driver.
+ *
+ * Usage: <exe> npoints nproc niter withmpi
+ *   npoints  grid points per dimension
+ *   nproc    required number of MPI ranks; each rank holds npoints/nproc
+ *            x-y slabs
+ *   niter    number of timed iterations
+ *   withmpi  non-zero to add an MPI_Alltoall before and after the z transform
+ *
+ * Each iteration runs the x, y and z transform round trips.  Rank 0 prints
+ * the maximum elapsed wall time over all ranks.  Returns 0 on success and
+ * exits with status 1 on invalid arguments.
+ */
+int main(int argc, char *argv[]){
+    int mpi_size, mpi_rank;
+    int npoints, nproc, iter, withmpi;
+    double *fvalue, *dvalue;
+    int npx, npy, npz, npy2, npz2;
+    int i;
+    size_t n, nlocal;
+    double my_time, max_time = 0.0;
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
+    if (argc != 5){
+        if (mpi_rank == 0){
+            printf("Usage: %s npoints nproc niter withmpi\n", argv[0]);
+        }
+        MPI_Finalize();
+        exit(1);
+    }
+    npoints = atoi(argv[1]);
+    nproc = atoi(argv[2]);
+    iter = atoi(argv[3]);
+    withmpi = atoi(argv[4]);
+    if ((npoints <= 0) || (nproc <= 0) || (iter <= 0) || (withmpi < 0)){
+        if (mpi_rank == 0){
+            printf("%s: invalid input arguments\n", argv[0]);
+        }
+        MPI_Finalize();
+        exit(1);
+    }
+    if (mpi_size != nproc){
+        if (mpi_rank == 0){
+            printf("number of MPI processes must be %d\n", nproc);
+        }
+        MPI_Finalize();
+        exit(1);
+    }
+    /* npoints/nproc would be 0: zero-sized slabs cannot be planned or run. */
+    if (npoints < nproc){
+        if (mpi_rank == 0){
+            printf("%s: npoints must be at least nproc\n", argv[0]);
+        }
+        MPI_Finalize();
+        exit(1);
+    }
+    npx = npy = npz2 = npoints;
+    npz = npy2 = npoints/nproc;
+    /* Compute the local element count in size_t to avoid int overflow. */
+    nlocal = (size_t)npz*npy*npx;
+    fvalue = (double *) malloc(nlocal*sizeof(double));
+    dvalue = (double *) malloc(nlocal*sizeof(double));
+    if (!fvalue || !dvalue){
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+    init_derivatives(fvalue, dvalue, npx, npy, npz, npy2, npz2);
+    /* The arrays were never initialised and planning may have overwritten
+     * them; fill them with finite, deterministic data before timing. */
+    for (n = 0; n<nlocal; n++){
+        fvalue[n] = (double)(n % (size_t)npx) / npx;
+        dvalue[n] = 0.0;
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+    my_time = MPI_Wtime();
+    for (i = 0; i<iter; i++){
+        derivative_x1(fvalue, dvalue, npx, npy, npz);
+        derivative_y1(fvalue, dvalue, npx, npy, npz);
+        if (withmpi){
+            MPI_Alltoall(fvalue, npx*npy2*npz, MPI_DOUBLE, dvalue, npx*npy2*npz, MPI_DOUBLE, MPI_COMM_WORLD);
+        }
+        derivative_z1(fvalue, dvalue, npx, npy, npz);
+        if (withmpi){
+            MPI_Alltoall(fvalue, npx*npy2*npz, MPI_DOUBLE, dvalue, npx*npy2*npz, MPI_DOUBLE, MPI_COMM_WORLD);
+        }
+    }
+    my_time = MPI_Wtime()-my_time;
+    /* Reduce into a separate variable so that no rank hands the same buffer
+     * to MPI_Reduce as both send and receive argument. */
+    MPI_Reduce(&my_time, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
+    if (mpi_rank == 0){
+        printf("npoints: %d nproc: %d iter: %d withmpi: %d execution time: %e\n", npoints, nproc, iter, withmpi, max_time);
+    }
+    done_derivatives();
+    free(dvalue);
+    free(fvalue);
+    MPI_Finalize();
+    return(0);
+}
@@ -13,8 +13,9 @@ def __init__(self, variant):
         self.build_system = 'Make'
         self.build_system.makefile = 'Makefile_alltoall'
         self.executable = './osu_alltoall'
-        # The -x option controls the number of warm-up iterations
-        # The -i option controls the number of iterations
+        # The -m option sets the maximum message size
+        # The -x option sets the number of warm-up iterations
+        # The -i option sets the number of iterations
         self.executable_opts = ['-m', '8', '-x', '1000', '-i', '20000']
         self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu',
                                     'PrgEnv-intel']
@@ -24,22 +25,24 @@ def __init__(self, variant):
             'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
                                         self.stdout, 'latency', float)
         }
-        self.tags = {variant}
+        self.tags = {variant, 'benchmark'}
         self.reference = {
             'dom:gpu': {
                 'latency': (8.23, None, 0.1, 'us')
             },
             'daint:gpu': {
                 'latency': (20.73, None, 2.0, 'us')
             },
+            '*': {
+                'latency': (0, None, None, 'us')
+            },
         }
         self.num_tasks_per_node = 1
         self.num_gpus_per_node  = 1
-        if self.current_system.name == 'dom':
-            self.num_tasks = 6
-
         if self.current_system.name == 'daint':
             self.num_tasks = 16
+        else:
+            self.num_tasks = 6
 
         self.extra_resources = {
             'switches': {
@@ -69,7 +72,7 @@ def __init__(self):
         self.num_tasks_per_node = 1
         self.num_tasks = 0
         self.sanity_patterns = sn.assert_found(r'^1048576', self.stdout)
-        self.tags = {'diagnostic', 'ops'}
+        self.tags = {'diagnostic', 'ops', 'benchmark'}
 
 
 @rfm.required_version('>=2.16')
@@ -96,7 +99,7 @@ def __init__(self, variant):
             'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
                                         self.stdout, 'latency', float)
         }
-        self.tags = {'production'}
+        self.tags = {'production', 'benchmark'}
         if variant == 'small':
             self.num_tasks = 6
             self.reference = {
@@ -108,6 +111,9 @@ def __init__(self, variant):
                 },
                 'daint:mc': {
                     'latency': (8.79, None, 0.25, 'us')
+                },
+                '*': {
+                    'latency': (0, None, None, 'us')
                 }
             }
         else:
@@ -118,11 +124,12 @@ def __init__(self, variant):
                 },
                 'daint:mc': {
                     'latency': (10.85, None, 0.20, 'us')
+                },
+                '*': {
+                    'latency': (0, None, None, 'us')
                 }
             }
 
-        # Allow test to run on new systems without errors
-        self.reference['*:latency'] = (0, None, None, 'us')
         self.num_tasks_per_node = 1
         self.num_gpus_per_node  = 1
         self.extra_resources = {
@@ -175,7 +182,7 @@ def __init__(self):
             self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu',
                                         'PrgEnv-intel']
         self.maintainers = ['RS', 'VK']
-        self.tags = {'production'}
+        self.tags = {'production', 'benchmark'}
         self.sanity_patterns = sn.assert_found(r'^4194304', self.stdout)
 
         self.extra_resources = {
@@ -213,6 +220,9 @@ def __init__(self):
             },
             'kesch:cn': {
                 'bw': (6311.48, -0.15, None, 'MB/s')
+            },
+            '*': {
+                'bw': (0, None, None, 'MB/s')
             }
         }
         self.perf_patterns = {
@@ -250,6 +260,9 @@ def __init__(self):
             },
             'kesch:cn': {
                 'latency': (1.17, None, 0.1, 'us')
+            },
+            '*': {
+                'latency': (0, None, None, 'us')
             }
         }
         self.perf_patterns = {
@@ -280,6 +293,9 @@ def __init__(self):
             'kesch:cn': {
                 'bw': (6288.98, -0.1, None, 'MB/s')
             },
+            '*': {
+                'bw': (0, None, None, 'MB/s')
+            }
         }
         self.perf_patterns = {
             'bw': sn.extractsingle(r'^4194304\s+(?P<bw>\S+)',
@@ -317,6 +333,9 @@ def __init__(self):
             'kesch:cn': {
                 'latency': (23.09, None, 0.1, 'us')
             },
+            '*': {
+                'latency': (0, None, None, 'us')
+            }
         }
         self.perf_patterns = {
             'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',