Skip to content

Commit 1a530cb

Browse files
author
Vasileios Karakasis
authored
Merge branch 'master' into refactor/fix-abstract-classes
2 parents a2e6cbf + 1a7ff13 commit 1a530cb

File tree

11 files changed

+582
-17
lines changed

11 files changed

+582
-17
lines changed

ci-scripts/genrelnotes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def extract_release_notes(git_output, tag):
2222

2323
prev_release, curr_release, *_ = sys.argv[1:]
2424
try:
25-
git_cmd = 'git log --merges v%s..v%s' % (prev_release, curr_release)
25+
git_cmd = 'git log --merges %s..%s' % (prev_release, curr_release)
2626
completed = subprocess.run(git_cmd.split(),
2727
stdout=subprocess.PIPE,
2828
stderr=subprocess.STDOUT,
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
@rfm.required_version('>=2.16-dev0')
6+
@rfm.parameterized_test(['nompi'], ['mpi'])
7+
class FFTWTest(rfm.RegressionTest):
8+
def __init__(self, exec_mode):
9+
super().__init__()
10+
self.sourcepath = 'fftw_benchmark.c'
11+
self.build_system = 'SingleSource'
12+
self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
13+
self.modules = ['cray-fftw']
14+
self.num_tasks_per_node = 12
15+
self.num_gpus_per_node = 0
16+
self.sanity_patterns = sn.assert_eq(
17+
sn.count(sn.findall(r'execution time', self.stdout)), 1)
18+
self.build_system.cflags = ['-O2']
19+
if self.current_system.name == 'kesch':
20+
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
21+
self.build_system.cflags += ['-I$FFTW_INC', '-L$FFTW_DIR',
22+
'-lfftw3']
23+
elif self.current_system.name in {'daint', 'dom'}:
24+
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi',
25+
'PrgEnv-gnu']
26+
27+
self.perf_patterns = {
28+
'fftw_exec_time': sn.extractsingle(
29+
r'execution time:\s+(?P<exec_time>\S+)', self.stdout,
30+
'exec_time', float),
31+
}
32+
33+
if exec_mode == 'nompi':
34+
self.num_tasks = 12
35+
self.executable_opts = ['72 12 1000 0']
36+
self.reference = {
37+
'dom:gpu': {
38+
'fftw_exec_time': (0.55, None, 0.05, 's'),
39+
},
40+
'daint:gpu': {
41+
'fftw_exec_time': (0.55, None, 0.05, 's'),
42+
},
43+
'kesch:cn': {
44+
'fftw_exec_time': (0.61, None, 0.05, 's'),
45+
},
46+
'*': {
47+
'fftw_exec_time': (0, None, None, 's'),
48+
}
49+
}
50+
else:
51+
self.num_tasks = 72
52+
self.executable_opts = ['144 72 200 1']
53+
self.reference = {
54+
'dom:gpu': {
55+
'fftw_exec_time': (0.47, None, 0.50, 's'),
56+
},
57+
'daint:gpu': {
58+
'fftw_exec_time': (0.47, None, 0.50, 's'),
59+
},
60+
'kesch:cn': {
61+
'fftw_exec_time': (1.58, None, 0.50, 's'),
62+
},
63+
'*': {
64+
'fftw_exec_time': (0, None, None, 's'),
65+
}
66+
}
67+
68+
self.maintainers = ['AJ']
69+
self.tags = {'benchmark', 'scs'}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#include <stdlib.h>
2+
#include <stdio.h>
3+
#include <string.h>
4+
#include <complex.h>
5+
#include <fftw3.h>
6+
#include <mpi.h>
7+
8+
fftw_complex *deri_temp_x, *deri_temp_y, *deri_temp_z;
9+
fftw_plan plan_forward_x, plan_backward_x, plan_forward_y, plan_backward_y, plan_forward_z, plan_backward_z;
10+
11+
void init_derivatives(double *func, double *deri, int npx, int npy, int npz, int npy2, int npz2){
12+
int nnn;
13+
deri_temp_x = (fftw_complex *) malloc(npy*npz*(npx/2+1)*sizeof(fftw_complex));
14+
deri_temp_y = (fftw_complex *) malloc(npx*(npy/2+1)*sizeof(fftw_complex));
15+
deri_temp_z = (fftw_complex *) malloc(npx*npy2*(npz2/2+1)*sizeof(fftw_complex));
16+
nnn = npx;
17+
plan_forward_x = fftw_plan_many_dft_r2c(1, &nnn, npy*npz, func, &nnn, 1, npx, deri_temp_x, &nnn, 1, npx/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
18+
nnn = npy;
19+
plan_forward_y = fftw_plan_many_dft_r2c(1, &nnn, npx, func, &nnn, npx, 1, deri_temp_y, &nnn, 1, npy/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
20+
nnn = npz2;
21+
plan_forward_z = fftw_plan_many_dft_r2c(1, &nnn, npx*npy2, func, &nnn, npx*npy2, 1, deri_temp_z, &nnn, 1, npz2/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
22+
nnn = npx;
23+
plan_backward_x = fftw_plan_many_dft_c2r(1, &nnn, npy*npz, deri_temp_x, &nnn, 1, npx/2+1, deri, &nnn, 1, npx, FFTW_MEASURE+FFTW_UNALIGNED);
24+
nnn = npy;
25+
plan_backward_y = fftw_plan_many_dft_c2r(1, &nnn, npx, deri_temp_y, &nnn, 1, npy/2+1, deri, &nnn, npx, 1, FFTW_MEASURE+FFTW_UNALIGNED);
26+
nnn = npz2;
27+
plan_backward_z = fftw_plan_many_dft_c2r(1, &nnn, npx*npy2, deri_temp_z, &nnn, 1, npz2/2+1, deri, &nnn, npx*npy2, 1, FFTW_MEASURE+FFTW_UNALIGNED);
28+
}
29+
30+
void done_derivatives(){
31+
fftw_destroy_plan(plan_backward_z);
32+
fftw_destroy_plan(plan_backward_y);
33+
fftw_destroy_plan(plan_backward_x);
34+
fftw_destroy_plan(plan_forward_z);
35+
fftw_destroy_plan(plan_forward_y);
36+
fftw_destroy_plan(plan_forward_x);
37+
free(deri_temp_z);
38+
free(deri_temp_y);
39+
free(deri_temp_x);
40+
}
41+
42+
void derivative_x1(double *func, double *deri, int npx, int npy, int npz){
43+
int i, jk;
44+
fftw_execute_dft_r2c(plan_forward_x, func, deri_temp_x);
45+
fftw_execute_dft_c2r(plan_backward_x, deri_temp_x, deri);
46+
}
47+
48+
void derivative_y1(double *func, double *deri, int npx, int npy, int npz){
49+
int i, j, k;
50+
for (k = 0; k<npz; k++){
51+
fftw_execute_dft_r2c(plan_forward_y, func+k*npy*npx, deri_temp_y);
52+
fftw_execute_dft_c2r(plan_backward_y, deri_temp_y, deri+k*npy*npx);
53+
}
54+
}
55+
56+
void derivative_z1(double *func, double *deri, int npx, int npy, int npz){
57+
int k, ij;
58+
fftw_execute_dft_r2c(plan_forward_z, func, deri_temp_z);
59+
fftw_execute_dft_c2r(plan_backward_z, deri_temp_z, deri);
60+
}
61+
62+
int main(int argc, char *argv[]){
63+
int mpi_size, mpi_rank;
64+
int npoints, nproc, iter, withmpi;
65+
double *fvalue, *dvalue;
66+
int npx, npy, npz, npy2, npz2;
67+
int i, j, k;
68+
double my_time;
69+
MPI_Init(&argc, &argv);
70+
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
71+
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
72+
if (argc != 5){
73+
if (mpi_rank == 0){
74+
printf("Usage: %s npoints nproc niter withmpi\n", argv[0]);
75+
}
76+
MPI_Finalize();
77+
exit(1);
78+
}
79+
npoints = atoi(argv[1]);
80+
nproc = atoi(argv[2]);
81+
iter = atoi(argv[3]);
82+
withmpi = atoi(argv[4]);
83+
if ((npoints <= 0) || (nproc <= 0) || (iter <= 0) || (withmpi < 0)){
84+
if (mpi_rank == 0){
85+
printf("%s: invalid input arguments\n", argv[0]);
86+
}
87+
MPI_Finalize();
88+
exit(1);
89+
}
90+
if (mpi_size != nproc){
91+
if (mpi_rank == 0){
92+
printf("number of MPI processes must be %d\n", nproc);
93+
}
94+
MPI_Finalize();
95+
exit(1);
96+
}
97+
npx = npy = npz2 = npoints;
98+
npz = npy2 = npoints/nproc;
99+
fvalue = (double *) malloc(npz*npy*npx*sizeof(double));
100+
dvalue = (double *) malloc(npz*npy*npx*sizeof(double));
101+
init_derivatives(fvalue, dvalue, npx, npy, npz, npy2, npz2);
102+
MPI_Barrier(MPI_COMM_WORLD);
103+
my_time = MPI_Wtime();
104+
for (i = 0; i<iter; i++){
105+
derivative_x1(fvalue, dvalue, npx, npy, npz);
106+
derivative_y1(fvalue, dvalue, npx, npy, npz);
107+
if (withmpi){
108+
MPI_Alltoall(fvalue, npx*npy2*npz, MPI_DOUBLE, dvalue, npx*npy2*npz, MPI_DOUBLE, MPI_COMM_WORLD);
109+
}
110+
derivative_z1(fvalue, dvalue, npx, npy, npz);
111+
if (withmpi){
112+
MPI_Alltoall(fvalue, npx*npy2*npz, MPI_DOUBLE, dvalue, npx*npy2*npz, MPI_DOUBLE, MPI_COMM_WORLD);
113+
}
114+
}
115+
my_time = MPI_Wtime()-my_time;
116+
if (mpi_rank == 0){
117+
MPI_Reduce(MPI_IN_PLACE, &my_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
118+
printf("npoints: %d nproc: %d iter: %d withmpi: %d execution time: %e\n", npoints, nproc, iter, withmpi, my_time);
119+
}else{
120+
MPI_Reduce(&my_time, &my_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
121+
}
122+
done_derivatives();
123+
MPI_Finalize();
124+
return(0);
125+
}

cscs-checks/microbenchmarks/osu/osu_tests.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ def __init__(self, variant):
1313
self.build_system = 'Make'
1414
self.build_system.makefile = 'Makefile_alltoall'
1515
self.executable = './osu_alltoall'
16-
# The -x option controls the number of warm-up iterations
17-
# The -i option controls the number of iterations
16+
# The -m option sets the maximum message size
17+
# The -x option sets the number of warm-up iterations
18+
# The -i option sets the number of iterations
1819
self.executable_opts = ['-m', '8', '-x', '1000', '-i', '20000']
1920
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu',
2021
'PrgEnv-intel']
@@ -24,22 +25,24 @@ def __init__(self, variant):
2425
'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
2526
self.stdout, 'latency', float)
2627
}
27-
self.tags = {variant}
28+
self.tags = {variant, 'benchmark'}
2829
self.reference = {
2930
'dom:gpu': {
3031
'latency': (8.23, None, 0.1, 'us')
3132
},
3233
'daint:gpu': {
3334
'latency': (20.73, None, 2.0, 'us')
3435
},
36+
'*': {
37+
'latency': (0, None, None, 'us')
38+
},
3539
}
3640
self.num_tasks_per_node = 1
3741
self.num_gpus_per_node = 1
38-
if self.current_system.name == 'dom':
39-
self.num_tasks = 6
40-
4142
if self.current_system.name == 'daint':
4243
self.num_tasks = 16
44+
else:
45+
self.num_tasks = 6
4346

4447
self.extra_resources = {
4548
'switches': {
@@ -69,7 +72,7 @@ def __init__(self):
6972
self.num_tasks_per_node = 1
7073
self.num_tasks = 0
7174
self.sanity_patterns = sn.assert_found(r'^1048576', self.stdout)
72-
self.tags = {'diagnostic', 'ops'}
75+
self.tags = {'diagnostic', 'ops', 'benchmark'}
7376

7477

7578
@rfm.required_version('>=2.16')
@@ -96,7 +99,7 @@ def __init__(self, variant):
9699
'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',
97100
self.stdout, 'latency', float)
98101
}
99-
self.tags = {'production'}
102+
self.tags = {'production', 'benchmark'}
100103
if variant == 'small':
101104
self.num_tasks = 6
102105
self.reference = {
@@ -108,6 +111,9 @@ def __init__(self, variant):
108111
},
109112
'daint:mc': {
110113
'latency': (8.79, None, 0.25, 'us')
114+
},
115+
'*': {
116+
'latency': (0, None, None, 'us')
111117
}
112118
}
113119
else:
@@ -118,11 +124,12 @@ def __init__(self, variant):
118124
},
119125
'daint:mc': {
120126
'latency': (10.85, None, 0.20, 'us')
127+
},
128+
'*': {
129+
'latency': (0, None, None, 'us')
121130
}
122131
}
123132

124-
# Allow test to run on new systems without errors
125-
self.reference['*:latency'] = (0, None, None, 'us')
126133
self.num_tasks_per_node = 1
127134
self.num_gpus_per_node = 1
128135
self.extra_resources = {
@@ -175,7 +182,7 @@ def __init__(self):
175182
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu',
176183
'PrgEnv-intel']
177184
self.maintainers = ['RS', 'VK']
178-
self.tags = {'production'}
185+
self.tags = {'production', 'benchmark'}
179186
self.sanity_patterns = sn.assert_found(r'^4194304', self.stdout)
180187

181188
self.extra_resources = {
@@ -213,6 +220,9 @@ def __init__(self):
213220
},
214221
'kesch:cn': {
215222
'bw': (6311.48, -0.15, None, 'MB/s')
223+
},
224+
'*': {
225+
'bw': (0, None, None, 'MB/s')
216226
}
217227
}
218228
self.perf_patterns = {
@@ -250,6 +260,9 @@ def __init__(self):
250260
},
251261
'kesch:cn': {
252262
'latency': (1.17, None, 0.1, 'us')
263+
},
264+
'*': {
265+
'latency': (0, None, None, 'us')
253266
}
254267
}
255268
self.perf_patterns = {
@@ -280,6 +293,9 @@ def __init__(self):
280293
'kesch:cn': {
281294
'bw': (6288.98, -0.1, None, 'MB/s')
282295
},
296+
'*': {
297+
'bw': (0, None, None, 'MB/s')
298+
}
283299
}
284300
self.perf_patterns = {
285301
'bw': sn.extractsingle(r'^4194304\s+(?P<bw>\S+)',
@@ -317,6 +333,9 @@ def __init__(self):
317333
'kesch:cn': {
318334
'latency': (23.09, None, 0.1, 'us')
319335
},
336+
'*': {
337+
'latency': (0, None, None, 'us')
338+
}
320339
}
321340
self.perf_patterns = {
322341
'latency': sn.extractsingle(r'^8\s+(?P<latency>\S+)',

0 commit comments

Comments
 (0)