Skip to content

Commit d8517e5

Browse files
author
Vasileios Karakasis
authored
Merge pull request #776 from ajocksch/checks/fft
[test] Add simple FFTW benchmark
2 parents 12b7110 + 84fee46 commit d8517e5

File tree

2 files changed

+194
-0
lines changed

2 files changed

+194
-0
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
@rfm.required_version('>=2.16-dev0')
6+
@rfm.parameterized_test(['nompi'], ['mpi'])
7+
class FFTWTest(rfm.RegressionTest):
8+
def __init__(self, exec_mode):
9+
super().__init__()
10+
self.sourcepath = 'fftw_benchmark.c'
11+
self.build_system = 'SingleSource'
12+
self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
13+
self.modules = ['cray-fftw']
14+
self.num_tasks_per_node = 12
15+
self.num_gpus_per_node = 0
16+
self.sanity_patterns = sn.assert_eq(
17+
sn.count(sn.findall(r'execution time', self.stdout)), 1)
18+
self.build_system.cflags = ['-O2']
19+
if self.current_system.name == 'kesch':
20+
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
21+
self.build_system.cflags += ['-I$FFTW_INC', '-L$FFTW_DIR',
22+
'-lfftw3']
23+
elif self.current_system.name in {'daint', 'dom'}:
24+
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi',
25+
'PrgEnv-gnu']
26+
27+
self.perf_patterns = {
28+
'fftw_exec_time': sn.extractsingle(
29+
r'execution time:\s+(?P<exec_time>\S+)', self.stdout,
30+
'exec_time', float),
31+
}
32+
33+
if exec_mode == 'nompi':
34+
self.num_tasks = 12
35+
self.executable_opts = ['72 12 1000 0']
36+
self.reference = {
37+
'dom:gpu': {
38+
'fftw_exec_time': (0.55, None, 0.05, 's'),
39+
},
40+
'daint:gpu': {
41+
'fftw_exec_time': (0.55, None, 0.05, 's'),
42+
},
43+
'kesch:cn': {
44+
'fftw_exec_time': (0.61, None, 0.05, 's'),
45+
},
46+
'*': {
47+
'fftw_exec_time': (0, None, None, 's'),
48+
}
49+
}
50+
else:
51+
self.num_tasks = 72
52+
self.executable_opts = ['144 72 200 1']
53+
self.reference = {
54+
'dom:gpu': {
55+
'fftw_exec_time': (0.47, None, 0.50, 's'),
56+
},
57+
'daint:gpu': {
58+
'fftw_exec_time': (0.47, None, 0.50, 's'),
59+
},
60+
'kesch:cn': {
61+
'fftw_exec_time': (1.58, None, 0.50, 's'),
62+
},
63+
'*': {
64+
'fftw_exec_time': (0, None, None, 's'),
65+
}
66+
}
67+
68+
self.maintainers = ['AJ']
69+
self.tags = {'benchmark', 'scs'}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#include <stdlib.h>
2+
#include <stdio.h>
3+
#include <string.h>
4+
#include <complex.h>
5+
#include <fftw3.h>
6+
#include <mpi.h>
7+
8+
fftw_complex *deri_temp_x, *deri_temp_y, *deri_temp_z;
9+
fftw_plan plan_forward_x, plan_backward_x, plan_forward_y, plan_backward_y, plan_forward_z, plan_backward_z;
10+
11+
void init_derivatives(double *func, double *deri, int npx, int npy, int npz, int npy2, int npz2){
12+
int nnn;
13+
deri_temp_x = (fftw_complex *) malloc(npy*npz*(npx/2+1)*sizeof(fftw_complex));
14+
deri_temp_y = (fftw_complex *) malloc(npx*(npy/2+1)*sizeof(fftw_complex));
15+
deri_temp_z = (fftw_complex *) malloc(npx*npy2*(npz2/2+1)*sizeof(fftw_complex));
16+
nnn = npx;
17+
plan_forward_x = fftw_plan_many_dft_r2c(1, &nnn, npy*npz, func, &nnn, 1, npx, deri_temp_x, &nnn, 1, npx/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
18+
nnn = npy;
19+
plan_forward_y = fftw_plan_many_dft_r2c(1, &nnn, npx, func, &nnn, npx, 1, deri_temp_y, &nnn, 1, npy/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
20+
nnn = npz2;
21+
plan_forward_z = fftw_plan_many_dft_r2c(1, &nnn, npx*npy2, func, &nnn, npx*npy2, 1, deri_temp_z, &nnn, 1, npz2/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
22+
nnn = npx;
23+
plan_backward_x = fftw_plan_many_dft_c2r(1, &nnn, npy*npz, deri_temp_x, &nnn, 1, npx/2+1, deri, &nnn, 1, npx, FFTW_MEASURE+FFTW_UNALIGNED);
24+
nnn = npy;
25+
plan_backward_y = fftw_plan_many_dft_c2r(1, &nnn, npx, deri_temp_y, &nnn, 1, npy/2+1, deri, &nnn, npx, 1, FFTW_MEASURE+FFTW_UNALIGNED);
26+
nnn = npz2;
27+
plan_backward_z = fftw_plan_many_dft_c2r(1, &nnn, npx*npy2, deri_temp_z, &nnn, 1, npz2/2+1, deri, &nnn, npx*npy2, 1, FFTW_MEASURE+FFTW_UNALIGNED);
28+
}
29+
30+
void done_derivatives(){
31+
fftw_destroy_plan(plan_backward_z);
32+
fftw_destroy_plan(plan_backward_y);
33+
fftw_destroy_plan(plan_backward_x);
34+
fftw_destroy_plan(plan_forward_z);
35+
fftw_destroy_plan(plan_forward_y);
36+
fftw_destroy_plan(plan_forward_x);
37+
free(deri_temp_z);
38+
free(deri_temp_y);
39+
free(deri_temp_x);
40+
}
41+
42+
void derivative_x1(double *func, double *deri, int npx, int npy, int npz){
43+
int i, jk;
44+
fftw_execute_dft_r2c(plan_forward_x, func, deri_temp_x);
45+
fftw_execute_dft_c2r(plan_backward_x, deri_temp_x, deri);
46+
}
47+
48+
void derivative_y1(double *func, double *deri, int npx, int npy, int npz){
49+
int i, j, k;
50+
for (k = 0; k<npz; k++){
51+
fftw_execute_dft_r2c(plan_forward_y, func+k*npy*npx, deri_temp_y);
52+
fftw_execute_dft_c2r(plan_backward_y, deri_temp_y, deri+k*npy*npx);
53+
}
54+
}
55+
56+
void derivative_z1(double *func, double *deri, int npx, int npy, int npz){
57+
int k, ij;
58+
fftw_execute_dft_r2c(plan_forward_z, func, deri_temp_z);
59+
fftw_execute_dft_c2r(plan_backward_z, deri_temp_z, deri);
60+
}
61+
62+
int main(int argc, char *argv[]){
63+
int mpi_size, mpi_rank;
64+
int npoints, nproc, iter, withmpi;
65+
double *fvalue, *dvalue;
66+
int npx, npy, npz, npy2, npz2;
67+
int i, j, k;
68+
double my_time;
69+
MPI_Init(&argc, &argv);
70+
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
71+
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
72+
if (argc != 5){
73+
if (mpi_rank == 0){
74+
printf("Usage: %s npoints nproc niter withmpi\n", argv[0]);
75+
}
76+
MPI_Finalize();
77+
exit(1);
78+
}
79+
npoints = atoi(argv[1]);
80+
nproc = atoi(argv[2]);
81+
iter = atoi(argv[3]);
82+
withmpi = atoi(argv[4]);
83+
if ((npoints <= 0) || (nproc <= 0) || (iter <= 0) || (withmpi < 0)){
84+
if (mpi_rank == 0){
85+
printf("%s: invalid input arguments\n", argv[0]);
86+
}
87+
MPI_Finalize();
88+
exit(1);
89+
}
90+
if (mpi_size != nproc){
91+
if (mpi_rank == 0){
92+
printf("number of MPI processes must be %d\n", nproc);
93+
}
94+
MPI_Finalize();
95+
exit(1);
96+
}
97+
npx = npy = npz2 = npoints;
98+
npz = npy2 = npoints/nproc;
99+
fvalue = (double *) malloc(npz*npy*npx*sizeof(double));
100+
dvalue = (double *) malloc(npz*npy*npx*sizeof(double));
101+
init_derivatives(fvalue, dvalue, npx, npy, npz, npy2, npz2);
102+
MPI_Barrier(MPI_COMM_WORLD);
103+
my_time = MPI_Wtime();
104+
for (i = 0; i<iter; i++){
105+
derivative_x1(fvalue, dvalue, npx, npy, npz);
106+
derivative_y1(fvalue, dvalue, npx, npy, npz);
107+
if (withmpi){
108+
MPI_Alltoall(fvalue, npx*npy2*npz, MPI_DOUBLE, dvalue, npx*npy2*npz, MPI_DOUBLE, MPI_COMM_WORLD);
109+
}
110+
derivative_z1(fvalue, dvalue, npx, npy, npz);
111+
if (withmpi){
112+
MPI_Alltoall(fvalue, npx*npy2*npz, MPI_DOUBLE, dvalue, npx*npy2*npz, MPI_DOUBLE, MPI_COMM_WORLD);
113+
}
114+
}
115+
my_time = MPI_Wtime()-my_time;
116+
if (mpi_rank == 0){
117+
MPI_Reduce(MPI_IN_PLACE, &my_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
118+
printf("npoints: %d nproc: %d iter: %d withmpi: %d execution time: %e\n", npoints, nproc, iter, withmpi, my_time);
119+
}else{
120+
MPI_Reduce(&my_time, &my_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
121+
}
122+
done_derivatives();
123+
MPI_Finalize();
124+
return(0);
125+
}

0 commit comments

Comments
 (0)