Skip to content

Commit 1521c31

Browse files
author
Vasileios Karakasis
authored
Merge branch 'master' into containerplatform
2 parents 15826e9 + d8517e5 commit 1521c31

File tree

29 files changed

+1194
-115
lines changed

29 files changed

+1194
-115
lines changed

ci-scripts/genrelnotes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def extract_release_notes(git_output, tag):
2222

2323
prev_release, curr_release, *_ = sys.argv[1:]
2424
try:
25-
git_cmd = 'git log --merges v%s..v%s' % (prev_release, curr_release)
25+
git_cmd = 'git log --merges %s..%s' % (prev_release, curr_release)
2626
completed = subprocess.run(git_cmd.split(),
2727
stdout=subprocess.PIPE,
2828
stderr=subprocess.STDOUT,

config/cscs.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,49 @@ class ReframeSettings:
1111
checks_path_recurse = True
1212
site_configuration = {
1313
'systems': {
14+
'ault': {
15+
'descr': 'Ault TDS',
16+
'hostnames': ['ault'],
17+
'modules_system': 'lmod',
18+
'resourcesdir': '/apps/common/UES/reframe/resources',
19+
'partitions': {
20+
'login': {
21+
'scheduler': 'local',
22+
'environs': ['PrgEnv-gnu'],
23+
'descr': 'Login nodes',
24+
'max_jobs': 4
25+
},
26+
'amdv100': {
27+
'scheduler': 'nativeslurm',
28+
'access': ['-pamdv100'],
29+
'environs': ['PrgEnv-gnu'],
30+
'descr': 'AMD Naples 32c + 2x NVIDIA V100',
31+
'max_jobs': 100,
32+
},
33+
'amdvega': {
34+
'scheduler': 'nativeslurm',
35+
'access': ['-pamdvega'],
36+
'environs': ['PrgEnv-gnu'],
37+
'descr': 'AMD Naples 32c + 3x AMD GFX900',
38+
'max_jobs': 100,
39+
},
40+
'intelv100': {
41+
'scheduler': 'nativeslurm',
42+
'access': ['-pintelv100'],
43+
'environs': ['PrgEnv-gnu'],
44+
'descr': 'Intel Skylake 36c + 4x NVIDIA V100',
45+
'max_jobs': 100,
46+
},
47+
'intel': {
48+
'scheduler': 'nativeslurm',
49+
'access': ['-pintel'],
50+
'environs': ['PrgEnv-gnu'],
51+
'descr': 'Intel Skylake 36c',
52+
'max_jobs': 100,
53+
}
54+
}
55+
},
56+
1457
'daint': {
1558
'descr': 'Piz Daint',
1659
'hostnames': ['daint'],
@@ -244,6 +287,30 @@ class ReframeSettings:
244287
},
245288

246289
'environments': {
290+
291+
'ault': {
292+
'PrgEnv-gnu': {
293+
'type': 'ProgEnvironment',
294+
# defaults were gcc/8.3.0, cuda/10.1, openmpi/4.0.0
295+
'modules': ['gcc', 'cuda/10.1', 'openmpi'],
296+
'cc': 'mpicc',
297+
'cxx': 'mpicxx',
298+
'ftn': 'mpif90',
299+
},
300+
'builtin': {
301+
'type': 'ProgEnvironment',
302+
'cc': 'cc',
303+
'cxx': '',
304+
'ftn': '',
305+
},
306+
'builtin-gcc': {
307+
'type': 'ProgEnvironment',
308+
'cc': 'gcc',
309+
'cxx': 'g++',
310+
'ftn': 'gfortran',
311+
}
312+
},
313+
247314
'kesch': {
248315
'PrgEnv-pgi-nompi': {
249316
'type': 'ProgEnvironment',
@@ -327,6 +394,7 @@ class ReframeSettings:
327394
'ftn': 'mpif90',
328395
},
329396
},
397+
330398
'leone': {
331399
'PrgEnv-gnu': {
332400
'type': 'ProgEnvironment',
@@ -336,6 +404,7 @@ class ReframeSettings:
336404
'ftn': 'mpif90',
337405
},
338406
},
407+
339408
'monch': {
340409
'PrgEnv-gnu': {
341410
'type': 'ProgEnvironment',
@@ -345,6 +414,7 @@ class ReframeSettings:
345414
'ftn': 'mpif90',
346415
}
347416
},
417+
348418
'*': {
349419
'PrgEnv-cray': {
350420
'type': 'ProgEnvironment',

cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ def __init__(self, hugepages):
1616
if hugepages == 'no':
1717
self.valid_systems += ['kesch:cn', 'kesch:pn']
1818
else:
19-
self.modules = ['craype-hugepages%s' % hugepages]
19+
if self.current_system.name in {'dom', 'daint'}:
20+
self.modules = ['craype-hugepages%s' % hugepages]
2021

2122
self.sanity_patterns = sn.assert_found('4096 MB', self.stdout)
2223
self.perf_patterns = {
@@ -42,6 +43,9 @@ def __init__(self, hugepages):
4243
},
4344
'kesch:pn': {
4445
'time': (0.55, None, 0.10, 's')
46+
},
47+
'*': {
48+
'time': (0, None, None, 's')
4549
}
4650
},
4751
'2M': {
@@ -57,6 +61,9 @@ def __init__(self, hugepages):
5761
'daint:mc': {
5862
'time': (0.20, None, 0.10, 's')
5963
},
64+
'*': {
65+
'time': (0, None, None, 's')
66+
}
6067
},
6168
}
6269
self.reference = self.sys_reference[hugepages]
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
@rfm.required_version('>=2.16-dev0')
6+
@rfm.parameterized_test(['nompi'], ['mpi'])
7+
class FFTWTest(rfm.RegressionTest):
8+
def __init__(self, exec_mode):
9+
super().__init__()
10+
self.sourcepath = 'fftw_benchmark.c'
11+
self.build_system = 'SingleSource'
12+
self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
13+
self.modules = ['cray-fftw']
14+
self.num_tasks_per_node = 12
15+
self.num_gpus_per_node = 0
16+
self.sanity_patterns = sn.assert_eq(
17+
sn.count(sn.findall(r'execution time', self.stdout)), 1)
18+
self.build_system.cflags = ['-O2']
19+
if self.current_system.name == 'kesch':
20+
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
21+
self.build_system.cflags += ['-I$FFTW_INC', '-L$FFTW_DIR',
22+
'-lfftw3']
23+
elif self.current_system.name in {'daint', 'dom'}:
24+
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi',
25+
'PrgEnv-gnu']
26+
27+
self.perf_patterns = {
28+
'fftw_exec_time': sn.extractsingle(
29+
r'execution time:\s+(?P<exec_time>\S+)', self.stdout,
30+
'exec_time', float),
31+
}
32+
33+
if exec_mode == 'nompi':
34+
self.num_tasks = 12
35+
self.executable_opts = ['72 12 1000 0']
36+
self.reference = {
37+
'dom:gpu': {
38+
'fftw_exec_time': (0.55, None, 0.05, 's'),
39+
},
40+
'daint:gpu': {
41+
'fftw_exec_time': (0.55, None, 0.05, 's'),
42+
},
43+
'kesch:cn': {
44+
'fftw_exec_time': (0.61, None, 0.05, 's'),
45+
},
46+
'*': {
47+
'fftw_exec_time': (0, None, None, 's'),
48+
}
49+
}
50+
else:
51+
self.num_tasks = 72
52+
self.executable_opts = ['144 72 200 1']
53+
self.reference = {
54+
'dom:gpu': {
55+
'fftw_exec_time': (0.47, None, 0.50, 's'),
56+
},
57+
'daint:gpu': {
58+
'fftw_exec_time': (0.47, None, 0.50, 's'),
59+
},
60+
'kesch:cn': {
61+
'fftw_exec_time': (1.58, None, 0.50, 's'),
62+
},
63+
'*': {
64+
'fftw_exec_time': (0, None, None, 's'),
65+
}
66+
}
67+
68+
self.maintainers = ['AJ']
69+
self.tags = {'benchmark', 'scs'}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#include <stdlib.h>
2+
#include <stdio.h>
3+
#include <string.h>
4+
#include <complex.h>
5+
#include <fftw3.h>
6+
#include <mpi.h>
7+
8+
fftw_complex *deri_temp_x, *deri_temp_y, *deri_temp_z;
9+
fftw_plan plan_forward_x, plan_backward_x, plan_forward_y, plan_backward_y, plan_forward_z, plan_backward_z;
10+
11+
void init_derivatives(double *func, double *deri, int npx, int npy, int npz, int npy2, int npz2){
12+
int nnn;
13+
deri_temp_x = (fftw_complex *) malloc(npy*npz*(npx/2+1)*sizeof(fftw_complex));
14+
deri_temp_y = (fftw_complex *) malloc(npx*(npy/2+1)*sizeof(fftw_complex));
15+
deri_temp_z = (fftw_complex *) malloc(npx*npy2*(npz2/2+1)*sizeof(fftw_complex));
16+
nnn = npx;
17+
plan_forward_x = fftw_plan_many_dft_r2c(1, &nnn, npy*npz, func, &nnn, 1, npx, deri_temp_x, &nnn, 1, npx/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
18+
nnn = npy;
19+
plan_forward_y = fftw_plan_many_dft_r2c(1, &nnn, npx, func, &nnn, npx, 1, deri_temp_y, &nnn, 1, npy/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
20+
nnn = npz2;
21+
plan_forward_z = fftw_plan_many_dft_r2c(1, &nnn, npx*npy2, func, &nnn, npx*npy2, 1, deri_temp_z, &nnn, 1, npz2/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
22+
nnn = npx;
23+
plan_backward_x = fftw_plan_many_dft_c2r(1, &nnn, npy*npz, deri_temp_x, &nnn, 1, npx/2+1, deri, &nnn, 1, npx, FFTW_MEASURE+FFTW_UNALIGNED);
24+
nnn = npy;
25+
plan_backward_y = fftw_plan_many_dft_c2r(1, &nnn, npx, deri_temp_y, &nnn, 1, npy/2+1, deri, &nnn, npx, 1, FFTW_MEASURE+FFTW_UNALIGNED);
26+
nnn = npz2;
27+
plan_backward_z = fftw_plan_many_dft_c2r(1, &nnn, npx*npy2, deri_temp_z, &nnn, 1, npz2/2+1, deri, &nnn, npx*npy2, 1, FFTW_MEASURE+FFTW_UNALIGNED);
28+
}
29+
30+
void done_derivatives(){
31+
fftw_destroy_plan(plan_backward_z);
32+
fftw_destroy_plan(plan_backward_y);
33+
fftw_destroy_plan(plan_backward_x);
34+
fftw_destroy_plan(plan_forward_z);
35+
fftw_destroy_plan(plan_forward_y);
36+
fftw_destroy_plan(plan_forward_x);
37+
free(deri_temp_z);
38+
free(deri_temp_y);
39+
free(deri_temp_x);
40+
}
41+
42+
void derivative_x1(double *func, double *deri, int npx, int npy, int npz){
43+
int i, jk;
44+
fftw_execute_dft_r2c(plan_forward_x, func, deri_temp_x);
45+
fftw_execute_dft_c2r(plan_backward_x, deri_temp_x, deri);
46+
}
47+
48+
void derivative_y1(double *func, double *deri, int npx, int npy, int npz){
49+
int i, j, k;
50+
for (k = 0; k<npz; k++){
51+
fftw_execute_dft_r2c(plan_forward_y, func+k*npy*npx, deri_temp_y);
52+
fftw_execute_dft_c2r(plan_backward_y, deri_temp_y, deri+k*npy*npx);
53+
}
54+
}
55+
56+
void derivative_z1(double *func, double *deri, int npx, int npy, int npz){
57+
int k, ij;
58+
fftw_execute_dft_r2c(plan_forward_z, func, deri_temp_z);
59+
fftw_execute_dft_c2r(plan_backward_z, deri_temp_z, deri);
60+
}
61+
62+
int main(int argc, char *argv[]){
63+
int mpi_size, mpi_rank;
64+
int npoints, nproc, iter, withmpi;
65+
double *fvalue, *dvalue;
66+
int npx, npy, npz, npy2, npz2;
67+
int i, j, k;
68+
double my_time;
69+
MPI_Init(&argc, &argv);
70+
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
71+
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
72+
if (argc != 5){
73+
if (mpi_rank == 0){
74+
printf("Usage: %s npoints nproc niter withmpi\n", argv[0]);
75+
}
76+
MPI_Finalize();
77+
exit(1);
78+
}
79+
npoints = atoi(argv[1]);
80+
nproc = atoi(argv[2]);
81+
iter = atoi(argv[3]);
82+
withmpi = atoi(argv[4]);
83+
if ((npoints <= 0) || (nproc <= 0) || (iter <= 0) || (withmpi < 0)){
84+
if (mpi_rank == 0){
85+
printf("%s: invalid input arguments\n", argv[0]);
86+
}
87+
MPI_Finalize();
88+
exit(1);
89+
}
90+
if (mpi_size != nproc){
91+
if (mpi_rank == 0){
92+
printf("number of MPI processes must be %d\n", nproc);
93+
}
94+
MPI_Finalize();
95+
exit(1);
96+
}
97+
npx = npy = npz2 = npoints;
98+
npz = npy2 = npoints/nproc;
99+
fvalue = (double *) malloc(npz*npy*npx*sizeof(double));
100+
dvalue = (double *) malloc(npz*npy*npx*sizeof(double));
101+
init_derivatives(fvalue, dvalue, npx, npy, npz, npy2, npz2);
102+
MPI_Barrier(MPI_COMM_WORLD);
103+
my_time = MPI_Wtime();
104+
for (i = 0; i<iter; i++){
105+
derivative_x1(fvalue, dvalue, npx, npy, npz);
106+
derivative_y1(fvalue, dvalue, npx, npy, npz);
107+
if (withmpi){
108+
MPI_Alltoall(fvalue, npx*npy2*npz, MPI_DOUBLE, dvalue, npx*npy2*npz, MPI_DOUBLE, MPI_COMM_WORLD);
109+
}
110+
derivative_z1(fvalue, dvalue, npx, npy, npz);
111+
if (withmpi){
112+
MPI_Alltoall(fvalue, npx*npy2*npz, MPI_DOUBLE, dvalue, npx*npy2*npz, MPI_DOUBLE, MPI_COMM_WORLD);
113+
}
114+
}
115+
my_time = MPI_Wtime()-my_time;
116+
if (mpi_rank == 0){
117+
MPI_Reduce(MPI_IN_PLACE, &my_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
118+
printf("npoints: %d nproc: %d iter: %d withmpi: %d execution time: %e\n", npoints, nproc, iter, withmpi, my_time);
119+
}else{
120+
MPI_Reduce(&my_time, &my_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
121+
}
122+
done_derivatives();
123+
MPI_Finalize();
124+
return(0);
125+
}

0 commit comments

Comments
 (0)