Skip to content

Commit 665fc7f

Browse files
author
Vasileios Karakasis
authored
Merge branch 'master' into checks/magma_revised
2 parents d69b0b0 + 8b6a3b3 commit 665fc7f

File tree

35 files changed

+1200
-137
lines changed

35 files changed

+1200
-137
lines changed

ci-scripts/genrelnotes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def extract_release_notes(git_output, tag):
2222

2323
prev_release, curr_release, *_ = sys.argv[1:]
2424
try:
25-
git_cmd = 'git log --merges v%s..v%s' % (prev_release, curr_release)
25+
git_cmd = 'git log --merges %s..%s' % (prev_release, curr_release)
2626
completed = subprocess.run(git_cmd.split(),
2727
stdout=subprocess.PIPE,
2828
stderr=subprocess.STDOUT,

config/cscs.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ class ReframeSettings:
1111
checks_path_recurse = True
1212
site_configuration = {
1313
'systems': {
14-
1514
'ault': {
1615
'descr': 'Ault TDS',
1716
'hostnames': ['ault'],
@@ -21,25 +20,37 @@ class ReframeSettings:
2120
'login': {
2221
'scheduler': 'local',
2322
'environs': ['PrgEnv-gnu'],
24-
'descr': 'Login nodes (SkylakeSilver)',
23+
'descr': 'Login nodes',
2524
'max_jobs': 4
2625
},
27-
28-
'v100': {
26+
'amdv100': {
27+
'scheduler': 'nativeslurm',
28+
'access': ['-pamdv100'],
29+
'environs': ['PrgEnv-gnu'],
30+
'descr': 'AMD Naples 32c + 2x NVIDIA V100',
31+
'max_jobs': 100,
32+
},
33+
'amdvega': {
34+
'scheduler': 'nativeslurm',
35+
'access': ['-pamdvega'],
36+
'environs': ['PrgEnv-gnu'],
37+
'descr': 'AMD Naples 32c + 3x AMD GFX900',
38+
'max_jobs': 100,
39+
},
40+
'intelv100': {
2941
'scheduler': 'nativeslurm',
3042
'access': ['-pintelv100'],
3143
'environs': ['PrgEnv-gnu'],
32-
'descr': 'Hybrid Nvidia nodes (Skylake36c/4*V100)',
44+
'descr': 'Intel Skylake 36c + 4x NVIDIA V100',
3345
'max_jobs': 100,
3446
},
35-
36-
'skl': {
47+
'intel': {
3748
'scheduler': 'nativeslurm',
3849
'access': ['-pintel'],
3950
'environs': ['PrgEnv-gnu'],
40-
'descr': 'Multicore nodes (Skylake36c)',
51+
'descr': 'Intel Skylake 36c',
4152
'max_jobs': 100,
42-
},
53+
}
4354
}
4455
},
4556

@@ -281,7 +292,7 @@ class ReframeSettings:
281292
'PrgEnv-gnu': {
282293
'type': 'ProgEnvironment',
283294
# defaults were gcc/8.3.0, cuda/10.1, openmpi/4.0.0
284-
'modules': ['gcc', 'cuda', 'openmpi'],
295+
'modules': ['gcc', 'cuda/10.1', 'openmpi'],
285296
'cc': 'mpicc',
286297
'cxx': 'mpicxx',
287298
'ftn': 'mpif90',

cscs-checks/microbenchmarks/alloc_speed/alloc_speed.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ def __init__(self, hugepages):
1616
if hugepages == 'no':
1717
self.valid_systems += ['kesch:cn', 'kesch:pn']
1818
else:
19-
self.modules = ['craype-hugepages%s' % hugepages]
19+
if self.current_system.name in {'dom', 'daint'}:
20+
self.modules = ['craype-hugepages%s' % hugepages]
2021

2122
self.sanity_patterns = sn.assert_found('4096 MB', self.stdout)
2223
self.perf_patterns = {
@@ -42,6 +43,9 @@ def __init__(self, hugepages):
4243
},
4344
'kesch:pn': {
4445
'time': (0.55, None, 0.10, 's')
46+
},
47+
'*': {
48+
'time': (0, None, None, 's')
4549
}
4650
},
4751
'2M': {
@@ -57,6 +61,9 @@ def __init__(self, hugepages):
5761
'daint:mc': {
5862
'time': (0.20, None, 0.10, 's')
5963
},
64+
'*': {
65+
'time': (0, None, None, 's')
66+
}
6067
},
6168
}
6269
self.reference = self.sys_reference[hugepages]
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
@rfm.required_version('>=2.16-dev0')
@rfm.parameterized_test(['nompi'], ['mpi'])
class FFTWTest(rfm.RegressionTest):
    """Compile ``fftw_benchmark.c`` and check its reported execution time.

    The check is instantiated once per ``exec_mode``. ``'nompi'`` selects the
    12-task configuration; any other value selects the 72-task one.
    """

    def __init__(self, exec_mode):
        super().__init__()
        system_name = self.current_system.name

        self.sourcepath = 'fftw_benchmark.c'
        self.build_system = 'SingleSource'
        self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
        self.modules = ['cray-fftw']
        self.num_tasks_per_node = 12
        self.num_gpus_per_node = 0

        # The run is sane only if the timing line shows up exactly once.
        timing_lines = sn.findall(r'execution time', self.stdout)
        self.sanity_patterns = sn.assert_eq(sn.count(timing_lines), 1)

        self.build_system.cflags = ['-O2']
        if system_name == 'kesch':
            self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
            # On kesch the FFTW include and library paths are passed
            # explicitly through the compiler flags.
            self.build_system.cflags += ['-I$FFTW_INC', '-L$FFTW_DIR',
                                         '-lfftw3']
        elif system_name in {'daint', 'dom'}:
            self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi',
                                        'PrgEnv-gnu']

        self.perf_patterns = {
            'fftw_exec_time': sn.extractsingle(
                r'execution time:\s+(?P<exec_time>\S+)', self.stdout,
                'exec_time', float),
        }

        def timing(value, upper_thres):
            # One reference entry: no lower threshold, unit is seconds.
            return {'fftw_exec_time': (value, None, upper_thres, 's')}

        # NOTE(review): the trailing 0/1 in the executable options is
        # presumably the benchmark's "withmpi" switch -- confirm against
        # fftw_benchmark.c.
        if exec_mode == 'nompi':
            self.num_tasks = 12
            self.executable_opts = ['72 12 1000 0']
            cray_time, kesch_time, upper_thres = 0.55, 0.61, 0.05
        else:
            self.num_tasks = 72
            self.executable_opts = ['144 72 200 1']
            cray_time, kesch_time, upper_thres = 0.47, 1.58, 0.50

        self.reference = {
            'dom:gpu': timing(cray_time, upper_thres),
            'daint:gpu': timing(cray_time, upper_thres),
            'kesch:cn': timing(kesch_time, upper_thres),
            # Any other system: record the value without thresholds.
            '*': timing(0, None),
        }

        self.maintainers = ['AJ']
        self.tags = {'benchmark', 'scs'}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
#include <stdlib.h>
2+
#include <stdio.h>
3+
#include <string.h>
4+
#include <complex.h>
5+
#include <fftw3.h>
6+
#include <mpi.h>
7+
8+
fftw_complex *deri_temp_x, *deri_temp_y, *deri_temp_z;
9+
fftw_plan plan_forward_x, plan_backward_x, plan_forward_y, plan_backward_y, plan_forward_z, plan_backward_z;
10+
11+
void init_derivatives(double *func, double *deri, int npx, int npy, int npz, int npy2, int npz2){
12+
int nnn;
13+
deri_temp_x = (fftw_complex *) malloc(npy*npz*(npx/2+1)*sizeof(fftw_complex));
14+
deri_temp_y = (fftw_complex *) malloc(npx*(npy/2+1)*sizeof(fftw_complex));
15+
deri_temp_z = (fftw_complex *) malloc(npx*npy2*(npz2/2+1)*sizeof(fftw_complex));
16+
nnn = npx;
17+
plan_forward_x = fftw_plan_many_dft_r2c(1, &nnn, npy*npz, func, &nnn, 1, npx, deri_temp_x, &nnn, 1, npx/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
18+
nnn = npy;
19+
plan_forward_y = fftw_plan_many_dft_r2c(1, &nnn, npx, func, &nnn, npx, 1, deri_temp_y, &nnn, 1, npy/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
20+
nnn = npz2;
21+
plan_forward_z = fftw_plan_many_dft_r2c(1, &nnn, npx*npy2, func, &nnn, npx*npy2, 1, deri_temp_z, &nnn, 1, npz2/2+1, FFTW_MEASURE+FFTW_UNALIGNED);
22+
nnn = npx;
23+
plan_backward_x = fftw_plan_many_dft_c2r(1, &nnn, npy*npz, deri_temp_x, &nnn, 1, npx/2+1, deri, &nnn, 1, npx, FFTW_MEASURE+FFTW_UNALIGNED);
24+
nnn = npy;
25+
plan_backward_y = fftw_plan_many_dft_c2r(1, &nnn, npx, deri_temp_y, &nnn, 1, npy/2+1, deri, &nnn, npx, 1, FFTW_MEASURE+FFTW_UNALIGNED);
26+
nnn = npz2;
27+
plan_backward_z = fftw_plan_many_dft_c2r(1, &nnn, npx*npy2, deri_temp_z, &nnn, 1, npz2/2+1, deri, &nnn, npx*npy2, 1, FFTW_MEASURE+FFTW_UNALIGNED);
28+
}
29+
30+
void done_derivatives(){
31+
fftw_destroy_plan(plan_backward_z);
32+
fftw_destroy_plan(plan_backward_y);
33+
fftw_destroy_plan(plan_backward_x);
34+
fftw_destroy_plan(plan_forward_z);
35+
fftw_destroy_plan(plan_forward_y);
36+
fftw_destroy_plan(plan_forward_x);
37+
free(deri_temp_z);
38+
free(deri_temp_y);
39+
free(deri_temp_x);
40+
}
41+
42+
/*
 * Run the x-direction forward (r2c) transform of `func` into the global
 * scratch spectrum, then the backward (c2r) transform into `deri`.
 *
 * The transform sizes were fixed when the plans were created, so npx, npy
 * and npz are not read here; they are kept so all derivative_*1() routines
 * share one signature.
 */
void derivative_x1(double *func, double *deri, int npx, int npy, int npz){
    (void) npx;
    (void) npy;
    (void) npz;
    fftw_execute_dft_r2c(plan_forward_x, func, deri_temp_x);
    fftw_execute_dft_c2r(plan_backward_x, deri_temp_x, deri);
}
47+
48+
void derivative_y1(double *func, double *deri, int npx, int npy, int npz){
49+
int i, j, k;
50+
for (k = 0; k<npz; k++){
51+
fftw_execute_dft_r2c(plan_forward_y, func+k*npy*npx, deri_temp_y);
52+
fftw_execute_dft_c2r(plan_backward_y, deri_temp_y, deri+k*npy*npx);
53+
}
54+
}
55+
56+
/*
 * Run the z-direction forward (r2c) transform of `func` into the global
 * scratch spectrum, then the backward (c2r) transform into `deri`.
 *
 * The transform sizes were fixed when the plans were created, so npx, npy
 * and npz are not read here; they are kept so all derivative_*1() routines
 * share one signature.
 */
void derivative_z1(double *func, double *deri, int npx, int npy, int npz){
    (void) npx;
    (void) npy;
    (void) npz;
    fftw_execute_dft_r2c(plan_forward_z, func, deri_temp_z);
    fftw_execute_dft_c2r(plan_backward_z, deri_temp_z, deri);
}
61+
62+
int main(int argc, char *argv[]){
63+
int mpi_size, mpi_rank;
64+
int npoints, nproc, iter, withmpi;
65+
double *fvalue, *dvalue;
66+
int npx, npy, npz, npy2, npz2;
67+
int i, j, k;
68+
double my_time;
69+
MPI_Init(&argc, &argv);
70+
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
71+
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
72+
if (argc != 5){
73+
if (mpi_rank == 0){
74+
printf("Usage: %s npoints nproc niter withmpi\n", argv[0]);
75+
}
76+
MPI_Finalize();
77+
exit(1);
78+
}
79+
npoints = atoi(argv[1]);
80+
nproc = atoi(argv[2]);
81+
iter = atoi(argv[3]);
82+
withmpi = atoi(argv[4]);
83+
if ((npoints <= 0) || (nproc <= 0) || (iter <= 0) || (withmpi < 0)){
84+
if (mpi_rank == 0){
85+
printf("%s: invalid input arguments\n", argv[0]);
86+
}
87+
MPI_Finalize();
88+
exit(1);
89+
}
90+
if (mpi_size != nproc){
91+
if (mpi_rank == 0){
92+
printf("number of MPI processes must be %d\n", nproc);
93+
}
94+
MPI_Finalize();
95+
exit(1);
96+
}
97+
npx = npy = npz2 = npoints;
98+
npz = npy2 = npoints/nproc;
99+
fvalue = (double *) malloc(npz*npy*npx*sizeof(double));
100+
dvalue = (double *) malloc(npz*npy*npx*sizeof(double));
101+
init_derivatives(fvalue, dvalue, npx, npy, npz, npy2, npz2);
102+
MPI_Barrier(MPI_COMM_WORLD);
103+
my_time = MPI_Wtime();
104+
for (i = 0; i<iter; i++){
105+
derivative_x1(fvalue, dvalue, npx, npy, npz);
106+
derivative_y1(fvalue, dvalue, npx, npy, npz);
107+
if (withmpi){
108+
MPI_Alltoall(fvalue, npx*npy2*npz, MPI_DOUBLE, dvalue, npx*npy2*npz, MPI_DOUBLE, MPI_COMM_WORLD);
109+
}
110+
derivative_z1(fvalue, dvalue, npx, npy, npz);
111+
if (withmpi){
112+
MPI_Alltoall(fvalue, npx*npy2*npz, MPI_DOUBLE, dvalue, npx*npy2*npz, MPI_DOUBLE, MPI_COMM_WORLD);
113+
}
114+
}
115+
my_time = MPI_Wtime()-my_time;
116+
if (mpi_rank == 0){
117+
MPI_Reduce(MPI_IN_PLACE, &my_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
118+
printf("npoints: %d nproc: %d iter: %d withmpi: %d execution time: %e\n", npoints, nproc, iter, withmpi, my_time);
119+
}else{
120+
MPI_Reduce(&my_time, &my_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
121+
}
122+
done_derivatives();
123+
MPI_Finalize();
124+
return(0);
125+
}

cscs-checks/microbenchmarks/kernel_latency/kernel_latency.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,25 +7,35 @@
77
class KernelLatencyTest(rfm.RegressionTest):
88
def __init__(self, kernel_version):
99
super().__init__()
10-
self.sourcepath = 'kernel_latency.cu'
11-
self.build_system = 'SingleSource'
10+
# List known partitions here so as to avoid specifying them every time
11+
# with --system
1212
self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
13-
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
1413
self.num_tasks = 0
1514
self.num_tasks_per_node = 1
16-
15+
self.sourcepath = 'kernel_latency.cu'
16+
self.build_system = 'SingleSource'
17+
self.build_system.cxxflags = ['-std=c++11']
1718
if self.current_system.name in {'dom', 'daint'}:
1819
self.num_gpus_per_node = 1
1920
gpu_arch = '60'
2021
self.modules = ['craype-accel-nvidia60']
21-
self.valid_prog_environs += ['PrgEnv-gnu']
22-
else:
22+
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi',
23+
'PrgEnv-gnu']
24+
elif self.current_system.name == 'kesch':
2325
self.num_gpus_per_node = 16
26+
self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
2427
self.modules = ['craype-accel-nvidia35']
2528
gpu_arch = '37'
29+
else:
30+
# Enable test when running on an unknown system
31+
self.num_gpus_per_node = 1
32+
self.valid_systems = ['*']
33+
self.valid_prog_environs = ['*']
34+
gpu_arch = None
2635

27-
self.build_system.cxxflags = ['-arch=compute_%s' % gpu_arch,
28-
'-code=sm_%s' % gpu_arch, '-std=c++11']
36+
if gpu_arch:
37+
self.build_system.cxxflags += ['-arch=compute_%s' % gpu_arch,
38+
'-code=sm_%s' % gpu_arch]
2939

3040
if kernel_version == 'sync':
3141
self.build_system.cppflags = ['-D SYNCKERNEL=1']
@@ -59,6 +69,9 @@ def __init__(self, kernel_version):
5969
'kesch:cn': {
6070
'latency': (12.0, None, 0.10, 'us')
6171
},
72+
'*': {
73+
'latency': (0.0, None, None, 'us')
74+
}
6275
},
6376
'async': {
6477
'dom:gpu': {
@@ -70,6 +83,9 @@ def __init__(self, kernel_version):
7083
'kesch:cn': {
7184
'latency': (5.7, None, 0.10, 'us')
7285
},
86+
'*': {
87+
'latency': (0.0, None, None, 'us')
88+
}
7389
},
7490
}
7591

0 commit comments

Comments
 (0)