Skip to content

Commit 69a84d7

Browse files
authored
Merge branch 'master' into feature/min_flex_nodes
2 parents 80a9827 + 02e0d16 commit 69a84d7

File tree

18 files changed

+524
-93
lines changed

18 files changed

+524
-93
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,4 @@ For unsubscribing, you may send an empty message to [[email protected]
6161

6262
### Slack
6363

64-
You may also reach the community through Slack at [reframetalk.slack.com](https://reframetalk.slack.com/join/signup). Currently, you may join the Slack workspace by invitation only, which you will get as soon as you subscribe to the mailing list.
64+
You may also reach the community through Slack [here](https://reframe-slack.herokuapp.com).
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
@rfm.required_version('>=2.16-dev0')
@rfm.parameterized_test(['sync'], ['async'])
class KernelLatencyTest(rfm.RegressionTest):
    """Measure the average CUDA kernel launch latency (in microseconds).

    The benchmark (``kernel_latency.cu``) launches an empty kernel many
    times and reports the mean launch latency per GPU.  The ``sync``
    variant synchronizes with the device after every launch; the
    ``async`` variant synchronizes only once at the end.
    """

    def __init__(self, kernel_version):
        super().__init__()
        self.sourcepath = 'kernel_latency.cu'
        self.build_system = 'SingleSource'
        self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
        self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
        # num_tasks == 0 means "use all available nodes of the partition".
        self.num_tasks = 0
        self.num_tasks_per_node = 1

        if self.current_system.name in {'dom', 'daint'}:
            # One GPU per node, sm_60 architecture.
            self.num_gpus_per_node = 1
            gpu_arch = '60'
            self.modules = ['craype-accel-nvidia60']
            self.valid_prog_environs += ['PrgEnv-gnu']
        else:
            # kesch compute nodes: 16 GPUs per node, sm_37 architecture.
            self.num_gpus_per_node = 16
            self.modules = ['craype-accel-nvidia35']
            gpu_arch = '37'

        self.build_system.cxxflags = ['-arch=compute_%s' % gpu_arch,
                                      '-code=sm_%s' % gpu_arch, '-std=c++11']

        # SYNCKERNEL selects the synchronization strategy in the CUDA source.
        if kernel_version == 'sync':
            self.build_system.cppflags = ['-D SYNCKERNEL=1']
        else:
            self.build_system.cppflags = ['-D SYNCKERNEL=0']

        self.sanity_patterns = sn.all([
            # Every task must report how many GPUs it found...
            sn.assert_eq(
                sn.count(sn.findall(r'\[\S+\] Found \d+ gpu\(s\)',
                                    self.stdout)),
                self.num_tasks_assigned),
            # ...and one latency line per GPU must be present.
            sn.assert_eq(
                sn.count(sn.findall(r'\[\S+\] \[gpu \d+\] Kernel launch '
                                    r'latency: \S+ us', self.stdout)),
                self.num_tasks_assigned * self.num_gpus_per_node)
        ])

        self.perf_patterns = {
            'latency': sn.max(sn.extractall(
                r'\[\S+\] \[gpu \d+\] Kernel launch latency: '
                r'(?P<latency>\S+) us', self.stdout, 'latency', float))
        }
        # FIX: the benchmark always reports microseconds, but the entries
        # for sync/dom:gpu and async/daint:gpu wrongly used the unit 's';
        # all references are now consistently 'us'.
        self.sys_reference = {
            'sync': {
                'dom:gpu': {
                    'latency': (6.6, None, 0.10, 'us')
                },
                'daint:gpu': {
                    'latency': (6.6, None, 0.10, 'us')
                },
                'kesch:cn': {
                    'latency': (12.0, None, 0.10, 'us')
                },
            },
            'async': {
                'dom:gpu': {
                    'latency': (2.2, None, 0.10, 'us')
                },
                'daint:gpu': {
                    'latency': (2.2, None, 0.10, 'us')
                },
                'kesch:cn': {
                    'latency': (5.7, None, 0.10, 'us')
                },
            },
        }

        self.reference = self.sys_reference[kernel_version]

        self.maintainers = ['TM']
        self.tags = {'benchmark', 'diagnostic'}

    @property
    @sn.sanity_function
    def num_tasks_assigned(self):
        # Deferred: the job's task count is only known after submission.
        return self.job.num_tasks
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#include <iostream>
2+
#include <chrono>
3+
#include <ratio>
4+
#include <unistd.h>
5+
#include <cuda.h>
6+
7+
// Empty kernel used solely to measure launch overhead.
// FIX: removed the stray ';' after the function body (an empty
// declaration that -pedantic warns about).
__global__ void null_kernel() {
}
9+
10+
// For every GPU on this node: launch an empty kernel kernel_count times
// and print the mean launch latency in microseconds.
int main(int argc, char* argv[]) {

    char hostname[256];
    hostname[255] = '\0';
    gethostname(hostname, 255);

    cudaError_t error;
    int gpu_count = 0;

    error = cudaGetDeviceCount(&gpu_count);

    if (error == cudaSuccess) {
        if (gpu_count <= 0) {
            std::cout << "[" << hostname << "] " << "Could not find any gpu\n";
            return 1;
        }
        std::cout << "[" << hostname << "] " << "Found " << gpu_count << " gpu(s)\n";
    }
    else {
        std::cout << "[" << hostname << "] " << "Error getting gpu count, exiting...\n";
        return 1;
    }

    for (int i = 0; i < gpu_count; i++) {

        cudaSetDevice(i);
        // Single kernel launch to initialize cuda runtime
        null_kernel<<<1, 1>>>();

        // FIX: use the monotonic steady_clock for interval measurement;
        // system_clock may jump if the wall clock is adjusted.
        auto t_start = std::chrono::steady_clock::now();
        const int kernel_count = 1000;

        // FIX: the inner loop previously redeclared `i`, shadowing the
        // outer GPU index; use a distinct counter.
        for (int k = 0; k < kernel_count; ++k) {
            null_kernel<<<1, 1>>>();
#if SYNCKERNEL == 1
            // Synchronous variant: wait for each launch to complete.
            cudaDeviceSynchronize();
#endif
        }

#if SYNCKERNEL != 1
        // Asynchronous variant: drain the queue once at the end.
        cudaDeviceSynchronize();
#endif

        auto t_end = std::chrono::steady_clock::now();
        std::cout << "[" << hostname << "] " << "[gpu " << i << "] " << "Kernel launch latency: " << std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(t_end - t_start).count() / kernel_count << " us\n";
    }

    return 0;
}
59+

cscs-checks/prgenv/mpi.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
@rfm.required_version('>=2.14')
@rfm.parameterized_test(['single'], ['funneled'], ['serialized'], ['multiple'])
class MpiInitTest(rfm.RegressionTest):
    """This test checks the value returned by calling MPI_Init_thread.

    Output should look the same for every prgenv (cray, gnu, intel, pgi)
    (mpi_thread_multiple seems to be not supported):

    # 'single':
    ['mpi_thread_supported=MPI_THREAD_SINGLE
      mpi_thread_queried=MPI_THREAD_SINGLE 0'],

    # 'funneled':
    ['mpi_thread_supported=MPI_THREAD_FUNNELED
      mpi_thread_queried=MPI_THREAD_FUNNELED 1'],

    # 'serialized':
    ['mpi_thread_supported=MPI_THREAD_SERIALIZED
      mpi_thread_queried=MPI_THREAD_SERIALIZED 2'],

    # 'multiple':
    ['mpi_thread_supported=MPI_THREAD_SERIALIZED
      mpi_thread_queried=MPI_THREAD_SERIALIZED 2']

    """

    def __init__(self, required_thread):
        super().__init__()
        self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc']
        self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu',
                                    'PrgEnv-intel', 'PrgEnv-pgi']
        self.build_system = 'SingleSource'
        self.sourcepath = 'mpi_init_thread.cpp'
        # Preprocessor flag selecting which MPI thread level the C++
        # source requests at MPI_Init_thread time.
        self.cppflags = {
            'single': ['-D_MPI_THREAD_SINGLE'],
            'funneled': ['-D_MPI_THREAD_FUNNELED'],
            'serialized': ['-D_MPI_THREAD_SERIALIZED'],
            'multiple': ['-D_MPI_THREAD_MULTIPLE']
        }
        # FIX: build the flag list with `+` instead of assigning the dict
        # entry and then using `+=`, which mutated the list stored inside
        # self.cppflags through aliasing.
        # NOTE(review): '-static' is a linker flag passed via cppflags —
        # confirm this is intentional for this build system.
        self.build_system.cppflags = (self.cppflags[required_thread] +
                                      ['-static'])
        self.time_limit = (0, 1, 0)
        # Deferred extraction of the integer thread level printed by the
        # program (group 1 of the pattern).
        found_mpithread = sn.extractsingle(
            r'^mpi_thread_required=\w+\s+mpi_thread_supported=\w+'
            r'\s+mpi_thread_queried=\w+\s+(?P<result>\d)$',
            self.stdout, 1, int)
        # Expected MPI thread-level constants ('multiple' degrades to 2,
        # per the class docstring).
        self.mpithread_version = {
            'single': 0,
            'funneled': 1,
            'serialized': 2,
            'multiple': 2
        }
        self.sanity_patterns = sn.all([
            sn.assert_found(r'tid=0 out of 1 from rank 0 out of 1',
                            self.stdout),
            sn.assert_eq(found_mpithread,
                         self.mpithread_version[required_thread])
        ])
        self.maintainers = ['JG']
        self.tags = {'production'}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// testing MPI_Init_thread
2+
#include <iostream>
3+
#include <stdio.h>
4+
#include <mpi.h>
5+
using namespace std;
6+
7+
int main(int argc, char **argv) {
8+
int rank, size, mpiversion, mpisubversion;
9+
int resultlen = -1, mpi_thread_supported=-1;
10+
char mpilibversion[MPI_MAX_LIBRARY_VERSION_STRING];
11+
12+
// --------------------------------------------------------------------------
13+
// int MPI_Init_thread( int *argc, char ***argv, int required, int *provided )
14+
//
15+
// { MPI_THREAD_SINGLE}
16+
// Only one thread will execute.
17+
//
18+
// { MPI_THREAD_FUNNELED}
19+
// The process may be multi-threaded, but only the main thread will make MPI calls
20+
// (all MPI calls are funneled to the main thread).
21+
//
22+
// { MPI_THREAD_SERIALIZED}
23+
// The process may be multi-threaded, and multiple threads may make MPI calls, but
24+
// only one at a time: MPI calls are not made concurrently from two distinct
25+
// threads (all MPI calls are serialized).
26+
//
27+
// { MPI_THREAD_MULTIPLE}
28+
// Multiple threads may call MPI, with no restrictions.
29+
// --------------------------------------------------------------------------
30+
31+
#if defined(_MPI_THREAD_SINGLE)
32+
cout << "mpi_thread_required=MPI_THREAD_SINGLE ";
33+
int ev = MPI_Init_thread( 0,0, MPI_THREAD_SINGLE, &mpi_thread_supported );
34+
#elif defined(_MPI_THREAD_FUNNELED)
35+
cout << "mpi_thread_required=MPI_THREAD_FUNNELED ";
36+
int ev = MPI_Init_thread( 0,0, MPI_THREAD_FUNNELED, &mpi_thread_supported );
37+
#elif defined(_MPI_THREAD_SERIALIZED)
38+
cout << "mpi_thread_required=MPI_THREAD_SERIALIZED ";
39+
int ev = MPI_Init_thread( 0,0, MPI_THREAD_SERIALIZED, &mpi_thread_supported );
40+
#elif defined(_MPI_THREAD_MULTIPLE)
41+
cout << "mpi_thread_required=MPI_THREAD_MULTIPLE ";
42+
int ev = MPI_Init_thread( 0,0, MPI_THREAD_MULTIPLE, &mpi_thread_supported );
43+
#else
44+
cout << "mpi_thread_required=none ";
45+
int ev = MPI_Init(0,0);
46+
#endif
47+
48+
switch ( mpi_thread_supported )
49+
{
50+
case MPI_THREAD_SINGLE: cout << "mpi_thread_supported=MPI_THREAD_SINGLE" ; break;
51+
case MPI_THREAD_FUNNELED: cout << "mpi_thread_supported=MPI_THREAD_FUNNELED" ; break;
52+
case MPI_THREAD_SERIALIZED: cout << "mpi_thread_supported=MPI_THREAD_SERIALIZED" ;break;
53+
case MPI_THREAD_MULTIPLE: cout << "mpi_thread_supported=MPI_THREAD_MULTIPLE" ; break;
54+
default: cout << "mpi_thread_supported=UNKNOWN" ;
55+
}
56+
57+
// Return the level of thread support provided by the MPI library:
58+
int mpi_thread_required=-1;
59+
MPI_Query_thread( &mpi_thread_required );
60+
switch ( mpi_thread_supported )
61+
{
62+
case MPI_THREAD_SINGLE: cout << " mpi_thread_queried=MPI_THREAD_SINGLE " << mpi_thread_required << std::endl; break;
63+
case MPI_THREAD_FUNNELED: cout << " mpi_thread_queried=MPI_THREAD_FUNNELED " << mpi_thread_required << std::endl; break;
64+
case MPI_THREAD_SERIALIZED: cout << " mpi_thread_queried=MPI_THREAD_SERIALIZED "<< mpi_thread_required << std::endl; break;
65+
case MPI_THREAD_MULTIPLE: cout << " mpi_thread_queried=MPI_THREAD_MULTIPLE " << mpi_thread_required << std::endl; break;
66+
default: cout << " mpi_thread_queried=UNKNOWN " << mpi_thread_required << std::endl;
67+
}
68+
69+
MPI_Get_version( &mpiversion, &mpisubversion );
70+
MPI_Get_library_version(mpilibversion, &resultlen);
71+
printf( "# MPI-%d.%d = %s", mpiversion, mpisubversion, mpilibversion);
72+
73+
rank = MPI::COMM_WORLD.Get_rank();
74+
size = MPI::COMM_WORLD.Get_size();
75+
cout << "tid=0 out of 1 from rank " << rank << " out of " << size << "\n";
76+
77+
//std::cout << " mpi_thread_queried=" << mpi_thread_required << std::endl;
78+
79+
MPI::Finalize();
80+
81+
return 0;
82+
} /* end func main */

reframe/core/decorators.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,26 @@
77

88
import collections
99
import inspect
10+
import sys
11+
import traceback
1012

1113
import reframe
12-
from reframe.core.exceptions import ReframeSyntaxError
14+
from reframe.core.exceptions import ReframeSyntaxError, user_frame
1315
from reframe.core.logging import getlogger
1416
from reframe.core.pipeline import RegressionTest
1517
from reframe.utility.versioning import Version, VersionValidator
1618

1719

1820
def _register_test(cls, args=None):
19-
def _instantiate():
21+
def _instantiate(cls, args):
22+
if isinstance(args, collections.Sequence):
23+
return cls(*args)
24+
elif isinstance(args, collections.Mapping):
25+
return cls(**args)
26+
elif args is None:
27+
return cls()
28+
29+
def _instantiate_all():
2030
ret = []
2131
for cls, args in mod.__rfm_test_registry:
2232
try:
@@ -26,18 +36,21 @@ def _instantiate():
2636
except AttributeError:
2737
mod.__rfm_skip_tests = set()
2838

29-
if isinstance(args, collections.Sequence):
30-
ret.append(cls(*args))
31-
elif isinstance(args, collections.Mapping):
32-
ret.append(cls(**args))
33-
elif args is None:
34-
ret.append(cls())
39+
try:
40+
ret.append(_instantiate(cls, args))
41+
except Exception as e:
42+
frame = user_frame(sys.exc_info()[2])
43+
msg = "skipping test due to errors: %s: " % cls.__name__
44+
msg += "use `-v' for more information\n"
45+
msg += " FILE: %s:%s" % (frame.filename, frame.lineno)
46+
getlogger().warning(msg)
47+
getlogger().verbose(traceback.format_exc())
3548

3649
return ret
3750

3851
mod = inspect.getmodule(cls)
3952
if not hasattr(mod, '_rfm_gettests'):
40-
mod._rfm_gettests = _instantiate
53+
mod._rfm_gettests = _instantiate_all
4154

4255
try:
4356
mod.__rfm_test_registry.append((cls, args))

0 commit comments

Comments
 (0)