11import os
22
33import reframe .utility .sanity as sn
4- from reframe . core . pipeline import RegressionTest
4+ import reframe as rfm
55
66
7- class GpuBandwidthCheck ( RegressionTest ):
8- def __init__ ( self , ** kwargs ):
9- super (). __init__ ('gpu_bandwidth_check' ,
10- os . path . dirname ( __file__ ), ** kwargs )
7+ @ rfm . simple_test
8+ class GpuBandwidthCheck ( rfm . RegressionTest ):
9+ def __init__ (self ):
10+ super (). __init__ ( )
1111 self .valid_systems = ['kesch:cn' , 'daint:gpu' , 'dom:gpu' ]
1212 self .valid_prog_environs = ['PrgEnv-cray' , 'PrgEnv-gnu' ]
13-
1413 self .sourcesdir = os .path .join (self .current_system .resourcesdir ,
1514 'CUDA' , 'essentials' )
15+ self .build_system = 'SingleSource'
1616 self .sourcepath = 'bandwidthTest.cu'
1717 self .executable = 'gpu_bandwidth_check.x'
18- self .executable_opts = ['device' , 'all' ]
18+
19+ # NOTE: Run a range of bandwidth tests from 2MB to 32MB in 2MB
20+ # increments so that initialization overhead does not skew the bandwidth
21+ self .executable_opts = ['device' , 'all' , '--mode=range' ,
22+ '--start=2097152' , '--increment=2097152' ,
23+ '--end=33554432' ]
1924 if self .current_system .name in ['daint' , 'dom' ]:
20- self .modules = ['cudatoolkit ' ]
25+ self .modules = ['craype-accel-nvidia60 ' ]
2126 self .num_gpus_per_node = 1
2227 else :
28+ self .modules = ['cudatoolkit' ]
2329 self .num_gpus_per_node = 8
2430
2531 self .sanity_patterns = sn .all ([
@@ -33,7 +39,7 @@ def __init__(self, **kwargs):
3339 for device in range (self .num_gpus_per_node ):
3440 self .perf_patterns ['perf_%s_%i' % (xfer_kind , device )] = \
3541 sn .extractsingle (self ._xfer_pattern (xfer_kind , device ),
36- self .stdout , 2 , float , 0 )
42+ self .stdout , 3 , float , 0 )
3743
3844 self .reference = {}
3945 for d in range (self .num_gpus_per_node ):
@@ -51,7 +57,8 @@ def __init__(self, **kwargs):
5157 nvidia_sm = '60'
5258 if self .current_system .name == 'kesch' :
5359 nvidia_sm = '37'
54- self ._flags = ('-m64 -arch=sm_%s' % nvidia_sm )
60+
61+ self .build_system .cxxflags = ['-I.' , '-m64' , '-arch=sm_%s' % nvidia_sm ]
5562
5663 self .maintainers = ['AJ' , 'VK' ]
5764 self .tags = {'production' }
@@ -63,10 +70,6 @@ def setup(self, partition, environ, **job_opts):
6370
6471 super ().setup (partition , environ , ** job_opts )
6572
66- def compile (self ):
67- self .current_environ .cxxflags = self ._flags
68- super ().compile ()
69-
7073 def _xfer_pattern (self , xfer_kind , devno ):
7174 """generates search pattern for performance analysis"""
7275 if xfer_kind == 'h2d' :
@@ -75,9 +78,8 @@ def _xfer_pattern(self, xfer_kind, devno):
7578 first_part = 'Device to Host Bandwidth'
7679 else :
7780 first_part = 'Device to Device Bandwidth'
78- return (r'^ *%s([^\n]*\n){%i}^ *Device Id:'
79- r' %i[^\n]*\n^\s*\d+\s+(\S+)' % (first_part , 3 + 3 * devno , devno ))
80-
8181
82- def _get_checks (** kwargs ):
83- return [GpuBandwidthCheck (** kwargs )]
82+ # Extract the bandwidth corresponding to the 32MB message (16th value)
83+ return (r'^ *%s([^\n]*\n){%i}^ *Device Id: %i\s+'
84+ r'([^\n]*\n){15}'
85+ r'\s+\d+\s+(\S+)' % (first_part , 3 + 18 * devno , devno ))
0 commit comments