class SlurmSimpleBaseCheck(rfm.RunOnlyRegressionTest):
    '''Base class for Slurm simple binary tests.

    The common test settings are declared as class-level attributes, so
    derived checks can override or extend them directly in their class
    body. System-specific adjustments are applied by a hook attached
    after the ``init`` stage.
    '''

    # Partitions (``system:partition``) on which the simple checks may run.
    valid_systems = ['daint:gpu', 'daint:mc',
                     'dom:gpu', 'dom:mc',
                     'arolla:cn', 'arolla:pn',
                     'tsa:cn', 'tsa:pn',
                     'daint:xfer', 'eiger:mc',
                     'pilatus:mc']
    valid_prog_environs = ['PrgEnv-cray']
    tags = {'slurm', 'maintenance', 'ops',
            'production', 'single-node'}
    num_tasks_per_node = 1
    maintainers = ['RS', 'VH']

    @run_after('init')
    def customize_systems(self):
        '''Adjust the test settings for the ``arolla`` and ``tsa`` systems.

        On those systems the programming environments are replaced by
        ``PrgEnv-gnu`` and ``PrgEnv-pgi`` and exclusive access is requested.
        '''
        # NOTE(review): indentation was lost in the reviewed source; setting
        # `exclusive_access` is assumed to belong inside this branch --
        # confirm against the repository.
        if self.current_system.name in ['arolla', 'tsa']:
            self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-pgi']
            self.exclusive_access = True
30-
3132
class SlurmCompiledBaseCheck(rfm.RegressionTest):
    '''Base class for Slurm tests that require compiling some code.

    All settings are declared as class-level attributes so that derived
    checks can extend them in their class body (e.g. ``valid_systems +=``).
    '''

    # Partitions (``system:partition``) on which the compiled checks may run.
    valid_systems = ['daint:gpu', 'daint:mc',
                     'dom:gpu', 'dom:mc']
    valid_prog_environs = ['PrgEnv-cray']
    tags = {'slurm', 'maintenance', 'ops',
            'production', 'single-node'}
    num_tasks_per_node = 1
    maintainers = ['RS', 'VH']
4443
4544
@rfm.simple_test
class HostnameCheck(SlurmSimpleBaseCheck):
    '''Run ``/bin/hostname`` using the ``builtin`` environment.'''

    executable = '/bin/hostname'
    valid_prog_environs = ['builtin']

    # Regular expression for the node hostname, keyed by ``system:partition``.
    # NOTE(review): presumably consumed by the sanity hook of this class,
    # which is not fully visible in the reviewed excerpt -- confirm.
    hostname_patt = {
        'arolla:cn': r'^arolla-cn\d{3}$',
        'arolla:pn': r'^arolla-pp\d{3}$',
        'tsa:cn': r'^tsa-cn\d{3}$',
        'tsa:pn': r'^tsa-pp\d{3}$',
        'daint:gpu': r'^nid\d{5}$',
        'daint:mc': r'^nid\d{5}$',
        'daint:xfer': r'^datamover\d{2}.cscs.ch$',
        'dom:gpu': r'^nid\d{5}$',
        'dom:mc': r'^nid\d{5}$',
        'eiger:mc': r'^nid\d{6}$',
        'pilatus:mc': r'^nid\d{6}$'
    }
6562
6663 @run_before ('sanity' )
6764 def set_sanity_patterns (self ):
@@ -74,45 +71,48 @@ def set_sanity_patterns(self):
7471
@rfm.simple_test
class EnvironmentVariableCheck(SlurmSimpleBaseCheck):
    '''Check that every task echoes the value of ``MY_VAR``.

    The test runs ``/bin/echo $MY_VAR`` with ``MY_VAR`` set through the
    test's ``variables`` and counts the occurrences of its value in the
    standard output.
    '''

    num_tasks = 2
    valid_systems = ['daint:gpu', 'daint:mc',
                     'dom:gpu', 'dom:mc',
                     'arolla:cn', 'arolla:pn',
                     'tsa:cn', 'tsa:pn',
                     'eiger:mc', 'pilatus:mc']
    executable = '/bin/echo'
    executable_opts = ['$MY_VAR']
    variables = {'MY_VAR': 'TEST123456!'}

    # Drop the tag inherited from the base class; presumably because two
    # tasks at one task per node no longer fit on a single node.
    tags.remove('single-node')

    @sanity_function
    def assert_num_tasks(self):
        '''Assert that the echoed value appears once per task.'''
        num_matches = sn.count(sn.findall(r'TEST123456!', self.stdout))
        return sn.assert_eq(self.num_tasks, num_matches)
9189
9290
@rfm.simple_test
class RequiredConstraintCheck(SlurmSimpleBaseCheck):
    '''Check that ``srun`` without a ``-C`` constraint is rejected.

    The test launches ``srun -A <group> hostname`` from the login
    partitions and expects the missing-constraint error on standard error.
    '''

    valid_systems = ['daint:login', 'dom:login']
    executable = 'srun'

    # NOTE(review): `osext.osgroup()` presumably returns the group of the
    # current user, used here as the Slurm account -- confirm.
    executable_opts = ['-A', osext.osgroup(), 'hostname']

    @sanity_function
    def assert_found_missing_constraint(self):
        '''Assert that the missing-constraint error was printed to stderr.'''
        return sn.assert_found(
            r'ERROR: you must specify -C with one of the following: mc,gpu',
            self.stderr
        )
104103
105104
@rfm.simple_test
class RequestLargeMemoryNodeCheck(SlurmSimpleBaseCheck):
    '''Check the memory reported by ``free -h`` on ``daint:mc``.'''

    valid_systems = ['daint:mc']
    executable = '/usr/bin/free'
    executable_opts = ['-h']

    @sanity_function
    def assert_memory_is_bounded(self):
        '''Assert that the reported memory lies between 122 and 128.

        The value is taken from the ``Mem:`` line of the standard output,
        where it is expected to be followed by a ``G`` suffix.
        '''
        mem_obtained = sn.extractsingle(r'Mem:\s+(?P<mem>\S+)G',
                                        self.stdout, 'mem', float)
        return sn.assert_bounded(mem_obtained, 122.0, 128.0)
116116
117117 @run_before ('run' )
118118 def set_memory_limit (self ):
@@ -121,55 +121,56 @@ def set_memory_limit(self):
121121
@rfm.simple_test
class DefaultRequestGPU(SlurmSimpleBaseCheck):
    '''Check that ``nvidia-smi`` reports a driver on the GPU partitions.'''

    valid_systems = ['daint:gpu', 'dom:gpu',
                     'arolla:cn', 'tsa:cn']
    executable = 'nvidia-smi'

    # The method name follows the `assert_*` convention used by the other
    # sanity functions in this file (it was previously misspelled `asser_`).
    @sanity_function
    def assert_found_nvidia_driver_version(self):
        '''Assert that the ``nvidia-smi`` banner with the driver version
        appears in the standard output.'''
        return sn.assert_found(r'NVIDIA-SMI.*Driver Version.*',
                               self.stdout)
131132
132133
@rfm.simple_test
class DefaultRequestGPUSetsGRES(SlurmSimpleBaseCheck):
    '''Check that the job description on the GPU partitions lists one GPU.

    The test prints the description of its own job through
    ``scontrol show job`` and searches it for a ``gpu:1`` resource.
    '''

    valid_systems = ['daint:gpu', 'dom:gpu']
    executable = 'scontrol show job ${SLURM_JOB_ID}'

    @sanity_function
    def assert_found_resources(self):
        '''Assert that ``TresPerNode`` or ``Gres`` reports ``gpu:1``.'''
        return sn.assert_found(r'.*(TresPerNode|Gres)=.*gpu:1.*',
                               self.stdout)
141142
142143
@rfm.simple_test
class DefaultRequestMC(SlurmSimpleBaseCheck):
    '''Check the number of CPUs seen by a job on the MC partitions.'''

    valid_systems = ['daint:mc', 'dom:mc']

    # This is a basic test that should return the number of CPUs on the
    # system which, on a MC node should be 72
    executable = 'lscpu -p |grep -v "^#" -c'

    @sanity_function
    def assert_found_num_cpus(self):
        '''Assert that ``72`` appears in the standard output.'''
        return sn.assert_found(r'72', self.stdout)
152154
153155
@rfm.simple_test
class ConstraintRequestCabinetGrouping(SlurmSimpleBaseCheck):
    '''Check the cabinet name reported by ``/proc/cray_xt/cname``.'''

    valid_systems = ['daint:gpu', 'daint:mc',
                     'dom:gpu', 'dom:mc']
    executable = 'cat /proc/cray_xt/cname'

    # Expected cabinet prefix, keyed by ``system:partition``.
    cabinets = {
        'daint:gpu': 'c0-1',
        'daint:mc': 'c1-0',
        # Numbering is inverse in Dom
        'dom:gpu': 'c0-0',
        'dom:mc': 'c0-1',
    }

    @sanity_function
    def assert_found_cabinet(self):
        '''Assert that the output starts a match with the expected cabinet.

        The expected cabinet is looked up by the full name of the current
        partition, because ``cabinets`` is keyed by ``system:partition``;
        looking it up by the bare system name would never hit an entry.
        '''
        # We choose a default pattern that will cause assert_found() to fail
        cabinet = self.cabinets.get(self.current_partition.fullname, r'$^')
        return sn.assert_found(fr'{cabinet}.*', self.stdout)
173174
174175 @run_before ('run' )
175176 def set_slurm_constraint (self ):
@@ -180,16 +181,16 @@ def set_slurm_constraint(self):
180181
@rfm.simple_test
class MemoryOverconsumptionCheck(SlurmCompiledBaseCheck):
    '''Check that a job consuming too much memory is terminated.

    The test builds ``eatmemory.c`` and runs it with the ``4000M`` argument,
    then looks for the memory-limit error on standard error.
    '''

    time_limit = '1m'

    # Extend the settings inherited from the base class.
    valid_systems += ['eiger:mc', 'pilatus:mc']
    tags.add('mem')

    sourcepath = 'eatmemory.c'
    executable_opts = ['4000M']

    @sanity_function
    def assert_found_exceeded_memory(self):
        '''Assert that a memory-limit or out-of-memory message was printed
        to stderr.'''
        return sn.assert_found(r'(exceeded memory limit)|(Out Of Memory)',
                               self.stderr)
193194
194195 @run_before ('run' )
195196 def set_memory_limit (self ):
@@ -198,38 +199,41 @@ def set_memory_limit(self):
198199
@rfm.simple_test
class MemoryOverconsumptionMpiCheck(SlurmCompiledBaseCheck):
    '''Check that an MPI job consuming all the memory gets killed.

    The test builds ``eatmemory_mpi.c`` and runs it with the ``100%``
    argument. Besides the sanity check, it reports two performance
    variables in GB: the reference memory of the compute node and the
    maximum amount of memory the program reported as being in use.
    '''

    maintainers = ['JG']

    # Extend the settings inherited from the base class.
    valid_systems += ['eiger:mc', 'pilatus:mc']
    tags.add('mem')

    time_limit = '5m'
    sourcepath = 'eatmemory_mpi.c'
    executable_opts = ['100%']

    @sanity_function
    def assert_found_oom(self):
        '''Assert that an ``oom-kill`` or ``Killed`` message is in stderr.'''
        return sn.assert_found(r'(oom-kill)|(Killed)',
                               self.stderr)

    @run_before('performance')
    def set_references(self):
        '''Set the performance references for all systems.

        ``max_cn_memory`` gets no limits, while ``max_allocated_memory`` is
        compared against the value returned by ``reference_meminfo()`` with
        a lower threshold of ``-0.05`` and no upper threshold.
        '''
        no_limit = (0, None, None, 'GB')
        self.reference = {
            '*': {
                'max_cn_memory': no_limit,
                'max_allocated_memory': (
                    self.reference_meminfo(), -0.05, None, 'GB'
                ),
            }
        }

    @performance_function('GB')
    def max_cn_memory(self):
        '''Return the value of ``reference_meminfo()``.'''
        return self.reference_meminfo()

    @performance_function('GB')
    def max_allocated_memory(self):
        '''Return the largest ``using: <N> GB`` value found in stdout.'''
        regex = (r'^Eating \d+ MB\/mpi \*\d+mpi = -\d+ MB memory from \/proc\/'
                 r'meminfo: total: \d+ GB, free: \d+ GB, avail: \d+ GB, using:'
                 r' (\d+) GB')
        return sn.max(sn.extractall(regex, self.stdout, 1, int))
236+
233237 @run_before ('run' )
234238 def set_tasks (self ):
235239 tasks_per_node = {
@@ -244,10 +248,7 @@ def set_tasks(self):
244248 self .num_tasks_per_node = tasks_per_node [partname ]
245249 self .num_tasks = self .num_tasks_per_node
246250 self .job .launcher .options = ['-u' ]
247- # }}}
248251
249- @property
250- @sn .sanity_function
251252 def reference_meminfo (self ):
252253 reference_meminfo = {
253254 'dom:gpu' : 62 ,
@@ -285,10 +286,11 @@ class slurm_response_check(rfm.RunOnlyRegressionTest):
def set_exec_opts(self):
    '''Pass the test's ``command`` as the only executable option.'''
    self.executable_opts = [self.command]
287288
@sanity_function
def assert_exitcode_zero(self):
    '''Assert that the job of this test exited with code zero.'''
    return sn.assert_eq(self.job.exitcode, 0)
292+
@performance_function('s')
def real_time(self):
    '''Return the value following ``real`` in stderr as a float.

    The performance variable is declared with ``s`` as its unit.
    '''
    return sn.extractsingle(r'real (?P<real_time>\S+)', self.stderr,
                            'real_time', float)
0 commit comments