@@ -24,38 +24,54 @@ class IPCMagicCheck(rfm.RunOnlyRegressionTest):
# Script passed as the option to the test executable.
executable_opts = ['tf-hvd-sgd-ipc-tf2.py']

# Performance references per `system:partition`.  Each entry is a
# (value, lower threshold, upper threshold, unit) tuple; `None` leaves
# that side unbounded.  Unitless quantities use 'N/A' as their unit.
# NOTE(review): thresholds are presumably relative to the reference value,
# so the +/-0.1 band around an `offset` of 0.0 may collapse to an exact
# match -- confirm against the framework's reference semantics.
reference = {
    'daint:gpu': {
        'slope': (2.0, -0.1, 0.1, 'N/A'),
        'offset': (0.0, -0.1, 0.1, 'N/A'),
        'retries': (0, None, None, 'N/A'),
        'time': (10, None, None, 's'),
    },
    'dom:gpu': {
        'slope': (2.0, -0.1, 0.1, 'N/A'),
        'offset': (0.0, -0.1, 0.1, 'N/A'),
        'retries': (0, None, None, 'N/A'),
        'time': (10, None, None, 's'),
    }
}
3939
# Short identifiers of the people responsible for this check.
maintainers = ['RS', 'TR']
# Tags used to select this check when filtering test runs.
tags = {'production'}
4242
@run_after('setup')
def daint_module_workaround(self):
    """Pin module versions and preload jupyterlab when running on Daint.

    Registered to run after the `setup` stage.  On any system whose name
    is not 'daint' this hook leaves the test untouched.
    """
    if self.current_system.name == 'daint':
        # Both pinned modules are built against the same Cray CDT release,
        # so query the version once and reuse it.
        cdt_version = osext.cray_cdt_version()

        # FIXME: Use the default modules once Dom/Daint are aligned
        self.modules = [
            f'ipcmagic/1.0.1-CrayGNU-{cdt_version}',
            f'Horovod/0.21.0-CrayGNU-{cdt_version}-tf-2.4.0'
        ]
        # FIXME: Enforce loading of jupyterlab module since
        # `module show jupyterlab` throws a Tcl error on Daint
        self.prerun_cmds = ['module load jupyterlab']
54+
@sanity_function
def assert_successful_execution(self):
    """Check that the run used distinct nodes and produced a fit result.

    The sanity check passes only if all of the following hold for stdout:
    at least one `nid<digits>` node identifier was printed, the first two
    identifiers differ, the 'IPCluster is ready!' banner appears, and a
    `slope=<value>` token is present.
    """
    # Collect the numeric part of every `nid<digits>` occurrence as strings.
    nids = sn.extractall(r'nid(?P<nid>\d+)', self.stdout, 'nid', str)
    return sn.all([
        sn.assert_ne(nids, []), sn.assert_ne(nids[0], nids[1]),
        sn.assert_found(r'IPCluster is ready\!\s+', self.stdout),
        sn.assert_found(r'slope=\S+', self.stdout)
    ])
4763
@performance_function('N/A')
def slope(self):
    """Return the value printed after `slope=` in stdout, as a float.

    The unit is 'N/A' (unitless), matching the unit used for this
    variable in `reference`.
    """
    return sn.extractsingle(r'slope=(?P<slope>\S+)', self.stdout,
                            'slope', float)
5268
@performance_function('N/A')
def offset(self):
    """Return the value printed after `offset=` in stdout, as a float.

    The unit is 'N/A' (unitless), matching the unit used for this
    variable in `reference`.
    """
    return sn.extractsingle(r'offset=(?P<offset>\S+)', self.stdout,
                            'offset', float)
5773
@performance_function('N/A')
def retries(self):
    """Return 4 minus the count of 'IPCluster is already running' lines.

    The unit is 'N/A' (unitless), matching the unit used for this
    variable in `reference`.
    """
    # NOTE(review): 4 is presumably the number of times the test script
    # prints this message on a run without retries -- confirm against
    # the script itself.
    return 4 - sn.count(sn.findall(r'IPCluster is already running',
                                   self.stdout))
0 commit comments