Skip to content

Commit 164bae6

Browse files
author
Vasileios Karakasis
authored
Merge pull request #2154 from kraushm/bugfix/ipcmagic_version_typo
[test] Fix typo in ipcmagic module version
2 parents: 6a6d1e4 + a857cf5; commit: 164bae6

File tree

1 file changed

+70
-46
lines changed

1 file changed

+70
-46
lines changed

cscs-checks/apps/jupyter/check_ipcmagic.py

Lines changed: 70 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -11,55 +11,79 @@
1111

1212
@rfm.simple_test
1313
class IPCMagicCheck(rfm.RunOnlyRegressionTest):
14-
def __init__(self):
15-
self.descr = 'Distributed training with TensorFlow using ipyparallel'
16-
self.valid_systems = ['daint:gpu', 'dom:gpu']
17-
self.valid_prog_environs = ['PrgEnv-gnu']
18-
self.modules = [
19-
# FIXME: Use the default ipcmagic version when fixed
20-
f'ipcmagic/0.1-CrayGNU-{osext.cray_cdt_version()}',
21-
f'Horovod/0.21.0-CrayGNU-{osext.cray_cdt_version()}-tf-2.4.0'
22-
]
23-
self.num_tasks = 2
24-
self.num_tasks_per_node = 1
25-
self.executable = 'ipython'
26-
self.executable_opts = ['tf-hvd-sgd-ipc-tf2.py']
27-
nids = sn.extractall(r'nid(?P<nid>\d+)',
28-
self.stdout, 'nid', str)
29-
self.sanity_patterns = sn.all([
30-
sn.assert_ne(nids, []),
31-
sn.assert_ne(nids[0], nids[1])
32-
])
33-
self.reference = {
34-
'daint:gpu': {
35-
'slope': (2.0, -0.1, 0.1, None),
36-
'offset': (0.0, -0.1, 0.1, None),
37-
'retries': (0, None, None, None),
38-
'time': (10, None, None, 's'),
39-
},
40-
'dom:gpu': {
41-
'slope': (2.0, -0.1, 0.1, None),
42-
'offset': (0.0, -0.1, 0.1, None),
43-
'retries': (0, None, None, None),
44-
'time': (10, None, None, 's'),
45-
}
46-
}
47-
self.perf_patterns = {
48-
'slope': sn.extractsingle(r'slope=(?P<slope>\S+)',
49-
self.stdout, 'slope', float),
50-
'offset': sn.extractsingle(r'offset=(?P<offset>\S+)',
51-
self.stdout, 'offset', float),
52-
'retries': 4 - sn.count(sn.findall(r'IPCluster is already running',
53-
self.stdout)),
54-
'time': sn.extractsingle(r'IPCluster is ready\!\s+'
55-
r'\((?P<time>\d+) seconds\)',
56-
self.stdout, 'time', float)
14+
descr = 'Distributed training with TensorFlow using ipyparallel'
15+
valid_systems = ['daint:gpu', 'dom:gpu']
16+
valid_prog_environs = ['PrgEnv-gnu']
17+
modules = [
18+
f'ipcmagic', f'jupyterlab',
19+
f'Horovod/0.21.0-CrayGNU-{osext.cray_cdt_version()}-tf-2.4.0'
20+
]
21+
num_tasks = 2
22+
num_tasks_per_node = 1
23+
executable = 'ipython'
24+
executable_opts = ['tf-hvd-sgd-ipc-tf2.py']
25+
reference = {
26+
'daint:gpu': {
27+
'slope': (2.0, -0.1, 0.1, 'N/A'),
28+
'offset': (0.0, -0.1, 0.1, 'N/A'),
29+
'retries': (0, None, None, 'N/A'),
30+
'time': (10, None, None, 's'),
31+
},
32+
'dom:gpu': {
33+
'slope': (2.0, -0.1, 0.1, 'N/A'),
34+
'offset': (0.0, -0.1, 0.1, 'N/A'),
35+
'retries': (0, None, None, 'N/A'),
36+
'time': (10, None, None, 's'),
5737
}
58-
self.maintainers = ['RS', 'TR']
59-
self.tags = {'production'}
38+
}
39+
40+
maintainers = ['RS', 'TR']
41+
tags = {'production'}
42+
43+
@run_after('setup')
44+
def daint_module_workaround(self):
45+
if self.current_system.name == 'daint':
46+
# FIXME: Use the default modules once Dom/Daint are aligned
47+
self.modules = [
48+
f'ipcmagic/1.0.1-CrayGNU-{osext.cray_cdt_version()}',
49+
f'Horovod/0.21.0-CrayGNU-{osext.cray_cdt_version()}-tf-2.4.0'
50+
]
51+
# FIXME: Enforce loading of jupyterlab module since
52+
# `module show jupyterlab` throws a Tcl error on Daint
53+
self.prerun_cmds = ['module load jupyterlab']
54+
55+
@sanity_function
56+
def assert_successful_execution(self):
57+
nids = sn.extractall(r'nid(?P<nid>\d+)', self.stdout, 'nid', str)
58+
return sn.all([
59+
sn.assert_ne(nids, []), sn.assert_ne(nids[0], nids[1]),
60+
sn.assert_found(r'IPCluster is ready\!\s+', self.stdout),
61+
sn.assert_found(r'slope=\S+', self.stdout)
62+
])
63+
64+
@performance_function('N/A')
65+
def slope(self):
66+
return sn.extractsingle(r'slope=(?P<slope>\S+)', self.stdout,
67+
'slope', float)
68+
69+
@performance_function('N/A')
70+
def offset(self):
71+
return sn.extractsingle(r'offset=(?P<offset>\S+)', self.stdout,
72+
'offset', float)
73+
74+
@performance_function('N/A')
75+
def retries(self):
76+
return 4 - sn.count(sn.findall(r'IPCluster is already running',
77+
self.stdout))
78+
79+
@performance_function('s')
80+
def time(self):
81+
return sn.extractsingle(r'IPCluster is ready\!\s+'
82+
r'\((?P<time>\d+) seconds\)',
83+
self.stdout, 'time', float)
6084

6185
@run_before('run')
62-
def prepare_run(self):
86+
def reset_launcher(self):
6387
# Change the job launcher since `ipython`
6488
# needs to be launched without `srun`.
6589
self.job.launcher = getlauncher('local')()

0 commit comments

Comments
 (0)