|
4 | 4 | # SPDX-License-Identifier: BSD-3-Clause |
5 | 5 |
|
6 | 6 | import reframe as rfm |
7 | | -import reframe.utility.osext as osext |
8 | | -import reframe.utility.sanity as sn |
9 | | -from reframe.core.backends import getlauncher |
| 7 | + |
| 8 | +from hpctestlib.apps.jupyter.ipcmagic import ipcmagic_check |
10 | 9 |
|
11 | 10 |
|
12 | 11 | @rfm.simple_test |
13 | | -class IPCMagicCheck(rfm.RunOnlyRegressionTest): |
14 | | - descr = 'Distributed training with TensorFlow using ipyparallel' |
| 12 | +class cscs_ipcmagic_check(ipcmagic_check): |
15 | 13 | valid_systems = ['daint:gpu', 'dom:gpu'] |
16 | | - valid_prog_environs = ['PrgEnv-gnu'] |
17 | | - modules = [ |
18 | | - f'ipcmagic', f'jupyterlab', |
19 | | - f'Horovod/0.21.0-CrayGNU-{osext.cray_cdt_version()}-tf-2.4.0' |
20 | | - ] |
21 | | - num_tasks = 2 |
22 | | - num_tasks_per_node = 1 |
23 | | - executable = 'ipython' |
24 | | - executable_opts = ['tf-hvd-sgd-ipc-tf2.py'] |
| 14 | + valid_prog_environs = ['builtin'] |
| 15 | + modules = ['jupyterlab', 'Horovod'] |
| 16 | + maintainers = ['RS', 'TR'] |
| 17 | + tags = {'production'} |
25 | 18 | reference = { |
26 | 19 | 'daint:gpu': { |
27 | 20 | 'slope': (2.0, -0.1, 0.1, 'N/A'), |
28 | 21 | 'offset': (0.0, -0.1, 0.1, 'N/A'), |
29 | 22 | 'retries': (0, None, None, 'N/A'), |
30 | | - 'time': (10, None, None, 's'), |
31 | 23 | }, |
32 | 24 | 'dom:gpu': { |
33 | 25 | 'slope': (2.0, -0.1, 0.1, 'N/A'), |
34 | 26 | 'offset': (0.0, -0.1, 0.1, 'N/A'), |
35 | 27 | 'retries': (0, None, None, 'N/A'), |
36 | | - 'time': (10, None, None, 's'), |
37 | 28 | } |
38 | 29 | } |
39 | | - |
40 | | - maintainers = ['RS', 'TR'] |
41 | | - tags = {'production'} |
42 | | - |
43 | | - @run_after('setup') |
44 | | - def daint_module_workaround(self): |
45 | | - if self.current_system.name == 'daint': |
46 | | - # FIXME: Use the default modules once Dom/Daint are aligned |
47 | | - self.modules = [ |
48 | | - f'ipcmagic/1.0.1-CrayGNU-{osext.cray_cdt_version()}', |
49 | | - f'Horovod/0.21.0-CrayGNU-{osext.cray_cdt_version()}-tf-2.4.0' |
50 | | - ] |
51 | | - # FIXME: Enforce loading of jupyterlab module since |
52 | | - # `module show jupyterlab` throws a Tcl error on Daint |
53 | | - self.prerun_cmds = ['module load jupyterlab'] |
54 | | - |
55 | | - @sanity_function |
56 | | - def assert_successful_execution(self): |
57 | | - nids = sn.extractall(r'nid(?P<nid>\d+)', self.stdout, 'nid', str) |
58 | | - return sn.all([ |
59 | | - sn.assert_ne(nids, []), sn.assert_ne(nids[0], nids[1]), |
60 | | - sn.assert_found(r'IPCluster is ready\!\s+', self.stdout), |
61 | | - sn.assert_found(r'slope=\S+', self.stdout) |
62 | | - ]) |
63 | | - |
64 | | - @performance_function('N/A') |
65 | | - def slope(self): |
66 | | - return sn.extractsingle(r'slope=(?P<slope>\S+)', self.stdout, |
67 | | - 'slope', float) |
68 | | - |
69 | | - @performance_function('N/A') |
70 | | - def offset(self): |
71 | | - return sn.extractsingle(r'offset=(?P<offset>\S+)', self.stdout, |
72 | | - 'offset', float) |
73 | | - |
74 | | - @performance_function('N/A') |
75 | | - def retries(self): |
76 | | - return 4 - sn.count(sn.findall(r'IPCluster is already running', |
77 | | - self.stdout)) |
78 | | - |
79 | | - @performance_function('s') |
80 | | - def time(self): |
81 | | - return sn.extractsingle(r'IPCluster is ready\!\s+' |
82 | | - r'\((?P<time>\d+) seconds\)', |
83 | | - self.stdout, 'time', float) |
84 | | - |
85 | | - @run_before('run') |
86 | | - def reset_launcher(self): |
87 | | - # Change the job launcher since `ipython` |
88 | | - # needs to be launched without `srun`. |
89 | | - self.job.launcher = getlauncher('local')() |
0 commit comments