Skip to content

Commit 3623af2

Browse files
author
Vasileios Karakasis
authored
Merge pull request #1872 from teojgo/tests/remove_custom_dom
[test] Align Daint and Dom versions of tests
2 parents 1c9b188 + d84fa3a commit 3623af2

File tree

19 files changed

+106
-195
lines changed

19 files changed

+106
-195
lines changed

cscs-checks/apps/jupyter/check_ipcmagic.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,10 @@ def __init__(self):
1515
self.descr = 'Distributed training with TensorFlow using ipyparallel'
1616
self.valid_systems = ['daint:gpu', 'dom:gpu']
1717
self.valid_prog_environs = ['PrgEnv-gnu']
18-
cray_cdt_version = osext.cray_cdt_version()
19-
# FIXME: The following will not be needed after the Daint upgrade
20-
if self.current_system.name == 'dom':
21-
self.modules = [
22-
'ipcmagic',
23-
f'Horovod/0.21.0-CrayGNU-{cray_cdt_version}-tf-2.4.0'
24-
]
25-
else:
26-
self.modules = [
27-
'ipcmagic',
28-
'Horovod/0.19.1-CrayGNU-20.08-tf-2.2.0'
29-
]
30-
18+
self.modules = [
19+
f'ipcmagic',
20+
f'Horovod/0.21.0-CrayGNU-{osext.cray_cdt_version()}-tf-2.4.0'
21+
]
3122
self.num_tasks = 2
3223
self.num_tasks_per_node = 1
3324
self.executable = 'ipython'

cscs-checks/apps/quantumespresso/quantumespresso_check.py

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88

99

1010
class QuantumESPRESSOCheck(rfm.RunOnlyRegressionTest):
11+
scale = parameter(['small', 'large'])
12+
variant = parameter(['maint', 'prod'])
13+
1114
def __init__(self):
1215
if self.current_system.name == 'pilatus':
1316
self.valid_prog_environs = ['cpeGNU']
@@ -37,15 +40,14 @@ def __init__(self):
3740
}
3841

3942

40-
@rfm.parameterized_test(*([s, v]
41-
for s in ['small', 'large']
42-
for v in ['maint', 'prod']))
43+
@rfm.simple_test
4344
class QuantumESPRESSOCpuCheck(QuantumESPRESSOCheck):
44-
def __init__(self, scale, variant):
45+
def __init__(self):
4546
super().__init__()
46-
self.descr = f'QuantumESPRESSO CPU check (version: {scale}, {variant})'
47+
self.descr = (f'QuantumESPRESSO CPU check (version: {self.scale}, '
48+
f'{self.variant})')
4749
self.valid_systems = ['daint:mc', 'eiger:mc', 'pilatus:mc']
48-
if scale == 'small':
50+
if self.scale == 'small':
4951
self.valid_systems += ['dom:mc']
5052
energy_reference = -11427.09017218
5153
if self.current_system.name in ['daint', 'dom']:
@@ -86,8 +88,6 @@ def __init__(self, scale, variant):
8688
energy_diff = sn.abs(energy-energy_reference)
8789
self.sanity_patterns = sn.all([
8890
self.sanity_patterns,
89-
# FIXME temporarily increase energy difference
90-
# (different QE default on Dom and Daint)
9191
sn.assert_lt(energy_diff, 1e-6)
9292
])
9393

@@ -120,8 +120,10 @@ def __init__(self, scale, variant):
120120
}
121121
}
122122

123-
self.reference = references[variant][scale]
124-
self.tags |= {'maintenance' if variant == 'maint' else 'production'}
123+
self.reference = references[self.variant][self.scale]
124+
self.tags |= {
125+
'maintenance' if self.variant == 'maint' else 'production'
126+
}
125127

126128
@rfm.run_before('run')
127129
def set_task_distribution(self):
@@ -132,16 +134,15 @@ def set_cpu_binding(self):
132134
self.job.launcher.options = ['--cpu-bind=cores']
133135

134136

135-
@rfm.parameterized_test(*([s, v]
136-
for s in ['small', 'large']
137-
for v in ['maint', 'prod']))
137+
@rfm.simple_test
138138
class QuantumESPRESSOGpuCheck(QuantumESPRESSOCheck):
139-
def __init__(self, scale, variant):
139+
def __init__(self):
140140
super().__init__()
141-
self.descr = f'QuantumESPRESSO GPU check (version: {scale}, {variant})'
141+
self.descr = (f'QuantumESPRESSO GPU check (version: {self.scale}, '
142+
f'{self.variant})')
142143
self.valid_systems = ['daint:gpu']
143144
self.num_gpus_per_node = 1
144-
if scale == 'small':
145+
if self.scale == 'small':
145146
self.valid_systems += ['dom:gpu']
146147
self.num_tasks = 6
147148
energy_reference = -11427.09017168
@@ -157,8 +158,6 @@ def __init__(self, scale, variant):
157158
energy_diff = sn.abs(energy-energy_reference)
158159
self.sanity_patterns = sn.all([
159160
self.sanity_patterns,
160-
# FIXME temporarily increase energy difference
161-
# (different CUDA default on Dom and Daint)
162161
sn.assert_lt(energy_diff, 1e-7)
163162
])
164163

@@ -183,5 +182,7 @@ def __init__(self, scale, variant):
183182
}
184183
}
185184

186-
self.reference = references[variant][scale]
187-
self.tags |= {'maintenance' if variant == 'maint' else 'production'}
185+
self.reference = references[self.variant][self.scale]
186+
self.tags |= {
187+
'maintenance' if self.variant == 'maint' else 'production'
188+
}

cscs-checks/apps/tensorflow/tf2_horovod_check.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,21 @@
88
import reframe.utility.osext as osext
99

1010

11-
@rfm.parameterized_test(['small'], ['large'])
11+
@rfm.simple_test
1212
class TensorFlow2HorovodTest(rfm.RunOnlyRegressionTest):
13-
def __init__(self, variant):
13+
variant = parameter(['small', 'large'])
14+
15+
def __init__(self):
1416
self.descr = 'Distributed training with TensorFlow2 and Horovod'
1517
self.valid_systems = ['daint:gpu']
1618
self.valid_prog_environs = ['builtin']
17-
18-
cray_cdt_version = osext.cray_cdt_version()
19-
# FIXME: The following will not be needed after the Daint upgrade
20-
if self.current_system.name == 'dom':
21-
self.modules = [
22-
f'Horovod/0.21.0-CrayGNU-{cray_cdt_version}-tf-2.4.0'
23-
]
24-
else:
25-
self.modules = ['Horovod/0.19.1-CrayGNU-20.08-tf-2.2.0']
26-
19+
self.modules = [
20+
f'Horovod/0.21.0-CrayGNU-{osext.cray_cdt_version()}-tf-2.4.0'
21+
]
2722
self.sourcesdir = None
2823
self.num_tasks_per_node = 1
2924
self.num_cpus_per_task = 12
30-
if variant == 'small':
25+
if self.variant == 'small':
3126
self.valid_systems += ['dom:gpu']
3227
self.num_tasks = 8
3328
self.reference = {

cscs-checks/compile/libsci_acc_symlink.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,29 +9,30 @@
99
import reframe.utility.sanity as sn
1010

1111

12-
# FIXME: Remove the 71 entries after daint upgrade
13-
@rfm.parameterized_test(['libsci_acc_gnu_81_nv35'],
14-
['libsci_acc_gnu_81_nv60'],
15-
['libsci_acc_cray_nv35'],
16-
['libsci_acc_cray_nv60'],
17-
['libsci_acc_cray_nv35_openacc'],
18-
['libsci_acc_cray_nv60_openacc'])
12+
@rfm.simple_test
1913
class LibSciAccSymLinkTest(rfm.RunOnlyRegressionTest):
20-
def __init__(self, lib_name):
21-
self.descr = 'LibSciAcc symlink check of %s' % lib_name
14+
lib_name = parameter([
15+
'libsci_acc_gnu_81_nv35', 'libsci_acc_gnu_81_nv60',
16+
'libsci_acc_cray_nv35', 'libsci_acc_cray_nv60',
17+
'libsci_acc_cray_nv35_openacc', 'libsci_acc_cray_nv60_openacc'
18+
])
19+
20+
def __init__(self):
21+
self.descr = f'LibSciAcc symlink check of {self.lib_name}'
2222
self.valid_systems = [
2323
'daint:login', 'daint:gpu',
2424
'dom:login', 'dom:gpu',
2525
]
2626
regex = (r'libsci_acc_(?P<prgenv>[A-Za-z]+)_((?P<cver>[A-Za-z0-9]+)_)'
2727
r'?(?P<version>\S+)')
28-
prgenv = re.match(regex, lib_name).group('prgenv')
28+
prgenv = re.match(regex, self.lib_name).group('prgenv')
2929

3030
# The prgenv is irrelevant for this case, so just chose one
3131
self.valid_prog_environs = ['builtin']
3232
self.executable = 'ls'
3333
self.executable_opts = ['-al', '/opt/cray/pe/lib64/libsci_a*']
34-
self.sanity_patterns = sn.assert_found(f'{lib_name}.so', self.stdout)
34+
self.sanity_patterns = sn.assert_found(f'{self.lib_name}.so',
35+
self.stdout)
3536

3637
self.maintainers = ['AJ', 'LM']
3738
self.tags = {'production', 'craype', 'health'}

cscs-checks/mch/collectives_halo.py

Lines changed: 29 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@
88

99

1010
class CollectivesBaseTest(rfm.RegressionTest):
11-
def __init__(self, variant, bench_reference):
11+
variant = parameter(['default', 'nocomm', 'nocomp'])
12+
13+
def __init__(self, bench_reference):
1214
self.valid_systems = ['dom:gpu', 'daint:gpu', 'arolla:cn', 'tsa:cn']
1315
self.valid_prog_environs = ['PrgEnv-gnu']
1416
self.variables = {'G2G': '1'}
1517
self.executable = 'build/src/comm_overlap_benchmark'
16-
if variant != 'default':
17-
self.executable_opts = ['--' + variant]
18+
if self.variant != 'default':
19+
self.executable_opts = [f'--{self.variant}']
1820

1921
self.build_system = 'CMake'
2022
self.build_system.builddir = 'build'
@@ -37,7 +39,7 @@ def __init__(self, variant, bench_reference):
3739
self.num_tasks = 4
3840
self.num_gpus_per_node = 1
3941
self.num_tasks_per_node = 1
40-
self.modules = ['craype-accel-nvidia60', 'CMake']
42+
self.modules = ['craype-accel-nvidia60', 'CMake', 'cdt-cuda']
4143
self.variables['MPICH_RDMA_ENABLED_CUDA'] = '1'
4244
self.build_system.config_opts += [
4345
'-DCUDA_COMPUTE_CAPABILITY="sm_60"'
@@ -68,7 +70,7 @@ def __init__(self, variant, bench_reference):
6870
sysname = self.current_system.name
6971

7072
try:
71-
ref = bench_reference[sysname][variant]
73+
ref = bench_reference[sysname][self.variant]
7274
except KeyError:
7375
ref = 0.0
7476

@@ -87,44 +89,41 @@ def __init__(self, variant, bench_reference):
8789
else:
8890
self.tags = {'production', 'mch', 'craype'}
8991

90-
@rfm.run_before('compile')
91-
def dom_set_cuda_cdt(self):
92-
if self.current_system.name == 'dom':
93-
self.modules += ['cdt-cuda']
94-
9592
@rfm.run_before('run')
9693
def set_launcher_options(self):
9794
if self.current_system.name in ['arolla', 'tsa']:
9895
self.job.launcher.options = ['--distribution=block:block',
9996
'--cpu_bind=q']
10097

10198

102-
@rfm.parameterized_test(['default'], ['nocomm'], ['nocomp'])
99+
@rfm.simple_test
103100
class AlltoallvTest(CollectivesBaseTest):
104-
def __init__(self, variant):
105-
super().__init__(variant,
106-
{
107-
'daint': {
108-
'nocomm': 0.0171947,
109-
'nocomp': 0.0137893,
110-
'default': 0.0138493
111-
},
112-
})
101+
def __init__(self):
102+
super().__init__(
103+
{
104+
'daint': {
105+
'nocomm': 0.0171947,
106+
'nocomp': 0.0137893,
107+
'default': 0.0138493
108+
},
109+
}
110+
)
113111
self.strict_check = False
114112
self.sourcesdir = 'https://github.com/eth-cscs/comm_overlap_bench.git'
115113
self.prebuild_cmds = ['git checkout alltoallv']
116114

117115

118-
@rfm.parameterized_test(['default'], ['nocomm'], ['nocomp'])
116+
@rfm.simple_test
119117
class HaloExchangeTest(CollectivesBaseTest):
120-
def __init__(self, variant):
121-
super().__init__(variant,
122-
{
123-
'daint': {
124-
'nocomm': 0.978306,
125-
'nocomp': 1.36716,
126-
'default': 2.53509
127-
},
128-
})
118+
def __init__(self):
119+
super().__init__(
120+
{
121+
'daint': {
122+
'nocomm': 0.978306,
123+
'nocomp': 1.36716,
124+
'default': 2.53509
125+
},
126+
}
127+
)
129128
self.sourcesdir = 'https://github.com/eth-cscs/comm_overlap_bench.git'
130129
self.prebuild_cmds = ['git checkout barebones']

cscs-checks/mch/cuda_stress_test.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def __init__(self):
1919
self.modules = ['cuda/10.1.243']
2020
else:
2121
self.valid_prog_environs = ['PrgEnv-gnu']
22-
self.modules = ['craype-accel-nvidia60']
22+
self.modules = ['craype-accel-nvidia60', 'cdt-cuda']
2323

2424
self.sourcepath = 'cuda_stencil_test.cu'
2525
self.build_system = 'SingleSource'
@@ -40,8 +40,3 @@ def __init__(self):
4040
}
4141
self.tags = {'production', 'mch', 'craype'}
4242
self.maintainers = ['MKr', 'AJ']
43-
44-
@rfm.run_before('compile')
45-
def dom_set_cuda_cdt(self):
46-
if self.current_system.name == 'dom':
47-
self.modules += ['cdt-cuda']

cscs-checks/mch/gpu_direct_cuda.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def __init__(self):
1717
self.build_system = 'SingleSource'
1818
self.build_system.ldflags = ['-lcublas', '-lcudart']
1919
if self.current_system.name in ['daint', 'dom']:
20-
self.modules = ['craype-accel-nvidia60']
20+
self.modules = ['craype-accel-nvidia60', 'cdt-cuda']
2121
self.variables = {'MPICH_RDMA_ENABLED_CUDA': '1'}
2222
self.build_system.cxxflags = ['-ccbin CC', '-arch=sm_60']
2323
elif self.current_system.name in ['arolla', 'tsa']:
@@ -36,8 +36,3 @@ def __init__(self):
3636
self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5)
3737
self.maintainers = ['AJ', 'MKr']
3838
self.tags = {'production', 'mch', 'craype'}
39-
40-
@rfm.run_before('compile')
41-
def dom_set_cuda_cdt(self):
42-
if self.current_system.name == 'dom':
43-
self.modules += ['cdt-cuda']

cscs-checks/microbenchmarks/gpu/dgemm/dgmemm.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,7 @@ def set_gpu_arch(self):
7878
if nvidia_sm:
7979
self.build_system.cxxflags += [f'-arch=sm_{nvidia_sm}']
8080
if cp in {'dom:gpu', 'daint:gpu'}:
81-
self.modules += ['craype-accel-nvidia60']
82-
if cp == 'dom:gpu':
83-
self.modules += ['cdt-cuda']
84-
81+
self.modules += ['craype-accel-nvidia60', 'cdt-cuda']
8582
else:
8683
self.modules += ['cuda']
8784

cscs-checks/microbenchmarks/gpu/gpu_burn/gpu_burn_test.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,7 @@ def set_gpu_arch(self):
8383
# Nvidia options
8484
if cs in {'dom', 'daint'}:
8585
gpu_arch = '60'
86-
self.modules = ['craype-accel-nvidia60']
87-
if cs == 'dom':
88-
self.modules += ['cdt-cuda']
89-
86+
self.modules = ['craype-accel-nvidia60', 'cdt-cuda']
9087
elif cs in {'arola', 'tsa'}:
9188
gpu_arch = '70'
9289
self.modules = ['cuda/10.1.243']

cscs-checks/microbenchmarks/gpu/memory_bandwidth/memory_bandwidth.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,7 @@ def set_gpu_arch(self):
103103
if nvidia_sm:
104104
self.build_system.cxxflags += [f'-arch=sm_{nvidia_sm}']
105105
if cp in {'dom:gpu', 'daint:gpu'}:
106-
self.modules += ['craype-accel-nvidia60']
107-
if cp == 'dom:gpu':
108-
self.modules += ['cdt-cuda']
109-
106+
self.modules += ['craype-accel-nvidia60', 'cdt-cuda']
110107
else:
111108
self.modules += ['cuda']
112109

0 commit comments

Comments (0)