Skip to content

Commit 89c4ed0

Browse files
authored
Merge branch 'master' into test/hpx-stencil
2 parents fac5a11 + e0c4401 commit 89c4ed0

File tree

12 files changed

+754
-37
lines changed

12 files changed

+754
-37
lines changed

.pep8speaks.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ message:
66
header: "Hello @{name}, Thank you for updating!"
77
footer: "Do see the ReFrame [Coding Style Guide](https://github.com/eth-cscs/reframe/wiki/coding-style-guide)"
88
no_errors: "Cheers! There are no PEP8 issues in this Pull Request!"
9-
9+
1010
only_mention_files_with_errors: True
1111

1212
scanner:
1313
diff_only: True
14-
14+
1515
pycodestyle:
1616
max-line-length: 79
1717
ignore:
@@ -21,4 +21,6 @@ pycodestyle:
2121
- E241
2222
- E272
2323
- E741
24+
- E742
25+
- E743
2426
- W504

Jenkinsfile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ def loginBash = '#!/bin/bash -l'
55
def bashScript = 'ci-scripts/ci-runner.bash'
66
def machinesList = ['daint', 'dom', 'kesch']
77
def machinesToRun = machinesList
8+
def runTests = true
89
def uniqueID
910

1011
stage('Initialization') {
@@ -37,6 +38,11 @@ stage('Initialization') {
3738
currentBuild.result = 'SUCCESS'
3839
return
3940
}
41+
else if (splittedComment[2] == 'none') {
42+
runTests = false
43+
currentBuild.result = 'SUCCESS'
44+
return
45+
}
4046

4147
machinesRequested = []
4248
for (i = 2; i < splittedComment.size(); i++) {
@@ -66,6 +72,11 @@ stage('Initialization') {
6672
}
6773
}
6874

75+
if (!runTests) {
76+
println "Won't execute any test (${currentBuild.result}). Exiting..."
77+
return
78+
}
79+
6980
if (currentBuild.result != 'SUCCESS') {
7081
println "Initialization failed (${currentBuild.result}). Exiting..."
7182
return

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,21 @@ The documentation is now up on [localhost:8000](http://localhost:8000), where yo
4444
In the `cscs-checks/` folder, you can find realistic regression tests used for the CSCS systems that you can reuse and adapt to your system.
4545
Notice that these tests are published as examples and may not run as-is in your system.
4646
However, they can serve as a very good starting point for implementing your system tests in ReFrame.
47+
48+
49+
## Contact
50+
51+
You can get in contact with the ReFrame community in the following ways:
52+
53+
### Mailing list
54+
55+
To keep up with the latest news about ReFrame, post questions and, more generally, get in touch with other users and the developers, you may follow the mailing list: [[email protected]](mailto:[email protected]).
56+
57+
Only subscribers may send messages to the list.
58+
To subscribe, please send an empty message to [[email protected]](mailto:[email protected]).
59+
60+
For unsubscribing, you may send an empty message to [[email protected]](mailto:[email protected]).
61+
62+
### Slack
63+
64+
You may also reach the community through Slack at [reframetalk.slack.com](https://reframetalk.slack.com/join/signup). Currently, you may join the Slack workspace by invitation only, which you will get as soon as you subscribe to the mailing list.
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
@rfm.required_version('>=2.16-dev0')
@rfm.simple_test
class TensorFlowHorovodTest(rfm.RunOnlyRegressionTest):
    """Run-only check for the TensorFlow CNN benchmark driven by Horovod.

    The benchmark sources are taken from the ``tensorflow/benchmarks``
    repository; before the run, the branch named after the TensorFlow
    version is checked out.  The performance figure is the average of all
    ``total images/sec`` values found in the standard output.
    """

    def __init__(self):
        super().__init__()

        # Short TensorFlow version; it selects both the Horovod module and
        # the benchmark branch to check out.
        tf_version = '1.11'

        # --- General test description -------------------------------------
        self.descr = 'Distributed training with TensorFlow and Horovod'
        self.maintainers = ['MS', 'RS']
        self.tags = {'production'}

        # --- Where the test may run ---------------------------------------
        self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_prog_environs = ['PrgEnv-gnu']

        # --- Sources and environment --------------------------------------
        self.sourcesdir = 'https://github.com/tensorflow/benchmarks'
        self.modules = ['Horovod/0.15.0-CrayGNU-18.08-tf-%s.0' % tf_version]
        self.pre_run = ['git checkout cnn_tf_v%s_compatible' % tf_version]
        self.variables = {
            'NCCL_DEBUG': 'INFO',
            'NCCL_IB_HCA': 'ipogif0',
            'NCCL_IB_CUDA_SUPPORT': '1',
            'OMP_NUM_THREADS': '$SLURM_CPUS_PER_TASK',
        }

        # --- Job geometry -------------------------------------------------
        self.num_tasks_per_node = 1
        tasks_by_system = {'dom': 8, 'daint': 32}
        system_name = self.current_system.name
        if system_name in tasks_by_system:
            # On any other system `num_tasks` is left untouched.
            self.num_tasks = tasks_by_system[system_name]

        # --- Command line -------------------------------------------------
        self.executable = 'python'
        self.executable_opts = [
            'scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py',
            '--model inception3',
            '--batch_size 64',
            '--variable_update horovod',
            '--log_dir ./logs',
            '--train_dir ./checkpoints',
        ]

        # --- Sanity -------------------------------------------------------
        # NOTE(review): the leading `[\S+\s+]` is a regex character class,
        # i.e. it matches exactly one character -- confirm this is the
        # intended way to match the prefix of the NCCL log line.
        sideband_msg = (r'[\S+\s+] INFO NET\/IB : Using interface ipogif0'
                        r' for sideband communication')
        self.sanity_patterns = sn.assert_found(sideband_msg, self.stdout)

        # --- Performance --------------------------------------------------
        throughput_samples = sn.extractall(
            r'total images/sec:\s+(?P<throughput>\S+)',
            self.stdout, 'throughput', float)
        self.perf_patterns = {'throughput': sn.avg(throughput_samples)}
        self.reference = {
            'dom:gpu': {
                'throughput': (1133.6, None, 0.05, 'images/s'),
            },
            'daint:gpu': {
                'throughput': (4403.0, None, 0.05, 'images/s'),
            },
        }
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
@rfm.required_version('>=2.16-dev0')
@rfm.simple_test
class HPCGCheckRef(rfm.RegressionTest):
    """Build and run the reference HPCG benchmark from its upstream sources.

    The GFLOP/s rating extracted from the HPCG output file is divided by
    the number of nodes (assigned tasks / tasks per node) before it is
    compared against the per-partition reference value.
    """

    def __init__(self):
        super().__init__()

        self.descr = 'HPCG reference benchmark'
        self.maintainers = ['SK']
        self.tags = {'diagnostic'}

        self.valid_systems = ['daint:mc', 'daint:gpu', 'dom:gpu', 'dom:mc']
        self.valid_prog_environs = ['PrgEnv-gnu']
        self.modules = ['craype-hugepages8M']

        # Build: the build system must be selected before its options
        # can be set.
        self.sourcesdir = 'https://github.com/hpcg-benchmark/hpcg.git'
        self.build_system = 'Make'
        self.build_system.options = ['arch=MPI_GCC_OMP']

        # Run
        self.executable = 'bin/xhpcg'
        self.executable_opts = ['--nx=104', '--ny=104', '--nz=104', '-t2']

        # The output file name carries a suffix that depends on the
        # execution time, so it is located lazily through a glob.
        hpcg_reports = sn.glob('HPCG*.txt')
        self.output_file = sn.getitem(hpcg_reports, 0)

        self.num_tasks = 0
        self.num_cpus_per_task = 1

        # Tasks per node for each partition; looked up in `setup()`.
        self.system_num_tasks = {
            'daint:mc': 36,
            'daint:gpu': 12,
            'dom:mc': 36,
            'dom:gpu': 12,
        }

        # Per-node reference values; only a lower threshold is set.
        gflops_per_node = (
            ('daint:gpu', 7.6),
            ('daint:mc', 13.4),
            ('dom:gpu', 7.6),
            ('dom:mc', 13.4),
        )
        self.reference = {
            part: {'gflops': (ref, -0.1, None, 'Gflop/s')}
            for part, ref in gflops_per_node
        }

    @property
    @sn.sanity_function
    def num_tasks_assigned(self):
        """Number of tasks of the test's job, as a sanity function."""
        return self.job.num_tasks

    def setup(self, partition, environ, **job_opts):
        """Set the per-partition geometry and the sanity/performance checks.

        The base class ``setup()`` is invoked last with the same arguments.
        """
        tasks_per_node = self.system_num_tasks[partition.fullname]
        self.num_tasks_per_node = tasks_per_node

        assigned = self.num_tasks_assigned
        num_nodes = assigned / tasks_per_node

        # Sanity: four 'PASSED' entries in the report and a task count
        # that is a multiple of the tasks per node.
        passed_entries = sn.findall(r'PASSED', self.output_file)
        self.sanity_patterns = sn.all([
            sn.assert_eq(4, sn.count(passed_entries)),
            sn.assert_eq(0, assigned % tasks_per_node),
        ])

        # Performance: total rating normalised by the number of nodes.
        total_gflops = sn.extractsingle(
            r'HPCG result is VALID with a GFLOP\/s rating of=\s*'
            r'(?P<perf>\S+)',
            self.output_file, 'perf', float)
        self.perf_patterns = {'gflops': total_gflops / num_nodes}

        super().setup(partition, environ, **job_opts)
74+
75+
76+
@rfm.required_version('>=2.16-dev0')
@rfm.simple_test
class HPCGCheckMKL(rfm.RegressionTest):
    """Build and run the HPCG benchmark shipped with Intel MKL.

    The benchmark sources are copied from ``${MKLROOT}/benchmarks/hpcg``
    before the build.  The GFLOP/s rating extracted from the YAML report
    is divided by the number of nodes before it is compared against the
    per-partition reference value.
    """

    def __init__(self):
        super().__init__()

        self.descr = 'HPCG benchmark Intel MKL implementation'
        self.maintainers = ['SK']
        self.tags = {'diagnostic'}

        self.valid_systems = ['daint:mc', 'dom:mc', 'daint:gpu', 'dom:gpu']
        self.valid_prog_environs = ['PrgEnv-intel']
        self.modules = ['craype-hugepages8M']

        # Build
        self.build_system = 'Make'
        self.prebuild_cmd = [
            'cp -r ${MKLROOT}/benchmarks/hpcg/* .',
            'mv Make.CrayXC setup',
            './configure CrayXC',
        ]

        # Job geometry; the per-node values are set in `setup()`.
        self.num_tasks = 0
        self.num_tasks_per_core = 2

        # Grid size passed for each of the three dimensions; it is also
        # part of the report file name (see `outfile_lazy`).
        self.problem_size = 104

        self.variables = {
            'HUGETLB_VERBOSE': '0',
            'MPICH_MAX_THREAD_SAFETY': 'multiple',
            'MPICH_USE_DMAPP_COLL': '1',
            'PMI_NO_FORK': '1',
            'KMP_HW_SUBSET': '9c,2t',
            'KMP_AFFINITY': 'granularity=fine,compact'
        }

        # Run
        self.executable = 'bin/xhpcg_avx2'
        size_opts = ('--nx=%d', '--ny=%d', '--nz=%d')
        self.executable_opts = (
            [opt % self.problem_size for opt in size_opts] + ['-t2']
        )

        # Per-node reference values; only a lower threshold is set.
        gflops_per_node = (
            ('dom:mc', 22),
            ('daint:mc', 22),
            ('dom:gpu', 10.7),
            ('daint:gpu', 10.7),
        )
        self.reference = {
            part: {'gflops': (ref, -0.1, None, 'Gflop/s')}
            for part, ref in gflops_per_node
        }

    @property
    @sn.sanity_function
    def num_tasks_assigned(self):
        """Number of tasks of the test's job, as a sanity function."""
        return self.job.num_tasks

    @property
    @sn.sanity_function
    def outfile_lazy(self):
        """First file matching the report name built from the job geometry.

        The name pattern combines the problem size, the job's task count
        and the CPUs per task, followed by a wildcard suffix.
        """
        name_fields = (self.problem_size,
                       self.job.num_tasks,
                       self.num_cpus_per_task)
        report_glob = 'n%d-%dp-%dt-*.yaml' % name_fields
        return sn.getitem(sn.glob(report_glob), 0)

    def setup(self, partition, environ, **job_opts):
        """Set the per-partition geometry and the sanity/performance checks.

        The base class ``setup()`` is invoked last with the same arguments.
        """
        on_gpu_partition = partition.fullname in ('daint:gpu', 'dom:gpu')
        self.num_tasks_per_node = 2 if on_gpu_partition else 4
        self.num_cpus_per_task = 12 if on_gpu_partition else 18

        # This is a flexible test: the extracted performance is normalised
        # by the number of nodes so that a single reference value can be
        # used regardless of the allocation size.
        tasks_per_node = self.num_tasks_per_node
        assigned = self.num_tasks_assigned
        num_nodes = assigned / tasks_per_node

        # Sanity: four 'PASSED' entries in the report and a task count
        # that is a multiple of the tasks per node.
        passed_entries = sn.findall(r'PASSED', self.outfile_lazy)
        self.sanity_patterns = sn.all([
            sn.assert_eq(4, sn.count(passed_entries)),
            sn.assert_eq(0, assigned % tasks_per_node),
        ])

        total_gflops = sn.extractsingle(
            r'HPCG result is VALID with a GFLOP\/s rating of:\s*'
            r'(?P<perf>\S+)',
            self.outfile_lazy, 'perf', float)
        self.perf_patterns = {'gflops': total_gflops / num_nodes}

        super().setup(partition, environ, **job_opts)

0 commit comments

Comments
 (0)