Skip to content

Commit a442a52

Browse files
author
Vasileios Karakasis
committed
Merge branch 'master' into internals/bad-check-handling
2 parents bf1361e + 4dcb29b commit a442a52

File tree

88 files changed

+2539
-504
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+2539
-504
lines changed

.pep8speaks.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
message:
2+
opened:
3+
header: "Hello @{name}, Thank you for submitting the Pull Request!"
4+
footer: "Do see the ReFrame [Coding Style Guide](https://github.com/eth-cscs/reframe/wiki/coding-style-guide)"
5+
updated:
6+
header: "Hello @{name}, Thank you for updating!"
7+
footer: "Do see the ReFrame [Coding Style Guide](https://github.com/eth-cscs/reframe/wiki/coding-style-guide)"
8+
no_errors: "Cheers! There are no PEP8 issues in this Pull Request!"
9+
10+
only_mention_files_with_errors: True
11+
12+
scanner:
13+
diff_only: True
14+
15+
pycodestyle:
16+
max-line-length: 79
17+
ignore:
18+
- E129
19+
- E221
20+
- E226
21+
- E241
22+
- E272
23+
- E741
24+
- E742
25+
- E743
26+
- W504

.travis.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1+
dist: xenial
12
language: python
23
python:
34
- "3.5"
45
- "3.6"
5-
- "3.7-dev"
6+
- "3.7"
67

78
install:
89
- pip install -r requirements.txt

Jenkinsfile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ def loginBash = '#!/bin/bash -l'
55
def bashScript = 'ci-scripts/ci-runner.bash'
66
def machinesList = ['daint', 'dom', 'kesch']
77
def machinesToRun = machinesList
8+
def runTests = true
89
def uniqueID
910

1011
stage('Initialization') {
@@ -37,6 +38,11 @@ stage('Initialization') {
3738
currentBuild.result = 'SUCCESS'
3839
return
3940
}
41+
else if (splittedComment[2] == 'none') {
42+
runTests = false
43+
currentBuild.result = 'SUCCESS'
44+
return
45+
}
4046

4147
machinesRequested = []
4248
for (i = 2; i < splittedComment.size(); i++) {
@@ -66,6 +72,11 @@ stage('Initialization') {
6672
}
6773
}
6874

75+
if (!runTests) {
76+
println "Won't execute any test (${currentBuild.result}). Exiting..."
77+
return
78+
}
79+
6980
if (currentBuild.result != 'SUCCESS') {
7081
println "Initialization failed (${currentBuild.result}). Exiting..."
7182
return

README.md

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@ Users can create their own test hierarchies, create test factories for generatin
1919
The official documentation is maintained [here](https://eth-cscs.github.io/reframe/index.html).
2020
It corresponds to the [latest](https://github.com/eth-cscs/reframe/releases/latest) stable release and not to the current status of the `master`.
2121

22-
* SC18 presentation [[pdf](https://drive.google.com/file/d/1sIecW59E-AvhD-vl6c6QGXM14UKNzgo_/view?usp=sharing)]
23-
* SC18 Demo [[link](https://asciinema.org/a/6SQJTaRe2zrMInV92X0yb2gTh)]
24-
2522
### Building the documentation from master
2623

2724
You may build the documentation of the master either with Python 2 or Python 3 (<= 3.5).
@@ -47,3 +44,21 @@ The documentation is now up on [localhost:8000](http://localhost:8000), where yo
4744
In the `cscs-checks/` folder, you can find realistic regression tests used for the CSCS systems that you can reuse and adapt to your system.
4845
Notice that these tests are published as examples and may not run as-is in your system.
4946
However, they can serve as a very good starting point for implementing your system tests in ReFrame.
47+
48+
49+
## Contact
50+
51+
You can get in contact with the ReFrame community in the following ways:
52+
53+
### Mailing list
54+
55+
For keeping up with the latest news about ReFrame, posting questions and, generally, getting in touch with other users and the developers, you may follow the mailing list: [reframe@sympa.cscs.ch](mailto:reframe@sympa.cscs.ch).
56+
57+
Only subscribers may send messages to the list.
58+
To subscribe, please send an empty message to [reframe-subscribe@sympa.cscs.ch](mailto:reframe-subscribe@sympa.cscs.ch).
59+
60+
For unsubscribing, you may send an empty message to [reframe-unsubscribe@sympa.cscs.ch](mailto:reframe-unsubscribe@sympa.cscs.ch).
61+
62+
### Slack
63+
64+
You may also reach the community through Slack at [reframetalk.slack.com](https://reframetalk.slack.com/join/signup). Currently, you may join the Slack workspace by invitation only, which you will get as soon as you subscribe to the mailing list.

ci-scripts/deploy.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ cd $tmpdir
3939
git clone https://github.com/eth-cscs/reframe.git
4040
cd reframe
4141
found_version=$(./reframe.py -V)
42-
if [ $found_version -ne $version ]; then
42+
if [ $found_version != $version ]; then
4343
echo "$0: version mismatch: found $found_version, but required $version" >&2
4444
exit 1
4545
fi

config/cscs.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,13 @@ class ReframeSettings:
117117
'access': ['--partition=pn-regression'],
118118
'environs': ['PrgEnv-cray', 'PrgEnv-cray-nompi',
119119
'PrgEnv-pgi', 'PrgEnv-pgi-nompi',
120-
'PrgEnv-gnu', 'PrgEnv-gnu-nompi'],
120+
'PrgEnv-gnu', 'PrgEnv-gnu-nompi',
121+
'PrgEnv-cray-c2sm',
122+
'PrgEnv-pgi-c2sm',
123+
'PrgEnv-gnu-c2sm',
124+
'PrgEnv-cray-c2sm-gpu',
125+
'PrgEnv-pgi-c2sm-gpu',
126+
'PrgEnv-gnu-c2sm-gpu'],
121127
'descr': 'Kesch post-processing nodes'
122128
},
123129

@@ -131,8 +137,8 @@ class ReframeSettings:
131137
'PrgEnv-pgi-c2sm',
132138
'PrgEnv-gnu-c2sm',
133139
'PrgEnv-cray-c2sm-gpu',
134-
'PrgEnv-gnu-c2sm-gpu',
135-
'PrgEnv-cray-c2sm-gpu'],
140+
'PrgEnv-pgi-c2sm-gpu',
141+
'PrgEnv-gnu-c2sm-gpu'],
136142
'descr': 'Kesch compute nodes',
137143
'resources': {
138144
'_rfm_gpu': ['--gres=gpu:{num_gpus_per_node}'],
@@ -244,36 +250,42 @@ class ReframeSettings:
244250
},
245251
'PrgEnv-cray-c2sm': {
246252
'type': 'ProgEnvironment',
247-
'modules': ['env', 'c2sm/cray-env/base'],
253+
'modules': ['c2sm-rcm/1.00.00-kesch',
254+
'c2sm/cray-env/base'],
248255
},
249256
'PrgEnv-cray-c2sm-gpu': {
250257
'type': 'ProgEnvironment',
251-
'modules': ['env', 'c2sm/cray-env/gpu'],
258+
'modules': ['c2sm-rcm/1.00.00-kesch',
259+
'c2sm/cray-env/gpu'],
252260
},
253261
'PrgEnv-pgi-c2sm': {
254262
'type': 'ProgEnvironment',
255-
'modules': ['env', 'c2sm/pgi-env/base'],
263+
'modules': ['c2sm-rcm/1.00.00-kesch',
264+
'c2sm/pgi-env/base'],
256265
'cc': 'mpicc',
257266
'cxx': 'mpicxx',
258267
'ftn': 'mpif90',
259268
},
260269
'PrgEnv-pgi-c2sm-gpu': {
261270
'type': 'ProgEnvironment',
262-
'modules': ['env', 'c2sm/pgi-env/gpu'],
271+
'modules': ['c2sm-rcm/1.00.00-kesch',
272+
'c2sm/pgi-env/gpu'],
263273
'cc': 'mpicc',
264274
'cxx': 'mpicxx',
265275
'ftn': 'mpif90',
266276
},
267277
'PrgEnv-gnu-c2sm': {
268278
'type': 'ProgEnvironment',
269-
'modules': ['env', 'c2sm/gnu-env/base'],
279+
'modules': ['c2sm-rcm/1.00.00-kesch',
280+
'c2sm/gnu-env/base'],
270281
'cc': 'mpicc',
271282
'cxx': 'mpicxx',
272283
'ftn': 'mpif90',
273284
},
274285
'PrgEnv-gnu-c2sm-gpu': {
275286
'type': 'ProgEnvironment',
276-
'modules': ['env', 'c2sm/gnu-env/gpu'],
287+
'modules': ['c2sm-rcm/1.00.00-kesch',
288+
'c2sm/gnu-env/gpu'],
277289
'cc': 'mpicc',
278290
'cxx': 'mpicxx',
279291
'ftn': 'mpif90',
@@ -414,7 +426,8 @@ class ReframeSettings:
414426
'%(check_perf_var)s=%(check_perf_value)s|'
415427
'ref=%(check_perf_ref)s '
416428
'(l=%(check_perf_lower_thres)s, '
417-
'u=%(check_perf_upper_thres)s)'
429+
'u=%(check_perf_upper_thres)s)|'
430+
'%(check_perf_unit)s'
418431
),
419432
'append': True
420433
}

cscs-checks/apps/cp2k/cp2k_check.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def __init__(self, variant):
100100
'perf': (251.8, None, 0.15)
101101
},
102102
'daint:gpu': {
103-
'perf': (182.3, None, 0.10)
103+
'perf': (222.6, None, 0.05)
104104
},
105105
}
106106
else:
@@ -110,6 +110,6 @@ def __init__(self, variant):
110110
'perf': (240.0, None, 0.05)
111111
},
112112
'daint:gpu': {
113-
'perf': (195.0, None, 0.10)
113+
'perf': (222.6, None, 0.05)
114114
},
115115
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
@rfm.required_version('>=2.16-dev0')
6+
@rfm.simple_test
7+
class TensorFlowHorovodTest(rfm.RunOnlyRegressionTest):
8+
def __init__(self):
9+
super().__init__()
10+
self.descr = 'Distributed training with TensorFlow and Horovod'
11+
self.valid_systems = ['daint:gpu', 'dom:gpu']
12+
self.valid_prog_environs = ['PrgEnv-gnu']
13+
tfshortver = '1.11'
14+
self.sourcesdir = 'https://github.com/tensorflow/benchmarks'
15+
self.modules = ['Horovod/0.15.0-CrayGNU-18.08-tf-%s.0' % tfshortver]
16+
self.reference = {
17+
'dom:gpu': {
18+
'throughput': (1133.6, None, 0.05, 'images/s'),
19+
},
20+
'daint:gpu': {
21+
'throughput': (4403.0, None, 0.05, 'images/s')
22+
},
23+
}
24+
self.perf_patterns = {
25+
'throughput': sn.avg(sn.extractall(
26+
r'total images/sec:\s+(?P<throughput>\S+)',
27+
self.stdout, 'throughput', float))
28+
}
29+
self.sanity_patterns = sn.assert_found(
30+
r'[\S+\s+] INFO NET\/IB : Using interface ipogif0'
31+
r' for sideband communication', self.stdout)
32+
self.num_tasks_per_node = 1
33+
if self.current_system.name == 'dom':
34+
self.num_tasks = 8
35+
elif self.current_system.name == 'daint':
36+
self.num_tasks = 32
37+
38+
self.pre_run = ['git checkout cnn_tf_v%s_compatible' % tfshortver]
39+
self.variables = {
40+
'NCCL_DEBUG': 'INFO',
41+
'NCCL_IB_HCA': 'ipogif0',
42+
'NCCL_IB_CUDA_SUPPORT': '1',
43+
'OMP_NUM_THREADS': '$SLURM_CPUS_PER_TASK',
44+
}
45+
self.executable = ('python')
46+
self.executable_opts = [
47+
'scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py',
48+
'--model inception3',
49+
'--batch_size 64',
50+
'--variable_update horovod',
51+
'--log_dir ./logs',
52+
'--train_dir ./checkpoints']
53+
self.tags = {'production'}
54+
self.maintainers = ['MS', 'RS']

cscs-checks/cuda/cuda_checks.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def __init__(self):
2323
self.num_gpus_per_node = 1
2424
self.nvidia_sm = '60'
2525
if self.current_system.name == 'kesch':
26+
self.exclusive_access = True
2627
self.nvidia_sm = '37'
2728

2829
self.maintainers = ['AJ', 'VK']

0 commit comments

Comments
 (0)