Skip to content

Commit 7ce10fb

Browse files
authored
Merge pull request #10 from gjbex/development
Various additions
2 parents e6ce87c + 16055f9 commit 7ce10fb

File tree

6 files changed

+242
-53
lines changed

6 files changed

+242
-53
lines changed

docs/README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ When you complete this training you will
1212
computing such as numpy, numexpr and numba;
1313
* be able to use Cython to improve your code's performance;
1414
* be able to wrap C, C++ and Fortran code to use it from Python;
15-
* understand the opportunities and pitfalls of multithreaded
15+
* understand the opportunities and pitfalls of multi-threaded
1616
programming with Python;
1717
* be able to write distributed applications using MPI;
1818
* have an understanding of how frameworks for distributed
@@ -31,7 +31,7 @@ Total duration: 4 hours.
3131
| Cython | 60 min. |
3232
| coffee break | 10 min. |
3333
| interfacing with C/C++/Fortran | 30 min. |
34-
| multithreaded programming | 10 min. |
34+
| multi-threaded programming | 10 min. |
3535
| MPI | 45 min. |
3636
| dask | 15 min. |
3737
| pyspark | 20 min. |
@@ -45,6 +45,12 @@ Slides are available in the
4545
as well as example code and hands-on material.
4646

4747

48+
## Software environment
49+
50+
Instructions on [how to create the required software environment](software_stack.md)
51+
are available.
52+
53+
4854
## Target audience
4955

5056
This training is for you if you need to use Python for computationally

docs/software_stack.md

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Software stack
2+
3+
This training requires a non-trivial software stack so using the conda package
4+
manager will simplify your life considerably.
5+
6+
7+
## git version control
8+
9+
The repository for this training session is available on GitHub, and cloning this
10+
repository on your own machine will give you access to all training material.
11+
12+
If you don't have a git client installed, consult the following [web page on how to
13+
install](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) one on
14+
your platform of choice.
15+
16+
17+
## Python & conda
18+
19+
The most convenient way to install the required software is using the conda
20+
environment manager. conda is available on Linux, MacOS and Windows. You can use
21+
conda from the command line when you install miniconda which is available for
22+
[download](https://docs.conda.io/en/latest/miniconda.html). The website provides
23+
installation instructions for each platform.
24+
25+
Remember to install miniconda on a file system with enough free space since conda
26+
environments quickly take multiple gigabytes of disk space.
27+
28+
Alternatively, you can install Anaconda, a GUI application to manage Python
29+
environments. For Windows, this may be the most convenient option. Anaconda is
30+
available for Windows, MacOS and Linux and can be downloaded from the
31+
[Anaconda website](https://www.anaconda.com/products/individual).
32+
33+
34+
## Training environment
35+
36+
To create and use the conda environment for this training, open a terminal window and
37+
follow the steps below.
38+
39+
1. Clone the Github repository:
40+
```bash
41+
$ git clone git@github.com:gjbex/Python-for-HPC.git
42+
```
43+
2. Change into the newly created directory:
44+
```bash
45+
$ cd Python-for-HPC
46+
```
47+
3. Create the conda environment for this training session:
48+
```bash
49+
$ conda env create -f environment.yml
50+
```
51+
4. Activate the environment:
52+
```bash
53+
$ conda activate python_for_hpc
54+
```
55+
56+
Now you can run Python scripts in this terminal, or start a Jupyter notebook.
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/usr/bin/env python
2+
3+
from argparse import ArgumentParser, FileType
4+
import numpy as np
5+
import re
6+
import sys
7+
8+
9+
def accumulate(data_file):
    """Collect the timing measurements per test from mpifitness output.

    Each line is matched against a pattern that expects, in order: a test
    name in lowercase letters at the start of the line, a start time written
    as a decimal number followed by a colon, and a duration.  Lines that do
    not match (e.g. the ``#`` header lines) are ignored.

    Args:
        data_file: iterable of text lines, e.g. an open text file.

    Returns:
        dict mapping each test name to a 2-column numpy array with one row
        per matching line; column 0 is the start time, column 1 the duration.
    """
    # The fractional part of the duration is optional: Python prints a float
    # such as 1e-05 without a decimal point, and such measurements must not
    # be dropped silently.
    regex = re.compile(
        r'^([a-z]+).+?(\d+\.\d+):\s+(\d+(?:\.\d+)?(?:e[+-]\d+)?)'
    )
    samples = {}
    for line in data_file:
        match = regex.search(line)
        if match is None:
            continue
        test, time, duration = match.groups()
        samples.setdefault(test, []).append((float(time), float(duration)))
    return {test: np.array(rows) for test, rows in samples.items()}
28+
29+
30+
def print_stats(test, data):
    """Print summary statistics of the measurements for a single test.

    Writes the test name followed by one line each for the minimum, median,
    mean, maximum, standard deviation and number of values to standard
    output.

    Args:
        test: name of the test, printed as the heading.
        data: values to summarize; must offer ``min()``, ``mean()`` and
            ``max()`` methods and be accepted by ``np.median``, ``np.std``
            and ``len`` (the callers in this file pass a numpy array).
    """
    print(f'{test}:')
    # Each statistic is computed only when its line is printed, so the
    # output order and any failure point stay the same as with one print
    # statement per statistic.
    statistics = (
        ('min', lambda: data.min()),
        ('median', lambda: np.median(data)),
        ('mean', lambda: data.mean()),
        ('max', lambda: data.max()),
        ('stddev', lambda: np.std(data)),
        ('n', lambda: len(data)),
    )
    for label, compute in statistics:
        print(f' {label}: {compute()}')
38+
39+
40+
def main():
    """Print summary statistics for every test found in an mpifitness file.

    The file to analyse is given as the positional command line argument
    ``file``.

    Returns:
        0, which the caller passes to ``sys.exit`` as the exit status.
    """
    arg_parser = ArgumentParser(description='analyze mpifitness data')
    arg_parser.add_argument('file', type=FileType('r'), help='file to analyse')
    options = arg_parser.parse_args()
    # argparse opens the file but never closes it; release it as soon as
    # all lines have been read.
    with options.file:
        timings = accumulate(options.file)
    for test, data_list in timings.items():
        # column 0 holds the start times, column 1 the durations
        print_stats(test, data_list[:, 1])
    return 0
48+
49+
50+
if __name__ == '__main__':
51+
sys.exit(main())

source-code/mpi4py/mpifitness.py

Lines changed: 59 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ def acknowledge(comm):
1818
comm.barrier()
1919
rank = comm.Get_rank()
2020
size = comm.Get_size()
21-
print(f'process {rank} out of {size}')
21+
print(f'acknowledge {rank} out of {size}')
2222
comm.barrier()
2323

2424

25-
def pingpong(comm, nr_iters, msg_size):
25+
def pingpong(comm, nr_iters, msg_size, file):
2626
comm.barrier()
2727
rank = comm.Get_rank()
2828
size = comm.Get_size()
@@ -39,7 +39,8 @@ def pingpong(comm, nr_iters, msg_size):
3939
print(f'{rank} received {msg}, expected {dest}',
4040
file=sys.stderr)
4141
comm.Abort(1)
42-
print(f'{rank} -> {dest} pingpong: {end_time - start_time}')
42+
print(f'pingpong {rank} -> {dest} {start_time}: '
43+
f'{end_time - start_time}', file=file)
4344
if rank == dest:
4445
start_time = time.time()
4546
msg = comm.recv(source=source)
@@ -49,11 +50,12 @@ def pingpong(comm, nr_iters, msg_size):
4950
print(f'{rank} received {msg}, expected {source}',
5051
file=sys.stderr)
5152
comm.Abort(1)
52-
print(f'{rank} -> {source} pingpong: {end_time - start_time}')
53+
print(f'pingpong {rank} -> {source} {start_time}: '
54+
f'{end_time - start_time}', file=file)
5355
comm.barrier()
5456

5557

56-
def broadcast(comm, nr_iters, msg_size):
58+
def broadcast(comm, nr_iters, msg_size, file):
5759
comm.barrier()
5860
rank = comm.Get_rank()
5961
size = comm.Get_size()
@@ -65,14 +67,15 @@ def broadcast(comm, nr_iters, msg_size):
6567
start_time = time.time()
6668
msg = comm.bcast(msg, root=root)
6769
end_time = time.time()
68-
print(f'{root} -> {rank} bcast: {end_time - start_time}')
70+
print(f'bcast {root} -> {rank} {start_time}: {end_time - start_time}',
71+
file=file)
6972
if msg != make_msg(root, msg_size):
7073
print(f'{rank} received unexpected bcast message')
7174
comm.Abort(2)
7275
comm.barrier()
7376

7477

75-
def scatter(comm, nr_iters, msg_size):
78+
def scatter(comm, nr_iters, msg_size, file):
7679
comm.barrier()
7780
rank = comm.Get_rank()
7881
size = comm.Get_size()
@@ -84,14 +87,15 @@ def scatter(comm, nr_iters, msg_size):
8487
start_time = time.time()
8588
msg = comm.scatter(msg, root=root)
8689
end_time = time.time()
87-
print(f'{root} -> {rank} scatter: {end_time - start_time}')
90+
print(f'scatter {root} -> {rank} {start_time}: {end_time - start_time}',
91+
file=file)
8892
if msg != make_msg(rank, msg_size):
8993
print(f'{rank} received unexpected scatter message')
9094
comm.Abort(2)
9195
comm.barrier()
9296

9397

94-
def gather(comm, nr_iters, msg_size):
98+
def gather(comm, nr_iters, msg_size, file):
9599
comm.barrier()
96100
rank = comm.Get_rank()
97101
size = comm.Get_size()
@@ -101,7 +105,8 @@ def gather(comm, nr_iters, msg_size):
101105
start_time = time.time()
102106
msg = comm.gather(msg, root=root)
103107
end_time = time.time()
104-
print(f'{root} -> {rank} gather: {end_time - start_time}')
108+
print(f'gather {root} -> {rank} {start_time}: {end_time - start_time}',
109+
file=file)
105110
if (rank == root):
106111
if len(msg) != size:
107112
print(f'{rank} received unexpected gather message')
@@ -113,7 +118,7 @@ def gather(comm, nr_iters, msg_size):
113118
comm.barrier()
114119

115120

116-
def alltoall(comm, nr_iters, msg_size):
121+
def alltoall(comm, nr_iters, msg_size, file):
117122
comm.barrier()
118123
rank = comm.Get_rank()
119124
size = comm.Get_size()
@@ -122,7 +127,7 @@ def alltoall(comm, nr_iters, msg_size):
122127
start_time = time.time()
123128
msg = comm.alltoall(msg)
124129
end_time = time.time()
125-
print(f'{rank} alltoall: {end_time - start_time}')
130+
print(f'alltoall {rank} {start_time}: {end_time - start_time}', file=file)
126131
if len(msg) != size:
127132
print(f'{rank} received unexpected alltoall message')
128133
comm.Abort(2)
@@ -133,7 +138,7 @@ def alltoall(comm, nr_iters, msg_size):
133138
comm.barrier()
134139

135140

136-
def reduce(comm, nr_iters, msg_size):
141+
def reduce(comm, nr_iters, msg_size, file):
137142
comm.barrier()
138143
rank = comm.Get_rank()
139144
size = comm.Get_size()
@@ -143,7 +148,8 @@ def reduce(comm, nr_iters, msg_size):
143148
start_time = time.time()
144149
msg = comm.reduce(msg, op=MPI.SUM, root=root)
145150
end_time = time.time()
146-
print(f'{root} -> {rank} reduce: {end_time - start_time}')
151+
print(f'reduce {root} -> {rank} {start_time}: {end_time - start_time}',
152+
file=file)
147153
comm.barrier()
148154

149155

@@ -155,6 +161,7 @@ def main():
155161
print(f'# acknowledgment')
156162
acknowledge(comm)
157163
arg_parser = ArgumentParser(description='MPI performance benchmark')
164+
arg_parser.add_argument('file_base', help='base file name for performance info')
158165
arg_parser.add_argument('--nr_pingpongs', type=int, default=10,
159166
help='number of ping-pong iterations to perform')
160167
arg_parser.add_argument('--pingpong_size', type=int, default=8,
@@ -180,43 +187,44 @@ def main():
180187
arg_parser.add_argument('--reduce_size', type=int, default=8,
181188
help='number of bytes for reduce message')
182189
options = arg_parser.parse_args()
183-
comm.barrier()
184-
if (rank == root):
185-
print(f'# {options.nr_pingpongs} ping-pong iterations, '
186-
f'size {options.pingpong_size}')
187-
comm.barrier()
188-
pingpong(comm, options.nr_pingpongs, options.pingpong_size)
189-
comm.barrier()
190-
if (rank == root):
191-
print(f'# {options.nr_bcasts} broadcast iterations, '
192-
f'size {options.bcast_size}')
193-
comm.barrier()
194-
broadcast(comm, options.nr_bcasts, options.bcast_size)
195-
comm.barrier()
196-
if (rank == root):
197-
print(f'# {options.nr_scatters} scatter iterations, '
198-
f'size {options.scatter_size}')
199-
comm.barrier()
200-
scatter(comm, options.nr_scatters, options.scatter_size)
201-
comm.barrier()
202-
if (rank == root):
203-
print(f'# {options.nr_gathers} gather iterations, '
204-
f'size {options.gather_size}')
205-
comm.barrier()
206-
gather(comm, options.nr_gathers, options.gather_size)
207-
comm.barrier()
208-
if (rank == root):
209-
print(f'# {options.nr_alltoalls} alltoall iterations, '
210-
f'size {options.alltoall_size}')
211-
comm.barrier()
212-
alltoall(comm, options.nr_alltoalls, options.alltoall_size)
213-
comm.barrier()
214-
if (rank == root):
215-
print(f'# {options.nr_reduces} reduce iterations, '
216-
f'size {options.reduce_size}')
217-
comm.barrier()
218-
reduce(comm, options.nr_reduces, options.reduce_size)
219-
comm.barrier()
190+
with open(f'{options.file_base}_{rank:04d}.txt', 'w') as file:
191+
comm.barrier()
192+
if (rank == root):
193+
print(f'# {options.nr_pingpongs} ping-pong iterations, '
194+
f'size {options.pingpong_size}', file=file)
195+
comm.barrier()
196+
pingpong(comm, options.nr_pingpongs, options.pingpong_size, file)
197+
comm.barrier()
198+
if (rank == root):
199+
print(f'# {options.nr_bcasts} broadcast iterations, '
200+
f'size {options.bcast_size}', file=file)
201+
comm.barrier()
202+
broadcast(comm, options.nr_bcasts, options.bcast_size, file)
203+
comm.barrier()
204+
if (rank == root):
205+
print(f'# {options.nr_scatters} scatter iterations, '
206+
f'size {options.scatter_size}', file=file)
207+
comm.barrier()
208+
scatter(comm, options.nr_scatters, options.scatter_size, file)
209+
comm.barrier()
210+
if (rank == root):
211+
print(f'# {options.nr_gathers} gather iterations, '
212+
f'size {options.gather_size}', file=file)
213+
comm.barrier()
214+
gather(comm, options.nr_gathers, options.gather_size, file)
215+
comm.barrier()
216+
if (rank == root):
217+
print(f'# {options.nr_alltoalls} alltoall iterations, '
218+
f'size {options.alltoall_size}', file=file)
219+
comm.barrier()
220+
alltoall(comm, options.nr_alltoalls, options.alltoall_size, file)
221+
comm.barrier()
222+
if (rank == root):
223+
print(f'# {options.nr_reduces} reduce iterations, '
224+
f'size {options.reduce_size}', file=file)
225+
comm.barrier()
226+
reduce(comm, options.nr_reduces, options.reduce_size, file)
227+
comm.barrier()
220228
return 0
221229

222230

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/env python
2+
3+
from argparse import ArgumentParser, FileType
4+
import matplotlib.pyplot as plt
5+
import seaborn as sns
6+
import sys
7+
from analyze_mpifitness_data import accumulate
8+
9+
10+
def main():
    """Plot the distribution of the durations measured for one test.

    Command line options:
        --file  file with mpifitness output to read (required)
        --test  name of the test to visualize (required)
        --bins  number of histogram bins (only used without --rug)
        --rug   draw a rug plot instead of a histogram
        --log   use a logarithmic x-axis

    Exits through ``arg_parser.error`` when the file contains no
    measurements for the requested test.

    Returns:
        0, which the caller passes to ``sys.exit`` as the exit status.
    """
    arg_parser = ArgumentParser(description='plot MPI time distribution')
    arg_parser.add_argument('--file', required=True, type=FileType('r'),
                            help='file to plot data from')
    arg_parser.add_argument('--test', required=True,
                            choices=['pingpong', 'bcast', 'scatter', 'gather',
                                     'alltoall', 'reduce'],
                            help='test to visualize')
    arg_parser.add_argument('--bins', type=int, default=5,
                            help='number of bins in histogram')
    arg_parser.add_argument('--rug', action='store_true', help='show rug')
    arg_parser.add_argument('--log', action='store_true', help='use log x-axis')
    options = arg_parser.parse_args()
    # argparse opens the file but never closes it; release it as soon as
    # all lines have been read.
    with options.file:
        timings = accumulate(options.file)
    # A valid test name may still be absent from this particular file;
    # report that as a usage error rather than a KeyError traceback.
    if options.test not in timings:
        arg_parser.error(f"no '{options.test}' measurements found in "
                         f"'{options.file.name}'")
    # column 1 of the accumulated data holds the durations
    data = timings[options.test][:, 1]
    # NOTE(review): distplot is deprecated in recent seaborn releases;
    # confirm the seaborn version pinned in environment.yml before upgrading.
    if options.rug:
        grid = sns.distplot(data, rug=True, hist=False)
    else:
        grid = sns.distplot(data, bins=options.bins)
    if options.log:
        grid.set(xscale='log')
    grid.set(title=options.test)
    plt.show()
    return 0
34+
35+
36+
if __name__ == '__main__':
37+
sys.exit(main())

0 commit comments

Comments
 (0)