Skip to content

Commit 869460a

Browse files
committed
Add broadcast examples
both memmap and MPI from NGCM course
1 parent 5663616 commit 869460a

File tree

2 files changed

+1170
-0
lines changed

2 files changed

+1170
-0
lines changed
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# More efficient data movement with MPI"
8+
]
9+
},
10+
{
11+
"cell_type": "markdown",
12+
"metadata": {},
13+
"source": [
14+
"Just like [we did](memmap.ipynb) manually with memmap,\n",
15+
"you can move data more efficiently with MPI by sending it to just one engine,\n",
16+
"and using MPI to broadcast it to the rest of the engines.\n"
17+
]
18+
},
19+
{
20+
"cell_type": "code",
21+
"execution_count": null,
22+
"metadata": {
23+
"collapsed": false,
24+
"jupyter": {
25+
"outputs_hidden": false
26+
}
27+
},
28+
"outputs": [],
29+
"source": [
30+
"import socket\n",
31+
"import os, sys, re\n",
32+
"\n",
33+
"import numpy as np\n",
34+
"\n",
35+
"import ipyparallel as ipp"
36+
]
37+
},
38+
{
39+
"cell_type": "markdown",
40+
"metadata": {},
41+
"source": [
42+
"For this demo, I will connect to a cluster with engines started with MPI.\n",
43+
"If you have MPI and mpi4py on your machine, you can start a local cluster with MPI with:\n",
44+
"\n",
45+
" ipcluster start -n 8 --engines=MPI --profile mpi"
46+
]
47+
},
48+
{
49+
"cell_type": "code",
50+
"execution_count": null,
51+
"metadata": {
52+
"collapsed": false,
53+
"jupyter": {
54+
"outputs_hidden": false
55+
}
56+
},
57+
"outputs": [],
58+
"source": [
59+
"mpi_profile = 'mpi'\n",
60+
"rc = ipp.Client(profile=mpi_profile)\n",
61+
"eall = rc[:]\n",
62+
"root = rc[-1]"
63+
]
64+
},
65+
{
66+
"cell_type": "code",
67+
"execution_count": null,
68+
"metadata": {
69+
"collapsed": false,
70+
"jupyter": {
71+
"outputs_hidden": false
72+
}
73+
},
74+
"outputs": [],
75+
"source": [
76+
"%px from mpi4py.MPI import COMM_WORLD as MPI"
77+
]
78+
},
79+
{
80+
"cell_type": "code",
81+
"execution_count": null,
82+
"metadata": {
83+
"collapsed": false,
84+
"jupyter": {
85+
"outputs_hidden": false
86+
}
87+
},
88+
"outputs": [],
89+
"source": [
90+
"mpi_ranks = eall.apply_async(lambda : MPI.Get_rank()).get_dict()\n",
91+
"root_rank = root.apply_sync(lambda : MPI.Get_rank())\n",
92+
"mpi_ranks"
93+
]
94+
},
95+
{
96+
"cell_type": "code",
97+
"execution_count": null,
98+
"metadata": {
99+
"collapsed": false,
100+
"jupyter": {
101+
"outputs_hidden": false
102+
}
103+
},
104+
"outputs": [],
105+
"source": [
106+
"sz = 1024\n",
107+
"data = np.random.random((sz, sz))\n",
108+
"data = data.dot(data.T)"
109+
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"metadata": {
115+
"collapsed": false,
116+
"jupyter": {
117+
"outputs_hidden": false
118+
}
119+
},
120+
"outputs": [],
121+
"source": [
122+
"%%time \n",
123+
"ar = eall.push({'data': data}, block=False)\n",
124+
"ar.wait_interactive()"
125+
]
126+
},
127+
{
128+
"cell_type": "code",
129+
"execution_count": null,
130+
"metadata": {
131+
"collapsed": false,
132+
"jupyter": {
133+
"outputs_hidden": false
134+
}
135+
},
136+
"outputs": [],
137+
"source": [
138+
"@ipp.interactive\n",
139+
"def _bcast(key, root_rank):\n",
140+
" \"\"\"function to run on engines as part of broadcast\"\"\"\n",
141+
" g = globals()\n",
142+
" obj = g.get(key, None)\n",
143+
" obj = MPI.bcast(obj, root_rank)\n",
144+
" g[key] = obj\n",
145+
"\n",
146+
"def broadcast(key, obj, dv, root, root_rank):\n",
147+
" \"\"\"More efficient broadcast by doing push to root,\n",
148+
" and MPI broadcast to other engines.\n",
149+
" \n",
150+
" Still O(N) messages, but all but one message is always small.\n",
151+
" \"\"\"\n",
152+
" root.push({key : obj}, block=False)\n",
153+
" return dv.apply_async(_bcast, key, root_rank)"
154+
]
155+
},
156+
{
157+
"cell_type": "code",
158+
"execution_count": null,
159+
"metadata": {
160+
"collapsed": false,
161+
"jupyter": {
162+
"outputs_hidden": false
163+
}
164+
},
165+
"outputs": [],
166+
"source": [
167+
"%%time\n",
168+
"ar = broadcast('data', data, eall, root, root_rank)\n",
169+
"ar.wait_interactive()"
170+
]
171+
},
172+
{
173+
"cell_type": "code",
174+
"execution_count": null,
175+
"metadata": {
176+
"collapsed": false,
177+
"jupyter": {
178+
"outputs_hidden": false
179+
}
180+
},
181+
"outputs": [],
182+
"source": [
183+
"eall.apply_sync(np.linalg.norm, parallel.Reference('data'), 2)"
184+
]
185+
},
186+
{
187+
"cell_type": "code",
188+
"execution_count": null,
189+
"metadata": {},
190+
"outputs": [],
191+
"source": []
192+
}
193+
],
194+
"metadata": {
195+
"kernelspec": {
196+
"display_name": "Python 3 (ipykernel)",
197+
"language": "python",
198+
"name": "python3"
199+
},
200+
"language_info": {
201+
"codemirror_mode": {
202+
"name": "ipython",
203+
"version": 3
204+
},
205+
"file_extension": ".py",
206+
"mimetype": "text/x-python",
207+
"name": "python",
208+
"nbconvert_exporter": "python",
209+
"pygments_lexer": "ipython3",
210+
"version": "3.8.8"
211+
},
212+
"widgets": {
213+
"application/vnd.jupyter.widget-state+json": {
214+
"state": {},
215+
"version_major": 2,
216+
"version_minor": 0
217+
}
218+
}
219+
},
220+
"nbformat": 4,
221+
"nbformat_minor": 4
222+
}

0 commit comments

Comments
 (0)