Skip to content

Commit b2030e4

Browse files
brownbaerchenThomas
andauthored
GPU project with Gray-Scott configurations (#496)
* Structure for GPU project and configurations for Gray-Scott * Fixed GS initial conditions in spectral mode * Added Game-of-Life adjacent configuration for Gray-Scott * Blobs evolving in different directions * Added USkate world configuration for Gray-Scott * Added script for scaling tests * Changed configurations a bit * Changed configurations #!!!!!! WARNING: FLAKEHEAVEN FAILED !!!!!!: #: * Changed configs * Added readme for GPU project * Implemented node limit in scaling #!!!!!! WARNING: FLAKEHEAVEN FAILED !!!!!!: #: --------- Co-authored-by: Thomas <[email protected]>
1 parent 5fccc9f commit b2030e4

File tree

27 files changed

+1446
-35
lines changed

27 files changed

+1446
-35
lines changed

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Projects
5353
projects/compression.rst
5454
projects/second_order.rst
5555
projects/monodomain.rst
56+
projects/GPU.rst
5657

5758

5859
API documentation

docs/source/projects/GPU.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.. include:: /../../pySDC/projects/GPU/README.rst

pySDC/implementations/datatype_classes/cupy_mesh.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
except ImportError:
66
MPI = None
77

8+
try:
9+
from pySDC.helpers.NCCL_communicator import NCCLComm
10+
except ImportError:
11+
NCCLComm = None
12+
813

914
class cupy_mesh(cp.ndarray):
1015
"""
@@ -31,7 +36,7 @@ def __new__(cls, init, val=0.0, **kwargs):
3136
obj[:] = init[:]
3237
elif (
3338
isinstance(init, tuple)
34-
and (init[1] is None or isinstance(init[1], MPI.Intracomm))
39+
and (init[1] is None or isinstance(init[1], MPI.Intracomm) or isinstance(init[1], NCCLComm))
3540
and isinstance(init[2], cp.dtype)
3641
):
3742
obj = cp.ndarray.__new__(cls, init[0], dtype=init[2], **kwargs)
@@ -62,12 +67,15 @@ def __abs__(self):
6267
float: absolute maximum of all mesh values
6368
"""
6469
# take absolute values of the mesh values
65-
local_absval = float(cp.amax(cp.ndarray.__abs__(self)))
70+
local_absval = cp.max(cp.ndarray.__abs__(self))
6671

6772
if self.comm is not None:
6873
if self.comm.Get_size() > 1:
69-
global_absval = 0.0
70-
global_absval = max(self.comm.allreduce(sendobj=local_absval, op=MPI.MAX), global_absval)
74+
global_absval = local_absval * 0
75+
if isinstance(self.comm, NCCLComm):
76+
self.comm.Allreduce(sendbuf=local_absval, recvbuf=global_absval, op=MPI.MAX)
77+
else:
78+
global_absval = self.comm.allreduce(sendobj=float(local_absval), op=MPI.MAX)
7179
else:
7280
global_absval = local_absval
7381
else:

pySDC/implementations/problem_classes/GrayScott_MPIFFT.py

Lines changed: 152 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
import scipy.sparse as sp
2-
from mpi4py import MPI
3-
from mpi4py_fft import PFFT
42

53
from pySDC.core.errors import ProblemError
6-
from pySDC.core.problem import Problem, WorkCounter
7-
from pySDC.implementations.datatype_classes.mesh import mesh, imex_mesh, comp2_mesh
4+
from pySDC.core.problem import WorkCounter
5+
from pySDC.implementations.datatype_classes.mesh import comp2_mesh
86
from pySDC.implementations.problem_classes.generic_MPIFFT_Laplacian import IMEX_Laplacian_MPIFFT
97

108
from mpi4py_fft import newDistArray
@@ -48,6 +46,8 @@ class grayscott_imex_diffusion(IMEX_Laplacian_MPIFFT):
4846
Denotes the period of the function to be approximated for the Fourier transform.
4947
comm : COMM_WORLD, optional
5048
Communicator for ``mpi4py-fft``.
49+
num_blobs : int, optional
50+
Number of blobs in the initial conditions. Negative values give rectangles.
5151
5252
Attributes
5353
----------
@@ -71,18 +71,35 @@ class grayscott_imex_diffusion(IMEX_Laplacian_MPIFFT):
7171
.. [3] https://www.chebfun.org/examples/pde/GrayScott.html
7272
"""
7373

74-
def __init__(self, Du=1.0, Dv=0.01, A=0.09, B=0.086, **kwargs):
75-
kwargs['L'] = 2.0
76-
super().__init__(dtype='d', alpha=1.0, x0=-kwargs['L'] / 2.0, **kwargs)
74+
def __init__(
75+
self,
76+
Du=1.0,
77+
Dv=0.01,
78+
A=0.09,
79+
B=0.086,
80+
L=2.0,
81+
num_blobs=1,
82+
**kwargs,
83+
):
84+
super().__init__(dtype='d', alpha=1.0, x0=-L / 2.0, L=L, **kwargs)
7785

7886
# prepare the array with two components
7987
shape = (2,) + (self.init[0])
8088
self.iU = 0
8189
self.iV = 1
8290
self.ncomp = 2 # needed for transfer class
83-
self.init = (shape, self.comm, self.xp.dtype('float'))
8491

85-
self._makeAttributeAndRegister('Du', 'Dv', 'A', 'B', localVars=locals(), readOnly=True)
92+
self.init = (shape, self.comm, self.xp.dtype('complex') if self.spectral else self.xp.dtype('float'))
93+
94+
self._makeAttributeAndRegister(
95+
'Du',
96+
'Dv',
97+
'A',
98+
'B',
99+
'num_blobs',
100+
localVars=locals(),
101+
readOnly=True,
102+
)
86103

87104
# prepare "Laplacians"
88105
self.Ku = -self.Du * self.K2
@@ -168,7 +185,7 @@ def solve_system(self, rhs, factor, u0, t):
168185

169186
return me
170187

171-
def u_exact(self, t):
188+
def u_exact(self, t, seed=10700000):
172189
r"""
173190
Routine to compute the exact solution at time :math:`t = 0`, see [3]_.
174191
@@ -185,19 +202,135 @@ def u_exact(self, t):
185202
assert t == 0.0, 'Exact solution only valid as initial condition'
186203
assert self.ndim == 2, 'The initial conditions are 2D for now..'
187204

188-
me = self.dtype_u(self.init, val=0.0)
205+
xp = self.xp
206+
207+
_u = xp.zeros_like(self.X[0])
208+
_v = xp.zeros_like(self.X[0])
209+
210+
rng = xp.random.default_rng(seed)
211+
212+
if self.num_blobs < 0:
213+
"""
214+
Rectangles with stationary background, see arXiv:1501.01990
215+
"""
216+
F, k = self.A, self.B - self.A
217+
A = xp.sqrt(F) / (F + k)
218+
219+
# set stable background state from Equation 2
220+
assert 2 * k < xp.sqrt(F) - 2 * F, 'Kill rate is too large to facilitate stable background'
221+
_u[...] = (A - xp.sqrt(A**2 - 4)) / (2 * A)
222+
_v[...] = xp.sqrt(F) * (A + xp.sqrt(A**2 - 4)) / 2
223+
224+
for _ in range(-self.num_blobs):
225+
x0, y0 = rng.random(size=2) * self.L[0] - self.L[0] / 2
226+
lx, ly = rng.random(size=2) * self.L[0] / self.nvars[0] * 30
227+
228+
mask_x = xp.logical_and(self.X[0] > x0, self.X[0] < x0 + lx)
229+
mask_y = xp.logical_and(self.X[1] > y0, self.X[1] < y0 + ly)
230+
mask = xp.logical_and(mask_x, mask_y)
231+
232+
_u[mask] = rng.random()
233+
_v[mask] = rng.random()
234+
235+
elif self.num_blobs > 0:
236+
"""
237+
Blobs as in https://www.chebfun.org/examples/pde/GrayScott.html
238+
"""
239+
240+
inc = self.L[0] / (self.num_blobs + 1)
241+
242+
for i in range(1, self.num_blobs + 1):
243+
for j in range(1, self.num_blobs + 1):
244+
signs = (-1) ** rng.integers(low=0, high=2, size=2)
245+
246+
# This assumes that the box is [-L/2, L/2]^2
247+
_u[...] += -xp.exp(
248+
-80.0
249+
* (
250+
(self.X[0] + self.x0 + inc * i + signs[0] * 0.05) ** 2
251+
+ (self.X[1] + self.x0 + inc * j + signs[1] * 0.02) ** 2
252+
)
253+
)
254+
_v[...] += xp.exp(
255+
-80.0
256+
* (
257+
(self.X[0] + self.x0 + inc * i - signs[0] * 0.05) ** 2
258+
+ (self.X[1] + self.x0 + inc * j - signs[1] * 0.02) ** 2
259+
)
260+
)
261+
262+
_u += 1
263+
else:
264+
raise NotImplementedError
189265

190-
# This assumes that the box is [-L/2, L/2]^2
266+
u = self.u_init
191267
if self.spectral:
192-
tmp = 1.0 - self.xp.exp(-80.0 * ((self.X[0] + 0.05) ** 2 + (self.X[1] + 0.02) ** 2))
193-
me[0, ...] = self.fft.forward(tmp)
194-
tmp = self.xp.exp(-80.0 * ((self.X[0] - 0.05) ** 2 + (self.X[1] - 0.02) ** 2))
195-
me[1, ...] = self.fft.forward(tmp)
268+
u[0, ...] = self.fft.forward(_u)
269+
u[1, ...] = self.fft.forward(_v)
196270
else:
197-
me[0, ...] = 1.0 - self.xp.exp(-80.0 * ((self.X[0] + 0.05) ** 2 + (self.X[1] + 0.02) ** 2))
198-
me[1, ...] = self.xp.exp(-80.0 * ((self.X[0] - 0.05) ** 2 + (self.X[1] - 0.02) ** 2))
271+
u[0, ...] = _u
272+
u[1, ...] = _v
199273

200-
return me
274+
return u
275+
276+
def get_fig(self, n_comps=2): # pragma: no cover
277+
"""
278+
Get a figure suitable to plot the solution of this problem
279+
280+
Args:
281+
n_comps (int): Number of components that fit in the solution
282+
283+
Returns
284+
-------
285+
self.fig : matplotlib.pyplot.figure.Figure
286+
"""
287+
import matplotlib.pyplot as plt
288+
from mpl_toolkits.axes_grid1 import make_axes_locatable
289+
290+
plt.rcParams['figure.constrained_layout.use'] = True
291+
292+
if n_comps == 2:
293+
self.fig, axs = plt.subplots(1, 2, sharex=True, sharey=True, figsize=((6, 3)))
294+
divider = make_axes_locatable(axs[1])
295+
self.cax = divider.append_axes('right', size='3%', pad=0.03)
296+
else:
297+
self.fig, ax = plt.subplots(1, 1, figsize=((6, 5)))
298+
divider = make_axes_locatable(ax)
299+
self.cax = divider.append_axes('right', size='3%', pad=0.03)
300+
return self.fig
301+
302+
def plot(self, u, t=None, fig=None): # pragma: no cover
303+
r"""
304+
Plot the solution. Please supply a figure with the same structure as returned by ``self.get_fig``.
305+
306+
Parameters
307+
----------
308+
u : dtype_u
309+
Solution to be plotted
310+
t : float
311+
Time to display at the top of the figure
312+
fig : matplotlib.pyplot.figure.Figure
313+
Figure with the correct structure
314+
315+
Returns
316+
-------
317+
None
318+
"""
319+
fig = self.get_fig(n_comps=2) if fig is None else fig
320+
axs = fig.axes
321+
322+
vmin = u.min()
323+
vmax = u.max()
324+
for i, label in zip([self.iU, self.iV], [r'$u$', r'$v$']):
325+
im = axs[i].pcolormesh(self.X[0], self.X[1], u[i], vmin=vmin, vmax=vmax)
326+
axs[i].set_aspect(1)
327+
axs[i].set_title(label)
328+
329+
if t is not None:
330+
fig.suptitle(f't = {t:.2e}')
331+
axs[0].set_xlabel(r'$x$')
332+
axs[0].set_ylabel(r'$y$')
333+
fig.colorbar(im, self.cax)
201334

202335

203336
class grayscott_imex_linear(grayscott_imex_diffusion):

pySDC/implementations/problem_classes/generic_MPIFFT_Laplacian.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,6 @@ def setup_GPU(cls):
6666
def __init__(
6767
self, nvars=None, spectral=False, L=2 * np.pi, alpha=1.0, comm=MPI.COMM_WORLD, dtype='d', useGPU=False, x0=0.0
6868
):
69-
"""Initialization routine"""
70-
7169
if useGPU:
7270
self.setup_GPU()
7371

@@ -98,7 +96,7 @@ def __init__(
9896
# invoke super init, passing the communicator and the local dimensions as init
9997
super().__init__(init=(tmp_u.shape, comm, tmp_u.dtype))
10098
self._makeAttributeAndRegister(
101-
'nvars', 'spectral', 'L', 'alpha', 'comm', 'x0', localVars=locals(), readOnly=True
99+
'nvars', 'spectral', 'L', 'alpha', 'comm', 'x0', 'useGPU', localVars=locals(), readOnly=True
102100
)
103101

104102
# get local mesh
Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
pySDC using GPUs
2-
===================
2+
================
3+
34
Installation
45
------------
5-
In order to start playing on GPU, install `pySDC` and it's dependencies, ideally in developer mode.
6+
In order to start playing on GPU, install `pySDC` and its dependencies, ideally in developer mode.
67
First start by setting up a virtual environment, e.g. by using [Miniconda](https://docs.conda.io/en/latest/miniconda.html).
78
Then also add the CuPy Package (the cuda-toolkit will be installed automatically):
89

@@ -13,28 +14,49 @@ Then also add the CuPy Package (the cuda-toolkit will be installed automatically
1314
When this is done (and it can take a while), you have your setup to run `pySDC` on the GPU.
1415

1516
Changes in the problem_classes
16-
------------
17+
------------------------------
1718
Now you have to change a little bit in the problem_classes. The first and easy step is to change the datatype.
1819
To use pySDC on the GPU with CuPy you must use the [cupy-datatype](../../implementations/datatype_classes/cupy_mesh.py).
1920
The next step is to import cupy in the problem_class. In the following you have to exchange the NumPy/SciPy functions with the CuPy functions.
2021
A [Comparison Table](https://docs.cupy.dev/en/latest/reference/comparison.html) is given from CuPy to do that.
21-
For Exmaple: The above steps can be traced using the files
22+
For example: The above steps can be traced using the files
2223
[HeatEquation_ND_FD_forced_periodic.py](../../implementations/problem_classes/HeatEquation_ND_FD_forced_periodic.py)
2324
and [HeatEquation_ND_FD_forced_periodic_gpu.py](../../implementations/problem_classes/HeatEquation_ND_FD_forced_periodic.py)
2425
Now you are ready to run `pySDC` on the GPU.
2526

2627
Run pySDC on the GPU
27-
------------
28-
You have to configure a Script to run it. You can see at the file [heat.py](heat.py) that the parameters are the
28+
--------------------
29+
You have to configure a script to run it. You can see at the file [heat.py](heat.py) that the parameters are the
2930
same for GPU and CPU. Only the import for the problem_class changed.
3031

31-
32-
3332
More examples
34-
----------
33+
-------------
3534
Further examples can found with Allen-Cahn:
3635
* problem: [AllenCahn_2D_FD.py](../../implementations/problem_classes/AllenCahn_2D_FD.py) and [AllenCahn_2D_FD_gpu.py](../../implementations/problem_classes/AllenCahn_2D_FD_gpu.py)
3736
* problem: [AllenCahn_2D_FFT.py](../../implementations/problem_classes/AllenCahn_2D_FFT.py) and [AllenCahn_2D_FFT_gpu.py](../../implementations/problem_classes/AllenCahn_2D_FFT_gpu.py)
3837
* Script to run pySDC: [ac-fft.py](ac-fft.py)
3938

4039

40+
Running large problems on GPU
41+
-----------------------------
42+
This project contains some infrastructure for running and plotting specific problems.
43+
The main file is `run_experiment` and can be configured using command line arguments.
44+
For instance, use
45+
46+
.. code-block:: bash
47+
48+
srun -n 4 python work_precision.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=run
49+
mpirun -np 8 python work_precision.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=plot
50+
python work_precision.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=video
51+
52+
to first run the problem, then make plots and then make a video for Gray-Scott with the U-Skate configuration (see arXiv:1501.01990).
53+
54+
To do a parallel scaling test, you can go to JUWELS Booster and use, for instance,
55+
56+
.. code-block:: bash
57+
python analysis_scripts/parallel_scaling.py --mode=run --scaling=strong --space_time=True --XPU=GPU --problem=GS
58+
srun python analysis_scripts/parallel_scaling.py --mode=plot --scaling=strong --space_time=True --XPU=GPU --problem=GS
59+
60+
This will generate jobscripts and submit the jobs. Notice that you have to wait for the jobs to complete before you can plot them.
61+
62+
To learn more about the options for the scripts, run them with `--help`.

0 commit comments

Comments
 (0)