Parallel-in-Time · pancetta · Jan 29, 2025 · Jan 29, 2025
diff --git a/pySDC/implementations/problem_classes/GrayScott_MPIFFT.py b/pySDC/implementations/problem_classes/GrayScott_MPIFFT.py
@@ -19,7 +19,7 @@ class grayscott_imex_diffusion(IMEX_Laplacian_MPIFFT):
         \frac{\partial u}{\partial t} = D_u \Delta u - u v^2 + A (1 - u),
 
     .. math::
-        \frac{\partial v}{\partial t} = D_v \Delta v + u v^2 - B u
+        \frac{\partial v}{\partial t} = D_v \Delta v + u v^2 - B v
 
     in :math:`x \in \Omega:=[-L/2, L/2]^N` with :math:`N=2,3`. Spatial discretization is done by using
     Fast Fourier transformation for solving the linear parts provided by ``mpi4py-fft`` [2]_, see also
@@ -222,7 +222,7 @@ def u_exact(self, t, seed=10700000):
 
             for _ in range(-self.num_blobs):
                 x0 = rng.random(size=self.ndim) * self.L[0] - self.L[0] / 2
-                l = rng.random(size=self.ndim) * self.L[0] / self.nvars[0] * 30
+                l = rng.random(size=self.ndim) * self.L[0] / self.nvars[0] * 80
 
                 masks = [xp.logical_and(self.X[i] > x0[i], self.X[i] < x0[i] + l[i]) for i in range(self.ndim)]
                 mask = masks[0]
@@ -236,33 +236,54 @@ def u_exact(self, t, seed=10700000):
             """
             Blobs as in https://www.chebfun.org/examples/pde/GrayScott.html
             """
-            assert self.ndim == 2, 'The initial conditions are 2D for now..'
-
             inc = self.L[0] / (self.num_blobs + 1)
 
             for i in range(1, self.num_blobs + 1):
                 for j in range(1, self.num_blobs + 1):
-                    signs = (-1) ** rng.integers(low=0, high=2, size=2)
-
-                    # This assumes that the box is [-L/2, L/2]^2
-                    _u[...] += -xp.exp(
-                        -80.0
-                        * (
-                            (self.X[0] + self.x0 + inc * i + signs[0] * 0.05) ** 2
-                            + (self.X[1] + self.x0 + inc * j + signs[1] * 0.02) ** 2
+                    signs = (-1) ** rng.integers(low=0, high=2, size=self.ndim)
+
+                    if self.ndim == 2:
+                        # This assumes that the box is [-L/2, L/2]^2
+                        _u[...] += -xp.exp(
+                            -80.0
+                            * (
+                                (self.X[0] + self.x0 + inc * i + signs[0] * 0.05) ** 2
+                                + (self.X[1] + self.x0 + inc * j + signs[1] * 0.02) ** 2
+                            )
+                        )
+                        _v[...] += xp.exp(
+                            -80.0
+                            * (
+                                (self.X[0] + self.x0 + inc * i - signs[0] * 0.05) ** 2
+                                + (self.X[1] + self.x0 + inc * j - signs[1] * 0.02) ** 2
+                            )
+                        )
+                    elif self.ndim == 3:
+                        z_pos = self.x0 + rng.random() * self.L[2]
+                        # This assumes that the box is [-L/2, L/2]^3
+                        _u[...] += -xp.exp(
+                            -80.0
+                            * (
+                                (self.X[0] + self.x0 + inc * i + signs[0] * 0.05) ** 2
+                                + (self.X[1] + self.x0 + inc * j + signs[1] * 0.02) ** 2
+                                + (self.X[2] - z_pos + signs[2] * 0.035) ** 2
+                            )
                         )
-                    )
-                    _v[...] += xp.exp(
-                        -80.0
-                        * (
-                            (self.X[0] + self.x0 + inc * i - signs[0] * 0.05) ** 2
-                            + (self.X[1] + self.x0 + inc * j - signs[1] * 0.02) ** 2
+                        _v[...] += xp.exp(
+                            -80.0
+                            * (
+                                (self.X[0] + self.x0 + inc * i - signs[0] * 0.05) ** 2
+                                + (self.X[1] + self.x0 + inc * j - signs[1] * 0.02) ** 2
+                                + (self.X[2] - z_pos - signs[2] * 0.035) ** 2
+                            )
                         )
-                    )
+                    else:
+                        raise NotImplementedError
 
             _u += 1
         else:
-            raise NotImplementedError
+            _u[...] = rng.random(_u.shape)
+            _v[...] = rng.random(_v.shape)
 
         u = self.u_init
         if self.spectral:

diff --git a/pySDC/implementations/problem_classes/generic_MPIFFT_Laplacian.py b/pySDC/implementations/problem_classes/generic_MPIFFT_Laplacian.py
@@ -85,6 +85,7 @@ def __init__(
             collapse=True,
             backend=self.fft_backend,
             comm_backend=self.fft_comm_backend,
+            grid=(-1,),
         )
 
         # get test data to figure out type and dimensions

diff --git a/pySDC/projects/GPU/README.rst b/pySDC/projects/GPU/README.rst
@@ -45,18 +45,84 @@ For instance, use
 
 .. code-block:: bash
 
-    srun -n 4 python work_precision.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=run
-    mpirun -np 8 python work_precision.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=plot
-    python work_precision.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=video
+    srun -n 4 python run_experiment.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=run
+    mpirun -np 8 python run_experiment.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=plot
+    python run_experiment.py --config=GS_USkate --procs=1/1/4 --useGPU=True --mode=video
 
 to first run the problem, then make plots and then make a video for Gray-Scott with the U-Skate configuration (see arXiv:1501.01990).
 
 To do a parallel scaling test, you can go to JUWELS Booster and use, for instance,
 
 .. code-block:: bash
-   python analysis_scripts/parallel_scaling.py --mode=run --scaling=strong --space_time=True --XPU=GPU --problem=GS
-   srun python analysis_scripts/parallel_scaling.py --mode=plot --scaling=strong --space_time=True --XPU=GPU --problem=GS
+
+   python analysis_scripts/parallel_scaling.py --mode=run --space_time=True --XPU=GPU --problem=GS3D
+   python analysis_scripts/parallel_scaling.py --mode=plot --space_time=True --XPU=GPU --problem=GS3D
 
 This will generate jobscripts and submit the jobs. Notice that you have to wait for the jobs to complete before you can plot them.
 
 To learn more about the options for the scripts, run them with `--help`.
+
+Reproducing plots in Thomas Baumann's thesis
+--------------------------------------------
+Keep in mind that the results of the experiments are specific to the hardware that was used in the experiments.
+To record the data for space-time parallel scaling experiments with Gray-Scott and Rayleigh-Benard convection (RBC), run the following commands on the specified machines within the directory that contains this README.
+
+.. code-block:: bash
+
+    # run on JUWELS
+    python analysis_scripts/parallel_scaling.py --mode=run --problem=GS3D --XPU=CPU --space_time=False
+    python analysis_scripts/parallel_scaling.py --mode=run --problem=GS3D --XPU=CPU --space_time=True
+
+    # run on JUWELS booster
+    python analysis_scripts/parallel_scaling.py --mode=run --problem=GS3D --XPU=GPU --space_time=False
+    python analysis_scripts/parallel_scaling.py --mode=run --problem=GS3D --XPU=GPU --space_time=True
+
+    # run on JURECA DC
+    python analysis_scripts/parallel_scaling.py --mode=run --problem=RBC --XPU=CPU --space_time=False
+    python analysis_scripts/parallel_scaling.py --mode=run --problem=RBC --XPU=CPU --space_time=True
+
+    # run on JUWELS booster
+    python analysis_scripts/parallel_scaling.py --mode=run --problem=RBC --XPU=GPU --space_time=False
+    python analysis_scripts/parallel_scaling.py --mode=run --problem=RBC --XPU=GPU --space_time=True
+
+These commands will submit a bunch of jobscripts with the individual runs.
+Keep in mind that these are specific to a compute project and some paths are account-specific.
+Most likely, you will have to change options at the top of the file `./etc/generate_jobscript.py` before you can run anything.
+Also, note that you may not be allowed to request all resources needed for the largest Gray-Scott GPU run during normal operation of JUWELS booster.
+
+After all jobs have run to completion, you have recorded all scaling data and may plot the results with the following command:
+
+.. code-block:: bash
+
+    python paper_plots.py --target=thesis
+
+In order to run the production runs, modify the `path` class attribute of `LargeSim` in `analysis_scripts/large_simulations.py`.
+Then use the following commands on the specified machines:
+
+.. code-block:: bash
+
+    # run on JUWELS booster
+    python analysis_scripts/large_simulations.py --mode=run --problem=GS --XPU=GPU
+
+    # run on JURECA DC
+    python analysis_scripts/large_simulations.py --mode=run --problem=RBC --XPU=CPU
+
+Plotting the results of the Gray-Scott simulation requires a lot of memory and will take very long.
+Modify the paths in `analysis_scripts/plot_large_simulations.py` and then run:
+
+.. code-block:: bash
+
+    python analysis_scripts/3d_plot_GS_large.py --base_path=<path>
+    python analysis_scripts/plot_large_simulations.py --problem=GS
+
+Plotting the results of the Rayleigh-Benard production run is easier.
+After modifying the paths as earlier, run the following commands:
+
+.. code-block:: bash
+
+    python analysis_scripts/large_simulations.py --mode=plot --problem=RBC --XPU=CPU
+    python analysis_scripts/large_simulations.py --mode=video --problem=RBC --XPU=CPU
+    python analysis_scripts/plot_large_simulations.py --problem=RBC
+
+Run scripts with `--help` to learn more about parameters.
+Keep in mind that not all features are supported with all problems.
diff --git a/pySDC/projects/GPU/analysis_scripts/3d_plot_GS_large.py b/pySDC/projects/GPU/analysis_scripts/3d_plot_GS_large.py
@@ -0,0 +1,162 @@
+import numpy as np
+import pickle
+import pyvista as pv
+import subprocess
+import gc
+from mpi4py import MPI
+from tqdm import tqdm
+
+
+class Grid:
+    """
+    Sampling grid bundled with a label and a camera zoom range.
+
+    The camera position belonging to the grid is persisted as a pickle file so that it
+    can be stored once and read back later.
+
+    Args:
+        grid: Grid object to sample data on. Within this class only ``len(grid.x)`` is used.
+        label (str): Suffix identifying this view; it becomes part of the camera file name
+        zoom_range: Indexable pair of zoom factors, reached at frame 0 and frame 100 respectively
+    """
+
+    def __init__(self, grid, label, zoom_range):
+        self.zoom_range = zoom_range
+        self.label = label
+        self.grid = grid
+
+    def get_camera_path(self):
+        """
+        Build the path of the camera pickle file and make sure its directory exists.
+
+        Returns:
+            str: Path of the form ``etc/cameras/cam<label>_<len(grid.x)>.pickle``
+        """
+        import os
+
+        camera_dir = 'etc/cameras'
+        os.makedirs(camera_dir, exist_ok=True)
+        n_points = len(self.grid.x)
+        return f'{camera_dir}/cam{self.label}_{n_points}.pickle'
+
+    def get_camera_pos(self):
+        """
+        Read the stored camera position back from its pickle file.
+
+        Returns:
+            The unpickled object that was stored with `set_camera_pos`
+        """
+        with open(self.get_camera_path(), 'rb') as stream:
+            camera_pos = pickle.load(stream)
+        return camera_pos
+
+    def set_camera_pos(self, pos):
+        """
+        Store the camera position ``pos`` in the pickle file of this grid.
+        """
+        with open(self.get_camera_path(), 'wb') as stream:
+            pickle.dump(pos, stream)
+
+    def get_zoom(self, frame):
+        """
+        Interpolate the zoom factor linearly in the frame index.
+
+        Returns:
+            ``zoom_range[0]`` at frame 0 and ``zoom_range[1]`` at frame 100
+        """
+        zoom_end = self.zoom_range[1]
+        zoom_start = self.zoom_range[0]
+        return (zoom_end - zoom_start) * frame / 100 + zoom_start
+
+
+def plot(
+    n_time,
+    n_space,
+    useGPU,
+    n_frames,
+    base_path,
+    space_range,
+    res,
+    start_frame=0,
+    plot_resolution=2048,
+    n_samples=1024,
+    zoom=1e-2,
+):  # pragma: no cover
+    """
+    Render isosurface plots of the ``v`` field of the large Gray-Scott run, one PNG per frame.
+
+    Frames are distributed over the ranks of ``MPI.COMM_WORLD``: in every pass of the frame
+    loop, each rank handles the frame with index ``frame + comm.rank``. For its frame, a rank
+    loads the pickle files of all entries in ``space_range``, assembles them into a single
+    `pyvista.ImageData`, samples it on a zoomed-in grid, and saves a screenshot of the
+    0.3-isosurface to ``./simulation_plots/``.
+
+    Args:
+        n_time (int): Number of processes in time; only used to build the data file names
+        n_space (int): Number of processes in space; only used to build the data file names
+        useGPU (bool): GPU flag of the run; only used to build the data file names
+        n_frames (int): Upper bound (exclusive) of the frame loop
+        base_path (str): Directory containing the solution pickle files
+        space_range (iterable of int): Indices of the spatial chunks to load for every frame
+        res (int): Resolution of the run; only used to build the data file names
+        start_frame (int): First frame of the frame loop
+        plot_resolution (int): Edge length in pixels of the square screenshot window
+        n_samples (int): Number of points per dimension of the sampling grid
+        zoom (float): Edge length of the sampled region; the data itself spans the unit cube
+
+    Returns:
+        None
+    """
+    import os
+
+    comm = MPI.COMM_WORLD
+
+    # Create the output directory before doing any expensive work. Taking the screenshot
+    # fails if the directory is missing, which would otherwise only surface after loading
+    # and rendering the first frame. `exist_ok` makes this safe when all ranks do it at once.
+    plotting_path = './simulation_plots/'
+    os.makedirs(plotting_path, exist_ok=True)
+
+    space_range = tqdm(space_range)
+    space_range.set_description('load files')
+
+    for frame in range(start_frame, n_frames, comm.size):
+        # every rank takes a different frame of the current batch
+        i = frame + comm.rank
+
+        # drop the data of the previous frame before allocating the next one to limit peak memory
+        v = None
+        gc.collect()
+        for procs in space_range:
+            gc.collect()
+
+            path = f'{base_path}/GrayScottLarge-res_{res}-useGPU_{useGPU}-procs_1_{n_time}_{n_space}-0-{procs}-solution_{i:06d}.pickle'
+            with open(path, 'rb') as file:
+                _data = pickle.load(file)
+
+            if v is None:
+                # the first file determines the global shape; spacing 1/shape maps the data to the unit cube
+                shape = _data['shape']
+                v = pv.ImageData(dimensions=shape, spacing=tuple([1 / me for me in shape]))
+                v['values'] = np.zeros(np.prod(shape))
+
+            # NOTE(review): this assumes all chunks have the same size and are contiguous in the
+            # flattened global array in order of `procs` - confirm against the code writing the files
+            local_slice_flat = slice(np.prod(_data['v'].shape) * procs, np.prod(_data['v'].shape) * (procs + 1))
+            v['values'][local_slice_flat] = _data['v'].flatten()
+
+        # sampled = Grid(pv.ImageData(dimensions=(n_samples,) * 3, spacing=(1 / n_samples,) * 3), '', [1.0, 1.0])
+        zoomed = Grid(
+            pv.ImageData(dimensions=(n_samples,) * 3, spacing=(zoom / n_samples,) * 3, origin=[0.8, 0.47, 0.03]),
+            '_zoomed',
+            [1.0, 1.2],
+        )
+
+        for grid in [zoomed]:
+
+            p = pv.Plotter(off_screen=True)
+            contours = grid.grid.sample(v, progress_bar=True, categorical=True).contour(
+                isosurfaces=[0.3], method='flying_edges', progress_bar=True
+            )
+
+            p.add_mesh(contours, opacity=0.5, cmap=['teal'])
+            p.remove_scalar_bar()
+            p.camera.azimuth += 15
+
+            p.camera.Elevation(0.9)
+
+            path = f'{plotting_path}/GS_large_{i:06d}{grid.label}.png'
+
+            # the camera position of the very first batch is stored and reused as the
+            # reference for all later frames, also across separate invocations of this script
+            if frame == 0:
+                grid.set_camera_pos(p.camera.position)
+
+            # scale the reference position by the frame-dependent zoom factor
+            p.camera.position = tuple(pos * grid.get_zoom(frame) for pos in grid.get_camera_pos())
+
+            p.screenshot(path, window_size=(plot_resolution,) * 2)
+            print(f'Saved {path}', flush=True)
+
+
+def video(view=None):  # pragma: no cover
+    """
+    Combine the frames in ``simulation_plots`` into an mp4 file in ``videos`` using ffmpeg.
+
+    Args:
+        view (str, optional): Label suffix of the frames to use, e.g. ``'_zoomed'``.
+            ``None`` is treated like the empty suffix ``''``.
+
+    Returns:
+        None
+    """
+    import os
+
+    # Without this, the default `None` would be formatted into the file names
+    # ("GS_large_%06dNone.png"), which never matches any frame that was plotted.
+    view = '' if view is None else view
+
+    path = f'simulation_plots/GS_large_%06d{view}.png'
+    path_target = f'videos/GS_large{view}.mp4'
+
+    # ffmpeg does not create missing output directories
+    os.makedirs('videos', exist_ok=True)
+
+    # Build the argument list directly rather than splitting a string, such that
+    # whitespace in `view` cannot break the command apart.
+    cmd = ['ffmpeg', '-i', path, '-pix_fmt', 'yuv420p', '-r', '9', '-s', '2048:1536', '-y', path_target]
+
+    # The return code is deliberately not checked: callers loop over several views and a
+    # view without frames must not prevent the remaining videos from being generated.
+    subprocess.run(cmd)
+
+
+if __name__ == '__main__':
+    import argparse
+
+    # command line options as (flag, keyword arguments), in the order they are registered
+    cli_options = [
+        ('--XPU', {'type': str, 'choices': ['CPU', 'GPU'], 'default': 'GPU'}),
+        ('--mode', {'type': str, 'choices': ['plot', 'video'], 'default': 'plot'}),
+        ('--nframes', {'type': int, 'default': 100}),
+        ('--start', {'type': int, 'default': 0}),
+        ('--base_path', {'type': str, 'default': '/p/scratch/ccstma/baumann7/large_runs/data/'}),
+        ('--space_range', {'type': int, 'default': None}),
+        ('--zoom', {'type': float, 'default': 9e-2}),
+        ('--n_samples', {'type': int, 'default': 1024}),
+    ]
+    parser = argparse.ArgumentParser()
+    for flag, kwargs in cli_options:
+        parser.add_argument(flag, **kwargs)
+    args = parser.parse_args()
+
+    from pySDC.projects.GPU.analysis_scripts.large_simulations import GSLarge
+
+    # the simulation object supplies the parameters that are encoded in the data file names
+    sim = GSLarge()
+
+    if args.XPU == 'GPU':
+        sim.setup_GPU_params()
+    elif args.XPU == 'CPU':
+        sim.setup_CPU_params()
+    else:
+        raise NotImplementedError()
+
+    # load all spatial chunks of the run unless a smaller number was requested
+    if args.space_range is None:
+        space_range = range(sim.params['procs'][2])
+    else:
+        space_range = range(args.space_range)
+
+    if args.mode == 'video':
+        for view in ['', '_zoomed']:
+            video(view)
+    elif args.mode == 'plot':
+        pv.global_theme.allow_empty_mesh = True
+        # start a virtual frame buffer for off-screen rendering if possible, otherwise carry on without
+        try:
+            pv.start_xvfb(window_size=(4096, 4096))
+        except OSError:
+            pass
+        plot_kwargs = {
+            'n_time': sim.params['procs'][1],
+            'n_space': sim.params['procs'][2],
+            'useGPU': sim.params['useGPU'],
+            'base_path': args.base_path,
+            'space_range': space_range,
+            'n_frames': args.nframes,
+            'res': sim.params['res'],
+            'start_frame': args.start,
+            'n_samples': args.n_samples,
+            'zoom': args.zoom,
+        }
+        plot(**plot_kwargs)
+    else:
+        raise NotImplementedError()