Commit a101f29

Merge branch 'master' into neuralpint
2 parents: 748432a + c68a9a6

File tree: 8 files changed (+129, -115 lines)

README.md

Lines changed: 1 addition & 1 deletion

@@ -112,7 +112,7 @@ Any contribution is dearly welcome! If you want to contribute, please take the t
 This project has received funding from the [European High-Performance
 Computing Joint Undertaking](https://eurohpc-ju.europa.eu/) (JU) under
 grant agreement No 955701 ([TIME-X](https://www.time-x-eurohpc.eu/))
-and grant agreement No 101118139.
+and grant agreement No 101118139.
 The JU receives support from the European Union's Horizon 2020 research
 and innovation programme and Belgium, France, Germany, and Switzerland.
 This project also received funding from the [German Federal Ministry of

docs/source/conf.py

Lines changed: 1 addition & 1 deletion

@@ -72,7 +72,7 @@
 # The short X.Y version.
 version = '5.6'
 # The full version, including alpha/beta/rc tags.
-release = '5.6
+release = '5.6'

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

pySDC/helpers/fieldsIO.py

Lines changed: 34 additions & 55 deletions

@@ -44,14 +44,13 @@

 Warning
 -------
-To use MPI collective writing, you need to call first the class methods :class:`Rectilinear.initMPI` (cf their docstring).
+To use MPI collective writing, you need to call first the class methods :class:`Rectilinear.setupMPI` (cf their docstring).
 Also, `Rectilinear.setHeader` **must be given the global grids coordinates**, whether the code is run in parallel or not.
 """
 import os
 import numpy as np
 from typing import Type, TypeVar
 import logging
-import itertools

 T = TypeVar("T")

@@ -61,11 +60,17 @@
     except ImportError:
         pass
     from mpi4py import MPI
+    from mpi4py.util.dtlib import from_numpy_dtype as MPI_DTYPE
 except ImportError:

     class MPI:
         COMM_WORLD = None
         Intracomm = T
+        File = T
+        Datatype = T
+
+    def MPI_DTYPE():
+        pass


 # Supported data types

@@ -417,6 +422,8 @@ def setHeader(self, nVar, coords):
         coords = self.setupCoords(*coords)
         self.header = {"nVar": int(nVar), "coords": coords}
         self.nItems = nVar * self.nDoF
+        if self.MPI_ON:
+            self.MPI_SETUP()

     @property
     def hInfos(self):

@@ -438,6 +445,8 @@ def readHeader(self, f):
         gridSizes = np.fromfile(f, dtype=np.int32, count=dim)
         coords = [np.fromfile(f, dtype=np.float64, count=n) for n in gridSizes]
         self.setHeader(nVar, coords)
+        if self.MPI_ON:
+            self.MPI_SETUP()

     def reshape(self, fields: np.ndarray):
         """Reshape the fields to a N-d array (inplace operation)"""

@@ -498,7 +507,6 @@ def toVTR(self, baseName, varNames, idxFormat="{:06d}"):
     # MPI-parallel implementation
     # -------------------------------------------------------------------------
     comm: MPI.Intracomm = None
-    _nCollectiveIO = None

     @classmethod
     def setupMPI(cls, comm: MPI.Intracomm, iLoc, nLoc):

@@ -519,20 +527,8 @@ def setupMPI(cls, comm: MPI.Intracomm, iLoc, nLoc):
         cls.iLoc = iLoc
         cls.nLoc = nLoc
         cls.mpiFile: MPI.File = None
-        cls._nCollectiveIO = None
-
-    @property
-    def nCollectiveIO(self):
-        """
-        Number of collective IO operations over all processes, when reading or writing a field.
-
-        Returns:
-        --------
-        int: Number of collective IO accesses
-        """
-        if self._nCollectiveIO is None:
-            self._nCollectiveIO = self.comm.allreduce(self.nVar * np.prod(self.nLoc[:-1]), op=MPI.MAX)
-        return self._nCollectiveIO
+        cls.mpiType: MPI.Datatype = None
+        cls.mpiFileType: MPI.Datatype = None

     @property
     def MPI_ON(self):

@@ -548,6 +544,16 @@ def MPI_ROOT(self):
             return True
         return self.comm.Get_rank() == 0

+    def MPI_SETUP(self):
+        """Setup subarray masks for each processes"""
+        self.mpiType = MPI_DTYPE(self.dtype)
+        self.mpiFileType = self.mpiType.Create_subarray(
+            [self.nVar, *self.gridSizes],  # Global array sizes
+            [self.nVar, *self.nLoc],  # Local array sizes
+            [0, *self.iLoc],  # Global starting indices of local blocks
+        )
+        self.mpiFileType.Commit()
+
     def MPI_FILE_OPEN(self, mode):
         """Open the binary file in MPI mode"""
         amode = {

@@ -572,7 +578,8 @@ def MPI_WRITE_AT_ALL(self, offset, data: np.ndarray):
         data : np.ndarray
             Data to be written in the binary file.
         """
-        self.mpiFile.Write_at_all(offset, data)
+        self.mpiFile.Set_view(disp=offset, etype=self.mpiType, filetype=self.mpiFileType)
+        self.mpiFile.Write_all(data)

     def MPI_READ_AT_ALL(self, offset, data: np.ndarray):
         """

@@ -586,7 +593,8 @@ def MPI_READ_AT_ALL(self, offset, data: np.ndarray):
         data : np.ndarray
             Array on which to read the data from the binary file.
         """
-        self.mpiFile.Read_at_all(offset, data)
+        self.mpiFile.Set_view(disp=offset, etype=self.mpiType, filetype=self.mpiFileType)
+        self.mpiFile.Read_all(data)

     def MPI_FILE_CLOSE(self):
         """Close the binary file in MPI mode"""

@@ -637,33 +645,15 @@ def addField(self, time, field):
             *self.nLoc,
         ), f"expected {(self.nVar, *self.nLoc)} shape, got {field.shape}"

-        offset0 = self.fileSize
+        offset = self.fileSize
         self.MPI_FILE_OPEN(mode="a")
-        nWrites = 0
-        nCollectiveIO = self.nCollectiveIO

         if self.MPI_ROOT:
             self.MPI_WRITE(np.array(time, dtype=T_DTYPE))
-        offset0 += self.tSize
-
-        for (iVar, *iBeg) in itertools.product(range(self.nVar), *[range(n) for n in self.nLoc[:-1]]):
-            offset = offset0 + self.iPos(iVar, iBeg) * self.itemSize
-            self.MPI_WRITE_AT_ALL(offset, field[(iVar, *iBeg)])
-            nWrites += 1
-
-        for _ in range(nCollectiveIO - nWrites):
-            # Additional collective write to catch up with other processes
-            self.MPI_WRITE_AT_ALL(offset0, field[:0])
-
+        offset += self.tSize
+        self.MPI_WRITE_AT_ALL(offset, field)
         self.MPI_FILE_CLOSE()

-    def iPos(self, iVar, iX):
-        iPos = iVar * self.nDoF
-        for axis in range(self.dim - 1):
-            iPos += (self.iLoc[axis] + iX[axis]) * np.prod(self.gridSizes[axis + 1 :])
-        iPos += self.iLoc[-1]
-        return iPos
-
     def readField(self, idx):
         """
         Read one field stored in the binary file, corresponding to the given

@@ -689,26 +679,15 @@ def readField(self, idx):
             return super().readField(idx)

         idx = self.formatIndex(idx)
-        offset0 = self.hSize + idx * (self.tSize + self.fSize)
+        offset = self.hSize + idx * (self.tSize + self.fSize)
         with open(self.fileName, "rb") as f:
-            t = float(np.fromfile(f, dtype=T_DTYPE, count=1, offset=offset0)[0])
-            offset0 += self.tSize
+            t = float(np.fromfile(f, dtype=T_DTYPE, count=1, offset=offset)[0])
+            offset += self.tSize

         field = np.empty((self.nVar, *self.nLoc), dtype=self.dtype)

         self.MPI_FILE_OPEN(mode="r")
-        nReads = 0
-        nCollectiveIO = self.nCollectiveIO
-
-        for (iVar, *iBeg) in itertools.product(range(self.nVar), *[range(n) for n in self.nLoc[:-1]]):
-            offset = offset0 + self.iPos(iVar, iBeg) * self.itemSize
-            self.MPI_READ_AT_ALL(offset, field[(iVar, *iBeg)])
-            nReads += 1
-
-        for _ in range(nCollectiveIO - nReads):
-            # Additional collective read to catch up with other processes
-            self.MPI_READ_AT_ALL(offset0, field[:0])
-
+        self.MPI_READ_AT_ALL(offset, field)
         self.MPI_FILE_CLOSE()

         return t, field
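Note on the change above: the per-chunk Write_at_all/Read_at_all loops, together with the nCollectiveIO padding that kept all ranks issuing the same number of collective calls, are replaced by a single collective access through an MPI subarray datatype installed as the file view. A minimal standalone sketch of this pattern with mpi4py follows; the grid sizes, the even split along the first axis, and the file name are illustrative assumptions, not values from the commit.

import numpy as np
from mpi4py import MPI
from mpi4py.util.dtlib import from_numpy_dtype

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

# Global 2D grid split along the first axis (assumes size divides nX evenly).
nX, nY = 8, 4
nLoc = nX // size            # local block size
iLoc = rank * nLoc           # global starting index of the local block
local = np.full((nLoc, nY), rank, dtype=np.float64)

# Element type and subarray "mask" describing where the local block sits
# inside the global array, mirroring what MPI_SETUP does for Rectilinear.
etype = from_numpy_dtype(local.dtype)
ftype = etype.Create_subarray(
    [nX, nY],    # global array sizes
    [nLoc, nY],  # local array sizes
    [iLoc, 0],   # global starting indices of the local block
)
ftype.Commit()

# One collective write per rank instead of a loop of Write_at_all calls.
fh = MPI.File.Open(comm, "field.bin", MPI.MODE_WRONLY | MPI.MODE_CREATE)
fh.Set_view(disp=0, etype=etype, filetype=ftype)
fh.Write_all(local)
fh.Close()
ftype.Free()

Because the file view already encodes each rank's block layout, every process issues exactly one collective call per field, which is why the nCollectiveIO bookkeeping and the iPos offset computation could be dropped.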

pySDC/projects/GPU/analysis_scripts/parallel_scaling.py

Lines changed: 58 additions & 30 deletions

@@ -81,13 +81,17 @@ def run_scaling_test(self, **kwargs):
             **kwargs,
         )

-    def plot_scaling_test(self, ax, quantity='time', **plotting_params):  # pragma: no cover
+    def plot_scaling_test(self, ax, quantity='time', space_time=None, **plotting_params):  # pragma: no cover
         from matplotlib.colors import TABLEAU_COLORS

         cmap = TABLEAU_COLORS
         colors = list(cmap.values())

         for experiment in self.experiments:
+            if space_time is not None:
+                if not experiment.PinT == space_time:
+                    continue
+
             tasks_time = self.tasks_time if experiment.PinT else 1
             timings = {}

@@ -141,20 +145,30 @@ def plot_scaling_test(self, ax, quantity='time', **plotting_params):  # pragma:
                     elif quantity == 'throughput_per_task':
                         timings[np.prod(procs)] = experiment.res**self.ndim / t_mean
                     elif quantity == 'efficiency':
+                        if type(config).__name__ == 'GrayScottScaling3D':
+                            norm = 13216322.909
+                        else:
+                            norm = 1
                         timings[np.prod(procs) / self.tasks_per_node] = (
-                            experiment.res**self.ndim / t_mean / np.prod(procs)
+                            experiment.res**self.ndim / t_mean / np.prod(procs) / norm
                         )
                     elif quantity == 'time':
                         timings[np.prod(procs) / self.tasks_per_node] = t_mean
                     elif quantity == 'time_per_task':
                         timings[np.prod(procs)] = t_mean
                     elif quantity == 'min_time_per_task':
                         timings[np.prod(procs)] = t_min
+                    elif quantity == 'min_time':
+                        timings[np.prod(procs) / self.tasks_per_node] = t_min
                     else:
                         raise NotImplementedError
                 except (FileNotFoundError, ValueError):
                     pass

+            if quantity == 'efficiency' and type(config).__name__ == 'RayleighBenard_scaling':
+                norm = max(timings.values())
+                timings = {key: value / norm for key, value in timings.items()}
+
             ax.loglog(
                 timings.keys(),
                 timings.values(),

@@ -171,7 +185,8 @@ def plot_scaling_test(self, ax, quantity='time', **plotting_params):  # pragma:
             'time': r'$t_\mathrm{step}$ / s',
             'time_per_task': r'$t_\mathrm{step}$ / s',
             'min_time_per_task': r'minimal $t_\mathrm{step}$ / s',
-            'efficiency': 'efficiency / DoF/s/task',
+            'min_time': r'minimal $t_\mathrm{step}$ / s',
+            'efficiency': r'parallel efficiency / \%',
         }
         ax.set_ylabel(labels[quantity])

@@ -331,17 +346,28 @@ class RayleighBenardDedalusComparisonGPU(GPUConfig, ScalingConfig):
     ]


-def plot_scalings(problem, **kwargs):  # pragma: no cover
+def plot_scalings(problem, XPU=None, space_time=None, **kwargs):  # pragma: no cover
     if problem == 'GS3D':
-        configs = [
-            GrayScottSpaceScalingCPU3D(),
-            GrayScottSpaceScalingGPU3D(),
-        ]
+        if XPU == 'CPU':
+            configs = [GrayScottSpaceScalingCPU3D()]
+        elif XPU == 'GPU':
+            configs = [GrayScottSpaceScalingGPU3D()]
+        else:
+            configs = [GrayScottSpaceScalingCPU3D(), GrayScottSpaceScalingGPU3D()]
     elif problem == 'RBC':
-        configs = [
-            RayleighBenardSpaceScalingGPU(),
-            RayleighBenardSpaceScalingCPU(),
-        ]
+        if XPU == 'CPU':
+            configs = [
+                RayleighBenardSpaceScalingCPU(),
+            ]
+        elif XPU == 'GPU':
+            configs = [
+                RayleighBenardSpaceScalingGPU(),
+            ]
+        else:
+            configs = [
+                RayleighBenardSpaceScalingGPU(),
+                RayleighBenardSpaceScalingCPU(),
+            ]
     elif problem == 'RBC_dedalus':
         configs = [
             RayleighBenardDedalusComparison(),

@@ -358,31 +384,26 @@ def plot_scalings(problem, **kwargs):  # pragma: no cover
         ('RBC', 'time'): {'x': [1 / 10, 64], 'y': [60, 60 / 640]},
         ('RBC', 'time_per_task'): {'x': [1, 640], 'y': [60, 60 / 640]},
         ('RBC', 'min_time_per_task'): {'x': [1, 640], 'y': [60, 60 / 640]},
+        ('RBC', 'min_time'): {'x': [1, 640], 'y': [60, 60 / 640]},
         ('RBC', 'throughput_per_task'): {'x': [1 / 1, 640], 'y': [2e4, 2e4 * 640]},
     }

-    fig, ax = plt.subplots(figsize=figsize_by_journal('TUHH_thesis', 1, 0.6))
-    configs[1].plot_scaling_test(ax=ax, quantity='efficiency')
-    # ax.legend(frameon=False)
-    box = ax.get_position()
-    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
-    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
-
-    ax.set_yscale('linear')
-    path = f'{PROJECT_PATH}/plots/scaling_{problem}_efficiency.pdf'
-    fig.savefig(path, bbox_inches='tight')
-    print(f'Saved {path!r}', flush=True)
-
-    for quantity in ['time', 'throughput', 'time_per_task', 'throughput_per_task', 'min_time_per_task'][::-1]:
+    for quantity in ['time', 'throughput', 'time_per_task', 'throughput_per_task', 'min_time_per_task', 'efficiency'][
+        ::-1
+    ]:
         fig, ax = plt.subplots(figsize=figsize_by_journal('TUHH_thesis', 1, 0.6))
         for config in configs:
-            config.plot_scaling_test(ax=ax, quantity=quantity)
+            config.plot_scaling_test(ax=ax, quantity=quantity, space_time=space_time)
         if (problem, quantity) in ideal_lines.keys():
             ax.loglog(*ideal_lines[(problem, quantity)].values(), color='black', ls=':', label='ideal')
+        elif quantity == 'efficiency':
+            ax.axhline(1, color='black', ls=':', label='ideal')
+            ax.set_yscale('linear')
+            ax.set_ylim(0, 1.1)
         box = ax.get_position()
         ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
         ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
-        path = f'{PROJECT_PATH}/plots/scaling_{problem}_{quantity}.pdf'
+        path = f'{PROJECT_PATH}/plots/scaling_{problem}_{quantity}_{XPU}_{space_time}.pdf'
         fig.savefig(path, bbox_inches='tight')
         print(f'Saved {path!r}', flush=True)

@@ -393,8 +414,8 @@ def plot_scalings(problem, **kwargs):  # pragma: no cover
     parser = argparse.ArgumentParser()
     parser.add_argument('--mode', type=str, choices=['run', 'plot'], default='run')
    parser.add_argument('--problem', type=str, default='GS')
-    parser.add_argument('--XPU', type=str, choices=['CPU', 'GPU'], default='CPU')
-    parser.add_argument('--space_time', type=str, choices=['True', 'False'], default='False')
+    parser.add_argument('--XPU', type=str, choices=['CPU', 'GPU', 'both'], default='CPU')
+    parser.add_argument('--space_time', type=str, choices=['True', 'False', 'None'], default='False')
     parser.add_argument('--submit', type=str, choices=['True', 'False'], default='True')
     parser.add_argument('--nsys_profiling', type=str, choices=['True', 'False'], default='False')

@@ -403,6 +424,13 @@ def plot_scalings(problem, **kwargs):  # pragma: no cover
     submit = args.submit == 'True'
     nsys_profiling = args.nsys_profiling == 'True'

+    if args.space_time == 'True':
+        space_time = True
+    elif args.space_time == 'False':
+        space_time = False
+    else:
+        space_time = None
+
     config_classes = []

     if args.problem == 'GS3D':

@@ -429,6 +457,6 @@ def plot_scalings(problem, **kwargs):  # pragma: no cover
         if args.mode == 'run':
             config.run_scaling_test(submit=submit, nsys_profiling=nsys_profiling)
         elif args.mode == 'plot':
-            plot_scalings(problem=args.problem)
+            plot_scalings(problem=args.problem, XPU=args.XPU, space_time=space_time)
         else:
             raise NotImplementedError(f'Don\'t know mode {args.mode!r}')
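Usage note for the updated script: the new XPU and space_time options flow from the command line into plot_scalings and on to plot_scaling_test. A hypothetical driver call illustrating the new options, assuming the timing files from earlier scaling runs already exist on disk:

from pySDC.projects.GPU.analysis_scripts.parallel_scaling import plot_scalings

# 'both' falls through to the else branch and plots CPU and GPU configs together;
# space_time=None disables the PinT filter, so space-only and space-time
# experiments both appear in the plots.
plot_scalings(problem='GS3D', XPU='both', space_time=None)

The command-line equivalent with the newly added choices would be roughly: python pySDC/projects/GPU/analysis_scripts/parallel_scaling.py --mode plot --problem GS3D --XPU both --space_time None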
