Commit 5a5375e

TL: block decomposition and first tests for fieldsIO
1 parent 684d3bf commit 5a5375e

4 files changed: +395 -48 lines
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+# Testing the FieldsIO
+
+- generate seq, read MPI
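A rough sketch of the test idea noted above ("generate seq, read MPI"): write the file with a single process, then read it back with several MPI ranks. This is not part of the commit; it only reuses names that appear in base.py below (Cart1D, setHeader, addField, FieldsIO.fromFile, readField), and the import path, file name, field values and the initialize() call are assumptions.

# Hypothetical sketch only; import path, file name and initialize() call are assumed
import numpy as np
from base import FieldsIO, Cart1D        # assumed: run from the fieldsIO folder

# 1) sequential generation (single process)
x = np.linspace(0, 1, num=256, endpoint=False)
u0 = np.array([np.sin(2*np.pi*x), np.cos(2*np.pi*x)])   # two variables on the grid
f1 = Cart1D(np.float64, "test_seq.pysdc")
f1.setHeader(nVar=u0.shape[0], gridX=x)
f1.initialize()                          # assumed to be required before addField
for t in np.arange(10)/10:
    f1.addField(t, t*u0)

# 2) MPI read-back (several ranks): each rank first sets its local block,
#    e.g. Cart1D.setupMPI(comm, iLocX, nLocX), then reads its slice of the same file
f2 = FieldsIO.fromFile("test_seq.pysdc")
t, u = f2.readField(2)                   # expected: t == 0.2 and u == 0.2*u0 on the local block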

pySDC/playgrounds/dedalus/fieldsIO.py renamed to pySDC/playgrounds/dedalus/fieldsIO/base.py

Lines changed: 67 additions & 48 deletions
@@ -4,10 +4,17 @@
 Base generic script for fields IO
 """
 import os
+import sys
 import numpy as np
 from typing import Type, TypeVar
-from mpi4py import MPI
-from time import time
+try:
+    from mpi4py import MPI
+except ImportError:
+    pass
+
+
+from time import time, sleep
+from blocks import BlockDecomposition

 T = TypeVar("T")

@@ -230,6 +237,7 @@ def setupMPI(cls, comm:MPI.Intracomm, iLocX, nLocX):
         cls.comm = comm
         cls.iLocX = iLocX
         cls.nLocX = nLocX
+        cls.mpiFile = None

     @property
     def MPI_ON(self):
@@ -241,18 +249,33 @@ def MPI_ROOT(self):
         if self.comm is None: return True
         return self.comm.Get_rank() == 0

-    def MPI_FILE_OPEN(self, mode)->MPI.File:
+    def MPI_FILE_OPEN(self, mode):
         amode = {
             "r": MPI.MODE_RDONLY,
             "a": MPI.MODE_WRONLY | MPI.MODE_APPEND,
         }[mode]
-        return MPI.File.Open(self.comm, self.fileName, amode)
+        self.mpiFile = MPI.File.Open(self.comm, self.fileName, amode)
+
+    def MPI_WRITE(self, data):
+        self.mpiFile.Write(data)
+
+    def MPI_WRITE_AT(self, offset, data:np.ndarray):
+        self.mpiFile.Write_at(offset, data)
+
+    def MPI_READ_AT(self, offset, data):
+        self.mpiFile.Read_at(offset, data)
+
+    def MPI_FILE_CLOSE(self):
+        self.mpiFile.Close()
+        self.mpiFile = None

     def initialize(self):
         if self.MPI_ROOT:
             super().initialize()
-        self.comm.Barrier()
-        self.initialized = True
+        if self.MPI_ON:
+            self.comm.Barrier()
+        self.initialized = True
+

     def addField(self, time, field):
         if not self.MPI_ON: return super().addField(time, field)
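The wrappers above funnel all MPI-IO through the shared self.mpiFile handle; note that the hunks below also move from the collective Write_at_all/Read_at_all calls to the plain Write_at/Read_at through these wrappers. As a condensed illustration of how they chain together, here is a sketch equivalent to the updated Cart1D.addField shown next. It is not part of the commit, and the T_DTYPE default is an assumption.

import numpy as np

def write_record(f, field, time, T_DTYPE=np.float64):
    # condensed restatement of the updated Cart1D.addField, using the new wrappers;
    # `f` stands for a Cart1D instance with setupMPI(...) already called
    offset0 = f.fileSize                 # file size before this record is appended
    f.MPI_FILE_OPEN(mode="a")
    if f.MPI_ROOT:
        f.MPI_WRITE(np.array(time, dtype=T_DTYPE))   # only root appends the time stamp
    offset0 += f.tSize
    for iVar in range(f.nVar):
        f.MPI_WRITE_AT(offset0 + (iVar*f.nX + f.iLocX)*f.itemSize, field[iVar])
    f.MPI_FILE_CLOSE()                   # closes and resets self.mpiFile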
@@ -265,15 +288,15 @@ def addField(self, time, field):
             f"expected {(self.nVar, self.nLocX)} shape, got {field.shape}"

         offset0 = self.fileSize
-        mpiFile = self.MPI_FILE_OPEN(mode="a")
+        self.MPI_FILE_OPEN(mode="a")
         if self.MPI_ROOT:
-            mpiFile.Write(np.array(time, dtype=T_DTYPE))
+            self.MPI_WRITE(np.array(time, dtype=T_DTYPE))
         offset0 += self.tSize

         for iVar in range(self.nVar):
             offset = offset0 + (iVar*self.nX + self.iLocX)*self.itemSize
-            mpiFile.Write_at_all(offset, field[iVar])
-        mpiFile.Close()
+            self.MPI_WRITE_AT(offset, field[iVar])
+        self.MPI_FILE_CLOSE()


     def readField(self, idx):
@@ -287,11 +310,11 @@ def readField(self, idx):

         field = np.empty((self.nVar, self.nLocX), dtype=self.dtype)

-        mpiFile = self.MPI_FILE_OPEN(mode="r")
+        self.MPI_FILE_OPEN(mode="r")
         for iVar in range(self.nVar):
             offset = offset0 + (iVar*self.nX + self.iLocX)*self.itemSize
-            mpiFile.Read_at_all(offset, field[iVar])
-        mpiFile.Close()
+            self.MPI_READ_AT(offset, field[iVar])
+        self.MPI_FILE_CLOSE()

         return t, field

@@ -331,9 +354,7 @@ def readHeader(self, f):
     # -------------------------------------------------------------------------
     @classmethod
     def setupMPI(cls, comm:MPI.Intracomm, iLocX, nLocX, iLocY, nLocY):
-        cls.comm = comm
-        cls.iLocX = iLocX
-        cls.nLocX = nLocX
+        super().setupMPI(comm, iLocX, nLocX)
         cls.iLocY = iLocY
         cls.nLocY = nLocY

@@ -349,18 +370,18 @@ def addField(self, time, field):
             f"expected {(self.nVar, self.nLocX, self.nLocY)} shape, got {field.shape}"

         offset0 = self.fileSize
-        mpiFile = self.MPI_FILE_OPEN(mode="a")
+        self.MPI_FILE_OPEN(mode="a")
         if self.MPI_ROOT:
-            mpiFile.Write(np.array(time, dtype=T_DTYPE))
+            self.MPI_WRITE(np.array(time, dtype=T_DTYPE))
         offset0 += self.tSize

         for iVar in range(self.nVar):
             for iX in range(self.nLocX):
                 offset = offset0 + (
                     iVar*self.nX*self.nY + (self.iLocX + iX)*self.nY + self.iLocY
                 )*self.itemSize
-                mpiFile.Write_at_all(offset, field[iVar, iX])
-        mpiFile.Close()
+                self.MPI_WRITE_AT(offset, field[iVar, iX])
+        self.MPI_FILE_CLOSE()


     def readField(self, idx):
@@ -374,14 +395,14 @@ def readField(self, idx):

         field = np.empty((self.nVar, self.nLocX, self.nLocY), dtype=self.dtype)

-        mpiFile = self.MPI_FILE_OPEN(mode="r")
+        self.MPI_FILE_OPEN(mode="r")
         for iVar in range(self.nVar):
             for iX in range(self.nLocX):
                 offset = offset0 + (
                     iVar*self.nX*self.nY + (self.iLocX + iX)*self.nY + self.iLocY
                 )*self.itemSize
-                mpiFile.Read_at_all(offset, field[iVar, iX])
-        mpiFile.Close()
+                self.MPI_READ_AT(offset, field[iVar, iX])
+        self.MPI_FILE_CLOSE()

         return t, field

@@ -404,7 +425,7 @@ def readField(self, idx):
     y = np.linspace(0, 1, num=64, endpoint=False)
     nY = y.size

-    dim = 2
+    dim = 1
     dType = np.float64

     if dim == 1:
@@ -416,42 +437,40 @@ def readField(self, idx):
     comm = MPI.COMM_WORLD
     MPI_SIZE = comm.Get_size()
     MPI_RANK = comm.Get_rank()
+
+    gridSizes = u0.shape[1:]
+    algo = sys.argv[1] if len(sys.argv) > 1 else "ChatGPT"
+    blocks = BlockDecomposition(MPI_SIZE, gridSizes, algo, MPI_RANK)
+    bounds = blocks.localBounds
     if MPI_SIZE > 1:
         fileName = "test_MPI.pysdc"
-        if dim == 1:
-            pSizeX = MPI_SIZE
-            pRankX = MPI_RANK
-        if dim == 2:
-            assert MPI_SIZE == 4
-            pSizeX = MPI_SIZE // 2
-            pRankX = MPI_RANK // 2
-            pSizeY = MPI_SIZE // 2
-            pRankY = MPI_RANK % 2
-    else:
-        pSizeX, pRankX = 1, 0
-        pSizeY, pRankY = 1, 0
-
-    def decomposeDirection(nItems, pSize, pRank):
-        n0 = nItems // pSize
-        nRest = nItems - pSize*n0
-        nLoc = n0 + 1*(pRank < nRest)
-        iLoc = pRank*n0 + nRest*(pRank >= nRest) + pRank*(pRank < nRest)
-        return iLoc, nLoc
-
-
-    iLocX, nLocX = decomposeDirection(nX, pSizeX, pRankX)
+
+
     if dim == 1:
+        (iLocX, ), (nLocX, ) = bounds
+        pRankX, = blocks.ranks
         Cart1D.setupMPI(comm, iLocX, nLocX)
         u0 = u0[:, iLocX:iLocX+nLocX]

+        MPI.COMM_WORLD.Barrier()
+        sleep(0.01*MPI_RANK)
+        print(f"[Rank {MPI_RANK}] pRankX={pRankX} ({iLocX}, {nLocX})")
+        MPI.COMM_WORLD.Barrier()
+
         f1 = Cart1D(dType, fileName)
         f1.setHeader(nVar=u0.shape[0], gridX=x)

     if dim == 2:
-        iLocY, nLocY = decomposeDirection(nY, pSizeY, pRankY)
+        (iLocX, iLocY), (nLocX, nLocY) = bounds
+        pRankX, pRankY = blocks.ranks
         Cart2D.setupMPI(comm, iLocX, nLocX, iLocY, nLocY)
         u0 = u0[:, iLocX:iLocX+nLocX, iLocY:iLocY+nLocY]

+        MPI.COMM_WORLD.Barrier()
+        sleep(0.01*MPI_RANK)
+        print(f"[Rank {MPI_RANK}] pRankX={pRankX} ({iLocX}, {nLocX}), pRankY={pRankY} ({iLocY}, {nLocY})")
+        MPI.COMM_WORLD.Barrier()
+
         f1 = Cart2D(dType, fileName)
         f1.setHeader(nVar=u0.shape[0], gridX=x, gridY=y)

465484
for t in np.arange(nTimes)/nTimes:
466485
f1.addField(t, t*u0)
467486
if MPI_RANK == 0:
468-
print(f" -> done in {time()-tBeg:1.2f}s !")
487+
print(f" -> done in {time()-tBeg:1.4f}s !")
469488

470489
f2 = FieldsIO.fromFile(fileName)
471490
t, u = f2.readField(2)
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
2+
class BlockDecomposition(object):
3+
"""
4+
Class decomposing a cartesian space domain (1D to 3D) into a given number of processors.
5+
6+
Parameters
7+
----------
8+
nProcs : int
9+
Total number of processors for space block decomposition.
10+
gridSizes : list[int]
11+
Number of grid points in each dimension
12+
algo : str, optional
13+
Algorithm used for hte block decomposition :
14+
15+
- Hybrid : approach minimizing interface communication, taken from
16+
the `[Hybrid CFD solver] <https://web.stanford.edu/group/ctr/ResBriefs07/5_larsson1_pp47_58.pdf>`_.
17+
- ChatGPT : quickly generated using `[ChatGPT] <https://chatgpt.com>`_.
18+
19+
The default is "Hybrid".
20+
gRank : int, optional
21+
If provided, the global rank that will determine the local block distribution. Default is None.
22+
order : str, optional
23+
The order used when computing the rank block distribution. Default is `C`.
24+
"""
25+
+    def __init__(self, nProcs, gridSizes, algo="Hybrid", gRank=None, order="C"):
+        dim = len(gridSizes)
+        assert dim in [1, 2, 3], "block decomposition only works for 1D, 2D or 3D domains"
+
+        if algo == "ChatGPT":
+
+            nBlocks = [1]*dim
+            for i in range(2, int(nProcs**0.5) + 1):
+                while nProcs % i == 0:
+                    nBlocks[0] *= i
+                    nProcs //= i
+                    nBlocks.sort()
+
+            if nProcs > 1:
+                nBlocks[0] *= nProcs
+
+            nBlocks.sort()
+            while len(nBlocks) < dim:
+                smallest = nBlocks.pop(0)
+                nBlocks += [1, smallest]
+                nBlocks.sort()
+
+            while len(nBlocks) > dim:
+                smallest = nBlocks.pop(0)
+                next_smallest = nBlocks.pop(0)
+                nBlocks.append(smallest * next_smallest)
+                nBlocks.sort()
+
+        elif algo == "Hybrid":
+            rest = nProcs
+            facs = {
+                1: [1],
+                2: [2, 1],
+                3: [2, 3, 1],
+            }[dim]
+            exps = [0]*dim
+            for n in range(dim-1):
+                while (rest % facs[n]) == 0:
+                    exps[n] = exps[n] + 1
+                    rest = rest // facs[n]
+            if (rest > 1):
+                facs[dim-1] = rest
+                exps[dim-1] = 1
+
+            nBlocks = [1]*dim
+            for n in range(dim-1, -1, -1):
+                while exps[n] > 0:
+                    dummymax = -1
+                    dmax = 0
+                    for d, nPts in enumerate(gridSizes):
+                        dummy = (nPts + nBlocks[d] - 1) // nBlocks[d]
+                        if (dummy >= dummymax):
+                            dummymax = dummy
+                            dmax = d
+                    nBlocks[dmax] = nBlocks[dmax] * facs[n]
+                    exps[n] = exps[n] - 1
+
+        else:
+            raise NotImplementedError(f"algo={algo}")
+
+        # Store attributes
+        self.dim = dim
+        self.nBlocks = nBlocks
+        self.gridSizes = gridSizes
+
+        # Used for rank block distribution
+        self.gRank = gRank
+        self.order = order
+
+    @property
+    def ranks(self):
+        gRank, order = self.gRank, self.order
+        assert gRank is not None, "gRank attribute needs to be set"
+        dim, nBlocks = self.dim, self.nBlocks
+        if dim == 1:
+            return (gRank, )
+        elif dim == 2:
+            div = nBlocks[-1] if order == "C" else nBlocks[0]
+            return (gRank // div, gRank % div)
+        else:
+            raise NotImplementedError(f"dim={dim}")
+
+    @property
+    def localBounds(self):
+        iLocList, nLocList = [], []
+        for rank, nPoints, nBlocks in zip(self.ranks, self.gridSizes, self.nBlocks):
+            n0 = nPoints // nBlocks
+            nRest = nPoints - nBlocks*n0
+            nLoc = n0 + 1*(rank < nRest)
+            iLoc = rank*n0 + nRest*(rank >= nRest) + rank*(rank < nRest)
+
+            iLocList.append(iLoc)
+            nLocList.append(nLoc)
+        return iLocList, nLocList
+
+
+if __name__ == "__main__":
+    from mpi4py import MPI
+    from time import sleep
+
+    comm:MPI.Intracomm = MPI.COMM_WORLD
+    MPI_SIZE = comm.Get_size()
+    MPI_RANK = comm.Get_rank()
+
+    blocks = BlockDecomposition(MPI_SIZE, [256, 64], gRank=MPI_RANK)
+    if MPI_RANK == 0:
+        print(f"nBlocks : {blocks.nBlocks}")
+
+    ranks = blocks.ranks
+    bounds = blocks.localBounds
+
+    comm.Barrier()
+    sleep(0.01*MPI_RANK)
+    print(f"[Rank {MPI_RANK}] pRankX={ranks}, bounds={bounds}")
