@@ -82,16 +82,16 @@ class MPIMatrixMult(MPILinearOperator):
     of shape ``(K, M)``) is reshaped to ``(K, M_local)`` where ``M_local``
     is the number of columns assigned to the current process.
 
-    2. **Data Broadcasting**: Within each layer (processes with same ``layer_id``),
-       the operand data is broadcast from the process whose ``group_id`` matches
-       the ``layer_id``. This ensures all processes in a layer have access to
-       the same operand columns.
+    2. **Data Broadcasting**: Within each row (processes with same ``row_id``),
+       the operand data is broadcast from the process whose ``col_id`` matches
+       the ``row_id`` (processes along the diagonal). This ensures all processes
+       in a row have access to the same operand columns.
 
     3. **Local Computation**: Each process computes ``A_local @ X_local`` where:
        - ``A_local`` is the local block of matrix ``A`` (shape ``N_local x K``)
        - ``X_local`` is the broadcasted operand (shape ``K x M_local``)
 
-    4. **Layer Gather**: Results from all processes in each layer are gathered
+    4. **Row-wise Gather**: Results from all processes in each row are gathered
        using ``allgather`` to reconstruct the full result matrix vertically.
 
     **Adjoint Operation step-by-step**
@@ -112,9 +112,9 @@ class MPIMatrixMult(MPILinearOperator):
        producing a partial result of shape ``(K, M_local)``.
        This computes the local contribution of columns of ``A^H`` to the final result.
 
-    3. **Layer Reduction**: Since the full result ``Y = A^H \cdot X`` is the
+    3. **Row-wise Reduction**: Since the full result ``Y = A^H \cdot X`` is the
        sum of contributions from all column blocks of ``A^H``, processes in the
-       same layer perform an ``allreduce`` sum to combine their partial results.
+       same row perform an ``allreduce`` sum to combine their partial results.
        This gives the complete ``(K, M_local)`` result for their assigned columns.
 
     """
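
Note (not part of the diff): the communication pattern that the renamed docstring describes can be reproduced with a minimal mpi4py + NumPy sketch. Everything below is illustrative only; the sizes, variable names, and even block split are assumptions rather than the operator's actual code, and it expects a square number of ranks (e.g. `mpirun -n 4 python sketch.py`).

```python
# Illustrative sketch of the row-wise broadcast / allgather / allreduce
# pattern described in the docstring above; not the operator's own code.
import math
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()
p_prime = math.isqrt(size)                       # side of the square process grid
assert p_prime * p_prime == size, "run with a square number of ranks"

row_id, col_id = rank // p_prime, rank % p_prime
row_comm = comm.Split(color=row_id, key=col_id)  # processes sharing row_id

# Toy global sizes (assumed): A is (N, K), X is (K, M); blocks divide evenly.
N, K, M = 8, 6, 4
n_loc, m_loc = N // p_prime, M // p_prime
rng = np.random.default_rng(rank)
A_local = rng.standard_normal((n_loc, K))        # row block of A, indexed by col_id
X_local = rng.standard_normal((K, m_loc))        # column block of X, owned by the row

# Step 2 (forward): broadcast the operand within the row from the diagonal rank.
X_local = row_comm.bcast(X_local, root=row_id)

# Steps 3-4 (forward): local product, then gather row blocks vertically.
Y_local = np.vstack(row_comm.allgather(A_local @ X_local))       # (N, m_loc)

# Adjoint step 3: each rank applies A_local^H to its tile, and the row sums
# the partial contributions into the full (K, m_loc) block.
tile = Y_local[col_id * n_loc:(col_id + 1) * n_loc, :]
Z_local = row_comm.allreduce(A_local.conj().T @ tile, op=MPI.SUM)
```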
@@ -135,29 +135,28 @@ def __init__(
         if self._P_prime * self._C != size:
             raise Exception(f"Number of processes must be a square number, provided {size} instead...")
 
-        # Compute this process's group and layer indices
-        self._group_id = rank % self._P_prime
-        self._layer_id = rank // self._P_prime
+        self._col_id = rank % self._P_prime
+        self._row_id = rank // self._P_prime
 
         # Split communicators by layer (rows) and by group (columns)
         self.base_comm = base_comm
-        self._layer_comm = base_comm.Split(color=self._layer_id, key=self._group_id)
-        self._group_comm = base_comm.Split(color=self._group_id, key=self._layer_id)
+        self._row_comm = base_comm.Split(color=self._row_id, key=self._col_id)
+        self._col_comm = base_comm.Split(color=self._col_id, key=self._row_id)
 
         self.A = A.astype(np.dtype(dtype))
         if saveAt: self.At = A.T.conj()
 
-        self.N = self._layer_comm.allreduce(self.A.shape[0], op=MPI.SUM)
+        self.N = self._row_comm.allreduce(self.A.shape[0], op=MPI.SUM)
         self.K = A.shape[1]
         self.M = M
 
         block_cols = int(math.ceil(self.M / self._P_prime))
         blk_rows = int(math.ceil(self.N / self._P_prime))
 
-        self._row_start = self._group_id * blk_rows
+        self._row_start = self._col_id * blk_rows
         self._row_end = min(self.N, self._row_start + blk_rows)
 
-        self._col_start = self._layer_id * block_cols
+        self._col_start = self._row_id * block_cols
         self._col_end = min(self.M, self._col_start + block_cols)
 
         self._local_ncols = self._col_end - self._col_start
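
As a quick sanity check of the renamed indices (illustrative, not part of the diff), a hypothetical 2 x 2 grid (`size = 4`, so `_P_prime = 2`) maps ranks to grid positions as follows:

```python
# Rank -> (row_id, col_id) mapping for a hypothetical 2 x 2 grid,
# following the same formulas used in __init__ above.
P_prime = 2
for rank in range(P_prime * P_prime):
    col_id = rank % P_prime    # position along the row
    row_id = rank // P_prime   # which row of the grid
    print(f"rank {rank}: row_id={row_id}, col_id={col_id}")
# rank 0: row_id=0, col_id=0    rank 1: row_id=0, col_id=1
# rank 2: row_id=1, col_id=0    rank 3: row_id=1, col_id=1
```

With this mapping, `Split(color=row_id, ...)` groups ranks {0, 1} and {2, 3} into row communicators, while `Split(color=col_id, ...)` groups {0, 2} and {1, 3} into column communicators.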
@@ -184,7 +183,7 @@ def _matvec(self, x: DistributedArray) -> DistributedArray:
         x_arr = x.local_array.reshape((self.dims[0], my_own_cols))
         X_local = x_arr.astype(self.dtype)
         Y_local = ncp.vstack(
-            self._layer_comm.allgather(
+            self._row_comm.allgather(
                 ncp.matmul(self.A, X_local)
             )
         )
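
For intuition only (not part of the diff): the `vstack(allgather(...))` in `_matvec` reconstructs the full product because stacking the per-row-block products of `A` equals multiplying with the full `A`. A serial NumPy check with assumed toy sizes:

```python
# Serial check: stacking row-block products reproduces the full product,
# which is what the row-wise allgather achieves in parallel.
import numpy as np

rng = np.random.default_rng(0)
N, K, m_loc, p_prime = 8, 6, 2, 2     # assumed toy sizes, N divisible by p_prime
A = rng.standard_normal((N, K))
X_local = rng.standard_normal((K, m_loc))

n_loc = N // p_prime
blocks = [A[i * n_loc:(i + 1) * n_loc] @ X_local for i in range(p_prime)]
assert np.allclose(np.vstack(blocks), A @ X_local)
```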
@@ -208,6 +207,6 @@ def _rmatvec(self, x: DistributedArray) -> DistributedArray:
         X_tile = x_arr[self._row_start:self._row_end, :]
         A_local = self.At if hasattr(self, "At") else self.A.T.conj()
         Y_local = ncp.matmul(A_local, X_tile)
-        y_layer = self._layer_comm.allreduce(Y_local, op=MPI.SUM)
+        y_layer = self._row_comm.allreduce(Y_local, op=MPI.SUM)
         y[:] = y_layer.flatten()
         return y
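
Similarly for the adjoint (illustrative only, not part of the diff): summing the per-block contributions `A_block^H @ X_block` over a row is exactly `A^H @ X`, which is what the row-wise `allreduce` in `_rmatvec` computes. A serial NumPy check with assumed toy sizes:

```python
# Serial check: the sum of per-block adjoint contributions equals the full
# adjoint product, mirroring the row-wise allreduce in _rmatvec.
import numpy as np

rng = np.random.default_rng(0)
N, K, m_loc, p_prime = 8, 6, 2, 2     # assumed toy sizes, N divisible by p_prime
A = rng.standard_normal((N, K))
X = rng.standard_normal((N, m_loc))

n_loc = N // p_prime
partials = [A[i * n_loc:(i + 1) * n_loc].conj().T @ X[i * n_loc:(i + 1) * n_loc]
            for i in range(p_prime)]
assert np.allclose(sum(partials), A.conj().T @ X)
```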