
Commit 19e873a

Merge pull request #155 from mrava87/test-matrixmultchain
test/doc: added tests and example of chaining MPIMatrixMult
2 parents 2c8bf53 + fc16f5d commit 19e873a

File tree

examples/plot_matrixmult.py
tests/test_matrixmult.py

2 files changed: +91 -95 lines changed


examples/plot_matrixmult.py

Lines changed: 30 additions & 63 deletions
@@ -24,8 +24,10 @@
 import numpy as np
 from mpi4py import MPI
 
-from pylops_mpi import DistributedArray, Partition
-from pylops_mpi.basicoperators.MatrixMult import MPIMatrixMult
+import pylops
+
+import pylops_mpi
+from pylops_mpi import Partition
 
 plt.close("all")
 
@@ -86,7 +88,8 @@
 # than the row or columm ranks.
 
 base_comm = MPI.COMM_WORLD
-comm, rank, row_id, col_id, is_active = MPIMatrixMult.active_grid_comm(base_comm, N, M)
+comm, rank, row_id, col_id, is_active = \
+    pylops_mpi.MPIMatrixMult.active_grid_comm(base_comm, N, M)
 print(f"Process {base_comm.Get_rank()} is {'active' if is_active else 'inactive'}")
 if not is_active: exit(0)
 
@@ -144,23 +147,24 @@
 ################################################################################
 # We are now ready to create the :py:class:`pylops_mpi.basicoperators.MPIMatrixMult`
 # operator and the input matrix :math:`\mathbf{X}`
-Aop = MPIMatrixMult(A_p, M, base_comm=comm, dtype="float32")
+Aop = pylops_mpi.MPIMatrixMult(A_p, M, base_comm=comm, dtype="float32")
 
 col_lens = comm.allgather(my_own_cols)
 total_cols = np.sum(col_lens)
-x = DistributedArray(global_shape=K * total_cols,
-                     local_shapes=[K * col_len for col_len in col_lens],
-                     partition=Partition.SCATTER,
-                     mask=[i % p_prime for i in range(comm.Get_size())],
-                     base_comm=comm,
-                     dtype="float32")
+x = pylops_mpi.DistributedArray(
+    global_shape=K * total_cols,
+    local_shapes=[K * col_len for col_len in col_lens],
+    partition=Partition.SCATTER,
+    mask=[i % p_prime for i in range(comm.Get_size())],
+    base_comm=comm,
+    dtype="float32")
 x[:] = X_p.flatten()
 
 ################################################################################
-# We can now apply the forward pass :math:`\mathbf{y} = \mathbf{Ax}` (which effectively
-# implements a distributed matrix-matrix multiplication :math:`Y = \mathbf{AX}`)
-# Note :math:`\mathbf{Y}` is distributed in the same way as the input
-# :math:`\mathbf{X}`.
+# We can now apply the forward pass :math:`\mathbf{y} = \mathbf{Ax}` (which
+# effectively implements a distributed matrix-matrix multiplication
+# :math:`Y = \mathbf{AX}`). Note :math:`\mathbf{Y}` is distributed in the same
+# way as the input :math:`\mathbf{X}`.
 y = Aop @ x
 
 ###############################################################################
@@ -172,52 +176,15 @@
 xadj = Aop.H @ y
 
 ###############################################################################
-# To conclude we verify our result against the equivalent serial version of
-# the operation by gathering the resulting matrices in rank0 and reorganizing
-# the returned 1D-arrays into 2D-arrays.
-
-# Local benchmarks
-y = y.asarray(masked=True)
-col_counts = [min(blk_cols, M - j * blk_cols) for j in range(p_prime)]
-y_blocks = []
-offset = 0
-for cnt in col_counts:
-    block_size = N * cnt
-    y_block = y[offset: offset + block_size]
-    if len(y_block) != 0:
-        y_blocks.append(
-            y_block.reshape(N, cnt)
-        )
-    offset += block_size
-y = np.hstack(y_blocks)
-
-xadj = xadj.asarray(masked=True)
-xadj_blocks = []
-offset = 0
-for cnt in col_counts:
-    block_size = K * cnt
-    xadj_blk = xadj[offset: offset + block_size]
-    if len(xadj_blk) != 0:
-        xadj_blocks.append(
-            xadj_blk.reshape(K, cnt)
-        )
-    offset += block_size
-xadj = np.hstack(xadj_blocks)
-
-if rank == 0:
-    y_loc = (A @ X).squeeze()
-    xadj_loc = (A.T.dot(y_loc.conj())).conj().squeeze()
-
-    if not np.allclose(y, y_loc, rtol=1e-6):
-        print("FORWARD VERIFICATION FAILED")
-        print(f'distributed: {y}')
-        print(f'expected: {y_loc}')
-    else:
-        print("FORWARD VERIFICATION PASSED")
-
-    if not np.allclose(xadj, xadj_loc, rtol=1e-6):
-        print("ADJOINT VERIFICATION FAILED")
-        print(f'distributed: {xadj}')
-        print(f'expected: {xadj_loc}')
-    else:
-        print("ADJOINT VERIFICATION PASSED")
+# Finally, we show the :py:class:`pylops_mpi.basicoperators.MPIMatrixMult`
+# operator can be combined with any other PyLops-MPI operator. We are going to
+# apply here a first derivative along the first axis to the output of the matrix
+# multiplication. The only gotcha here is that one needs to be aware of the
+# ad-hoc distribution of the arrays that are fed to this operator and make
+# sure it is matched in the other operators involved in the chain.
+Dop = pylops.FirstDerivative(dims=(N, my_own_cols), axis=0,
+                             dtype=np.float32)
+DBop = pylops_mpi.MPIBlockDiag(ops=[Dop, ])
+Op = DBop @ Aop
+
+y1 = Op @ x
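For quick reference, the chaining step introduced by this example boils down to the sketch below. This is a condensed recap under stated assumptions, not a standalone script: it assumes Aop, x, comm, N and my_own_cols have already been created as in the earlier sections of examples/plot_matrixmult.py.

# Sketch of the chaining step (assumes Aop, x, comm, N and my_own_cols exist
# as defined earlier in the example).
import numpy as np
import pylops
import pylops_mpi

# local first derivative acting on this rank's (N, my_own_cols) block of Y
Dop = pylops.FirstDerivative(dims=(N, my_own_cols), axis=0, dtype=np.float32)
# block-diagonal wrapper so every rank differentiates its own column block
DBop = pylops_mpi.MPIBlockDiag(ops=[Dop, ])
# chained operator: distributed matrix multiplication followed by derivative
Op = DBop @ Aop

y1 = Op @ x        # forward: derivative of A @ X, distributed by column blocks
xadj1 = Op.H @ y1  # adjoint: A^H applied to D^H y1, distributed like x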

tests/test_matrixmult.py

Lines changed: 61 additions & 32 deletions
@@ -8,8 +8,9 @@
 from mpi4py import MPI
 import pytest
 
+from pylops.basicoperators import FirstDerivative, Identity
 from pylops_mpi import DistributedArray, Partition
-from pylops_mpi.basicoperators.MatrixMult import MPIMatrixMult
+from pylops_mpi.basicoperators import MPIMatrixMult, MPIBlockDiag
 
 np.random.seed(42)
 base_comm = MPI.COMM_WORLD
@@ -19,28 +20,48 @@
 # M, K, N are matrix dimensions A(N,K), B(K,M)
 # P_prime will be ceil(sqrt(size)).
 test_params = [
-    pytest.param(37, 37, 37, "float32", id="f32_37_37_37"),
+    pytest.param(37, 37, 37, "float64", id="f32_37_37_37"),
     pytest.param(50, 30, 40, "float64", id="f64_50_30_40"),
     pytest.param(22, 20, 16, "complex64", id="c64_22_20_16"),
     pytest.param(3, 4, 5, "float32", id="f32_3_4_5"),
     pytest.param(1, 2, 1, "float64", id="f64_1_2_1",),
     pytest.param(2, 1, 3, "float32", id="f32_2_1_3",),
 ]
 
+def _reorganize_local_matrix(x_dist, N, M, blk_cols, p_prime):
+    """Re-organize distributed array in local matrix
+    """
+    x = x_dist.asarray(masked=True)
+    col_counts = [min(blk_cols, M - j * blk_cols) for j in range(p_prime)]
+    x_blocks = []
+    offset = 0
+    for cnt in col_counts:
+        block_size = N * cnt
+        x_block = x[offset: offset + block_size]
+        if len(x_block) != 0:
+            x_blocks.append(
+                x_block.reshape(N, cnt)
+            )
+        offset += block_size
+    x = np.hstack(x_blocks)
+    return x
+
 
 @pytest.mark.mpi(min_size=1)
-@pytest.mark.parametrize("M, K, N, dtype_str", test_params)
+@pytest.mark.parametrize("N, K, M, dtype_str", test_params)
 def test_MPIMatrixMult(N, K, M, dtype_str):
     dtype = np.dtype(dtype_str)
 
     cmplx = 1j if np.issubdtype(dtype, np.complexfloating) else 0
     base_float_dtype = np.float32 if dtype == np.complex64 else np.float64
 
-    comm, rank, row_id, col_id, is_active = MPIMatrixMult.active_grid_comm(base_comm, N, M)
+    comm, rank, row_id, col_id, is_active = \
+        MPIMatrixMult.active_grid_comm(base_comm, N, M)
     if not is_active: return
 
     size = comm.Get_size()
     p_prime = math.isqrt(size)
+    cols_id = comm.allgather(col_id)
 
     # Calculate local matrix dimensions
     blk_rows_A = int(math.ceil(N / p_prime))
@@ -52,6 +73,7 @@ def test_MPIMatrixMult(N, K, M, dtype_str):
     col_end_X = min(M, col_start_X + blk_cols_X)
     local_col_X_len = max(0, col_end_X - col_start_X)
 
+    # Fill local matrices
     A_glob_real = np.arange(N * K, dtype=base_float_dtype).reshape(N, K)
     A_glob_imag = np.arange(N * K, dtype=base_float_dtype).reshape(N, K) * 0.5
     A_glob = (A_glob_real + cmplx * A_glob_imag).astype(dtype)
@@ -88,32 +110,8 @@ def test_MPIMatrixMult(N, K, M, dtype_str):
     xadj_dist = Aop.H @ y_dist
 
     # Re-organize in local matrix
-    y = y_dist.asarray(masked=True)
-    col_counts = [min(blk_cols_X, M - j * blk_cols_X) for j in range(p_prime)]
-    y_blocks = []
-    offset = 0
-    for cnt in col_counts:
-        block_size = N * cnt
-        y_block = y[offset: offset + block_size]
-        if len(y_block) != 0:
-            y_blocks.append(
-                y_block.reshape(N, cnt)
-            )
-        offset += block_size
-    y = np.hstack(y_blocks)
-
-    xadj = xadj_dist.asarray(masked=True)
-    xadj_blocks = []
-    offset = 0
-    for cnt in col_counts:
-        block_size = K * cnt
-        xadj_blk = xadj[offset: offset + block_size]
-        if len(xadj_blk) != 0:
-            xadj_blocks.append(
-                xadj_blk.reshape(K, cnt)
-            )
-        offset += block_size
-    xadj = np.hstack(xadj_blocks)
+    y = _reorganize_local_matrix(y_dist, N, M, blk_cols_X, p_prime)
+    xadj = _reorganize_local_matrix(xadj_dist, K, M, blk_cols_X, p_prime)
 
     if rank == 0:
         y_loc = A_glob @ X_glob
@@ -129,5 +127,36 @@ def test_MPIMatrixMult(N, K, M, dtype_str):
             xadj.squeeze(),
             xadj_loc.squeeze(),
             rtol=np.finfo(np.dtype(dtype)).resolution,
-            err_msg=f"Rank {rank}: Ajoint verification failed."
-        )
+            err_msg=f"Rank {rank}: Adjoint verification failed."
+        )
+
+    # Chain with another operator
+    Dop = FirstDerivative(dims=(N, col_end_X - col_start_X),
+                          axis=0, dtype=dtype)
+    DBop = MPIBlockDiag(ops=[Dop, ], base_comm=comm, mask=cols_id)
+    Op = DBop @ Aop
+
+    y1_dist = Op @ x_dist
+    xadj1_dist = Op.H @ y1_dist
+
+    # Re-organize in local matrix
+    y1 = _reorganize_local_matrix(y1_dist, N, M, blk_cols_X, p_prime)
+    xadj1 = _reorganize_local_matrix(xadj1_dist, K, M, blk_cols_X, p_prime)
+
+    if rank == 0:
+        Dop_glob = FirstDerivative(dims=(N, M), axis=0, dtype=dtype)
+        y1_loc = (Dop_glob @ (A_glob @ X_glob).ravel()).reshape(N, M)
+        assert_allclose(
+            y1.squeeze(),
+            y1_loc.squeeze(),
+            rtol=np.finfo(np.dtype(dtype)).resolution,
+            err_msg=f"Rank {rank}: Forward verification failed."
+        )
+
+        xadj1_loc = A_glob.conj().T @ (Dop_glob.H @ y1_loc.ravel()).reshape(N, M)
+        assert_allclose(
+            xadj1.squeeze(),
+            xadj1_loc.squeeze(),
+            rtol=np.finfo(np.dtype(dtype)).resolution,
+            err_msg=f"Rank {rank}: Adjoint verification failed."
+        )
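On rank 0, the reference for the new chained check is a purely serial computation: the forward of the chain is the first derivative (along the first axis) of A X, and the adjoint applies A^H to D^H Y1. Below is a minimal standalone sketch of that reference computation, with arbitrary small N, K, M chosen only for illustration; it is not part of the test suite.

# Serial sketch of the rank-0 reference used in the chained check
# (hypothetical standalone snippet; N, K, M are arbitrary small values).
import numpy as np
from pylops.basicoperators import FirstDerivative

N, K, M = 6, 5, 4
A = np.arange(N * K, dtype=np.float64).reshape(N, K)
X = np.arange(K * M, dtype=np.float64).reshape(K, M)

Dop_glob = FirstDerivative(dims=(N, M), axis=0, dtype=np.float64)

# forward of the chain: first derivative of the product A @ X
y1_loc = (Dop_glob @ (A @ X).ravel()).reshape(N, M)
# adjoint of the chain: A^H applied to D^H y1
xadj1_loc = A.conj().T @ (Dop_glob.H @ y1_loc.ravel()).reshape(N, M)

print(y1_loc.shape, xadj1_loc.shape)  # (6, 4) (5, 4)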
