@@ -130,12 +130,10 @@ def _matvec(self, x: DistributedArray) -> DistributedArray:
130130 # gather results
131131 # TODO: _allgather is supposed to be private to DistributedArray
132132 # but so far, we do not take base_comm_nccl as an argument to Op.
133- # For consistency, y._allgather has to be call here.
134- # we can do if else for x.base_comm_nccl, but that means
133+ # For consistency, y._allgather has to be called here.
134+ # Alternatively, we could do an if-else check on x.base_comm_nccl, but that means
135135 # we have to call function from _nccl.py
136- # y[:] = np.vstack(y._allgather(y1)).ravel()
137- recv = y ._allgather (y1 )
138- y [:] = recv .ravel ()
136+ y [:] = ncp .vstack (y ._allgather (y1 )).ravel ()
139137 return y
140138
141139 def _rmatvec (self , x : NDArray ) -> NDArray :
@@ -172,11 +170,15 @@ def _rmatvec(self, x: NDArray) -> NDArray:
172170 y1 [isl ] = ncp .dot (x [isl ].T .conj (), self .G [isl ]).T .conj ()
173171
174172 # gather results
175- recv = y ._allgather (y1 )
176- if self .usematmul :
177- # unrolling like DistributedArray asarray()
173+ recv = y ._allgather (y1 )
174+ # TODO: the current implementation of _allgather calls non-buffered MPI allgather (sub-optimal for CuPy+MPI),
175+ # which returns a list (not flattened) and does not require unrolling
176+ if self .usematmul and isinstance (recv , ncp .ndarray ) :
177+ # unrolling
178178 chunk_size = self .ny * self .nz
179- recv = ncp .vstack ([recv [i * chunk_size : (i + 1 )* chunk_size ].reshape (self .nz , self .ny ).T for i in range ((len (recv )+ chunk_size - 1 )// chunk_size )])
180-
179+ num_partition = (len (recv )+ chunk_size - 1 )// chunk_size
180+ recv = ncp .vstack ([recv [i * chunk_size : (i + 1 )* chunk_size ].reshape (self .nz , self .ny ).T for i in range (num_partition )])
181+ else :
182+ recv = ncp .vstack (recv )
181183 y [:] = recv .ravel ()
182184 return y
0 commit comments