2 changes: 1 addition & 1 deletion pylops_mpi/DistributedArray.py
@@ -697,7 +697,7 @@ def _compute_vector_norm(self, local_array: NDArray,
        # TODO (tharitt): currently CuPy + MPI does not work well with buffered communication, particularly
        # with the MAX and MIN operators. Here we copy the array back to CPU, transfer it, and copy the result back to the GPU.
        send_buf = ncp.max(ncp.abs(local_array), axis=axis).astype(ncp.float64)
        if self.engine=="cupy" and self.base_comm_nccl is None:
        if self.engine == "cupy" and self.base_comm_nccl is None:
            recv_buf = self._allreduce_subcomm(send_buf.get(), recv_buf.get(), op=MPI.MAX)
            recv_buf = ncp.asarray(ncp.squeeze(recv_buf, axis=axis))
        else:
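Note: the TODO above describes a CPU round-trip workaround: the CuPy buffer is copied to host, reduced with buffered MPI, and the result is copied back to the GPU. A minimal standalone sketch of that pattern (assuming mpi4py and CuPy are installed; `allreduce_max_via_host` is a hypothetical helper, not pylops_mpi API):

```python
# Sketch only: illustrates the host round-trip for a MAX allreduce.
import numpy
import cupy as cp
from mpi4py import MPI

def allreduce_max_via_host(local_array: cp.ndarray,
                           comm: MPI.Comm = MPI.COMM_WORLD) -> cp.ndarray:
    send_buf = local_array.get()                    # device -> host copy
    recv_buf = numpy.empty_like(send_buf)
    comm.Allreduce(send_buf, recv_buf, op=MPI.MAX)  # buffered MPI on host memory
    return cp.asarray(recv_buf)                     # host -> device copy
```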
8 changes: 8 additions & 0 deletions tests/test_blockdiag.py
@@ -27,6 +27,14 @@
par2j = {'ny': 301, 'nx': 101, 'dtype': np.complex128}

np.random.seed(42)
rank = MPI.COMM_WORLD.Get_rank()
if backend == "cupy":
    device_count = np.cuda.runtime.getDeviceCount()
    device_id = int(
        os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")
        or rank % device_count
    )
    np.cuda.Device(device_id).use()


@pytest.mark.mpi(min_size=2)
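Note: each test module in this PR gains the same preamble to bind one GPU per MPI rank. A standalone sketch of that binding logic (assuming mpi4py and CuPy; the script name and the explicit `local_rank` check are illustrative, not part of the PR):

```python
# Standalone sketch of the rank-to-GPU binding used in these tests.
# Run e.g. with `mpirun -n 4 python bind_gpus.py` (hypothetical file name).
import os
import cupy as cp
from mpi4py import MPI

rank = MPI.COMM_WORLD.Get_rank()
device_count = cp.cuda.runtime.getDeviceCount()

# Prefer the node-local rank exported by Open MPI; otherwise fall back to
# round-robin assignment of global ranks over the visible devices.
local_rank = os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")
device_id = int(local_rank) if local_rank is not None else rank % device_count
cp.cuda.Device(device_id).use()

print(f"rank {rank} -> GPU {device_id} (of {device_count})")
```

Under the fallback path on a 2-GPU node, ranks 0..3 would map to GPUs 0, 1, 0, 1.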
8 changes: 8 additions & 0 deletions tests/test_derivative.py
@@ -25,6 +25,14 @@
np.random.seed(42)
rank = MPI.COMM_WORLD.Get_rank()
size = MPI.COMM_WORLD.Get_size()
if backend == "cupy":
    device_count = np.cuda.runtime.getDeviceCount()
    device_id = int(
        os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")
        or rank % device_count
    )
    np.cuda.Device(device_id).use()


par1 = {
"nz": 600,
10 changes: 9 additions & 1 deletion tests/test_distributedarray.py
@@ -21,6 +21,14 @@
from pylops_mpi.DistributedArray import local_split

np.random.seed(42)
rank = MPI.COMM_WORLD.Get_rank()
if backend == "cupy":
    device_count = np.cuda.runtime.getDeviceCount()
    device_id = int(
        os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")
        or rank % device_count
    )
    np.cuda.Device(device_id).use()

par1 = {'global_shape': (500, 501),
        'partition': Partition.SCATTER, 'dtype': np.float64,
@@ -206,7 +214,7 @@ def test_distributed_norm(par):

    # TODO (tharitt): FAIL with CuPy + MPI for inf norm
    assert_allclose(arr.norm(ord=np.inf, axis=par['axis']),
        np.linalg.norm(par['x'], ord=np.inf, axis=par['axis']), rtol=1e-14)
                    np.linalg.norm(par['x'], ord=np.inf, axis=par['axis']), rtol=1e-14)
    assert_allclose(arr.norm(), np.linalg.norm(par['x'].flatten()), rtol=1e-13)


7 changes: 7 additions & 0 deletions tests/test_fredholm.py
@@ -29,6 +29,13 @@
np.random.seed(42)
rank = MPI.COMM_WORLD.Get_rank()
size = MPI.COMM_WORLD.Get_size()
if backend == "cupy":
    device_count = np.cuda.runtime.getDeviceCount()
    device_id = int(
        os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")
        or rank % device_count
    )
    np.cuda.Device(device_id).use()

par1 = {
"nsl": 21,
11 changes: 9 additions & 2 deletions tests/test_linearop.py
@@ -31,6 +31,13 @@
np.random.seed(42)
rank = MPI.COMM_WORLD.Get_rank()
size = MPI.COMM_WORLD.Get_size()
if backend == "cupy":
    device_count = np.cuda.runtime.getDeviceCount()
    device_id = int(
        os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")
        or rank % device_count
    )
    np.cuda.Device(device_id).use()

par1 = {'ny': 101, 'nx': 101, 'dtype': np.float64}
par1j = {'ny': 101, 'nx': 101, 'dtype': np.complex128}
@@ -142,7 +149,7 @@ def test_power(par):
    Op = pylops.MatrixMult(A=((rank + 1) * np.ones(shape=(par['ny'], par['nx']))).astype(par['dtype']),
                           dtype=par['dtype'])
    BDiag_MPI = MPIBlockDiag(ops=[Op, ])

    # Power Operator
    Pop_MPI = BDiag_MPI ** 3
@@ -166,7 +173,7 @@
    ops = [pylops.MatrixMult((i + 1) * np.ones(shape=(par['ny'], par['nx'])).astype(par['dtype'])) for i in
           range(size)]
    BDiag = pylops.BlockDiag(ops=ops)
    Pop = BDiag * BDiag * BDiag  ## temporarily replaced BDiag ** 3 until a bug in PyLops is fixed
    Pop = BDiag * BDiag * BDiag  # temporarily replaced BDiag ** 3 until a bug in PyLops is fixed
    assert_allclose(Pop_x_np, Pop @ x_global, rtol=1e-9)
    assert_allclose(Pop_y_np, Pop.H @ y_global, rtol=1e-9)

18 changes: 13 additions & 5 deletions tests/test_matrixmult.py
@@ -19,14 +19,21 @@
from mpi4py import MPI
import pytest

from pylops.basicoperators import FirstDerivative, Identity
from pylops.basicoperators import FirstDerivative
from pylops_mpi import DistributedArray, Partition
from pylops_mpi.basicoperators import MPIMatrixMult, MPIBlockDiag

np.random.seed(42)
base_comm = MPI.COMM_WORLD
size = base_comm.Get_size()

rank = MPI.COMM_WORLD.Get_rank()
if backend == "cupy":
    device_count = np.cuda.runtime.getDeviceCount()
    device_id = int(
        os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")
        or rank % device_count
    )
    np.cuda.Device(device_id).use()
# Define test cases: (N, K, M, dtype_str)
# M, K, N are matrix dimensions A(N,K), B(K,M)
# P_prime will be ceil(sqrt(size)).
@@ -39,6 +46,7 @@
    pytest.param(2, 1, 3, "float32", id="f32_2_1_3",),
]


def _reorganize_local_matrix(x_dist, N, M, blk_cols, p_prime):
    """Re-organize a distributed array into a local matrix
    """
@@ -66,9 +74,9 @@ def test_MPIMatrixMult(N, K, M, dtype_str):
    cmplx = 1j if np.issubdtype(dtype, np.complexfloating) else 0
    base_float_dtype = np.float32 if dtype == np.complex64 else np.float64

    comm, rank, row_id, col_id, is_active = \
        MPIMatrixMult.active_grid_comm(base_comm, N, M)
    if not is_active: return
    comm, rank, row_id, col_id, is_active = MPIMatrixMult.active_grid_comm(base_comm, N, M)
    if not is_active:
        return

    size = comm.Get_size()
    p_prime = math.isqrt(size)
7 changes: 7 additions & 0 deletions tests/test_solver.py
@@ -38,6 +38,13 @@

size = MPI.COMM_WORLD.Get_size()
rank = MPI.COMM_WORLD.Get_rank()
if backend == "cupy":
    device_count = np.cuda.runtime.getDeviceCount()
    device_id = int(
        os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")
        or rank % device_count
    )
    np.cuda.Device(device_id).use()

par1 = {
"ny": 11,
9 changes: 9 additions & 0 deletions tests/test_stack.py
@@ -21,6 +21,15 @@
import pylops_mpi
from pylops_mpi.utils.dottest import dottest

rank = MPI.COMM_WORLD.Get_rank()
if backend == "cupy":
    device_count = np.cuda.runtime.getDeviceCount()
    device_id = int(
        os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")
        or rank % device_count
    )
    np.cuda.Device(device_id).use()

par1 = {'ny': 101, 'nx': 101, 'imag': 0, 'dtype': np.float64}
par1j = {'ny': 101, 'nx': 101, 'imag': 1j, 'dtype': np.complex128}
par2 = {'ny': 301, 'nx': 101, 'imag': 0, 'dtype': np.float64}
10 changes: 10 additions & 0 deletions tests/test_stackedarray.py
@@ -17,9 +17,19 @@
import numpy as npp
import pytest

from mpi4py import MPI
from pylops_mpi import DistributedArray, Partition, StackedDistributedArray

np.random.seed(42)
rank = MPI.COMM_WORLD.Get_rank()
if backend == "cupy":
    device_count = np.cuda.runtime.getDeviceCount()
    device_id = int(
        os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")
        or rank % device_count
    )
    np.cuda.Device(device_id).use()


par1 = {'global_shape': (500, 501),
        'partition': Partition.SCATTER, 'dtype': np.float64,
8 changes: 8 additions & 0 deletions tests/test_stackedlinearop.py
@@ -24,6 +24,14 @@
np.random.seed(42)
rank = MPI.COMM_WORLD.Get_rank()
size = MPI.COMM_WORLD.Get_size()
if backend == "cupy":
    device_count = np.cuda.runtime.getDeviceCount()
    device_id = int(
        os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")
        or rank % device_count
    )
    np.cuda.Device(device_id).use()


par1 = {'ny': 101, 'nx': 101, 'dtype': np.float64}
par1j = {'ny': 101, 'nx': 101, 'dtype': np.complex128}