
Commit 4fe3fc1

doc fixes from PR comment
1 parent 5a41da5 commit 4fe3fc1

File tree: 3 files changed, +27 -19 lines changed

  docs/source/gpu.rst
  tests_nccl/test_stackedarray_nccl.py
  tutorials/poststack_nccl.py

docs/source/gpu.rst

Lines changed: 2 additions & 0 deletions

@@ -152,6 +152,8 @@ In the following, we provide a list of modules (i.e., operators and solvers) whe
       - ✅
     * - :class:`pylops_mpi.optimization.basic.cgls`
       - ✅
+    * - :class:`pylops_mpi.signalprocessing.Fredholm1`
+      - Planned ⏳
     * - ISTA Solver
       - Planned ⏳
     * - Complex Numeric Data Type for NCCL
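To illustrate what a ✅ entry in this table means in practice, the sketch below runs :py:func:`pylops_mpi.optimization.basic.cgls` on CuPy-backed DistributedArrays with an NCCL communicator. It is not part of this commit; the API calls mirror the tutorials/poststack_nccl.py changes further down, while the operator choice (pylops.Diagonal), the sizes, and the number of iterations are purely illustrative. It assumes one GPU per MPI rank and a global size that divides evenly across ranks.

# Minimal sketch (not in this commit): cgls with a CuPy engine and an NCCL communicator
import cupy as cp
from mpi4py import MPI
import pylops
import pylops_mpi

size = MPI.COMM_WORLD.Get_size()
nccl_comm = pylops_mpi.utils._nccl.initialize_nccl_comm()

# Local operator on each rank (illustrative choice)
n = 100
Dop = pylops.Diagonal(cp.full(n, 2.0))
BDiag = pylops_mpi.basicoperators.MPIBlockDiag(ops=[Dop, ])

# CuPy-backed DistributedArrays tied to the NCCL communicator
x_dist = pylops_mpi.DistributedArray(global_shape=n * size, base_comm_nccl=nccl_comm, engine="cupy")
x_dist[:] = cp.ones(n)
y_dist = BDiag @ x_dist

# cgls runs unchanged; collective calls go through NCCL GPU-to-GPU
x0_dist = pylops_mpi.DistributedArray(global_shape=n * size, base_comm_nccl=nccl_comm, engine="cupy")
x0_dist[:] = cp.zeros(n)
xinv = pylops_mpi.optimization.basic.cgls(BDiag, y_dist, x0=x0_dist, niter=10, show=False)[0].asarray()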

tests_nccl/test_stackedarray_nccl.py

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 """Test the StackedDistributedArray class
 Designed to run with n GPUs (with 1 MPI process per GPU)
-$ mpiexec -n 10 pytest test_stackedarray.py --with-mpi
+$ mpiexec -n 10 pytest test_stackedarray_nccl.py --with-mpi

 This file employs the same test sets as test_stackedarray under NCCL environment
 """

tutorials/poststack_nccl.py

Lines changed: 24 additions & 18 deletions

@@ -1,8 +1,7 @@
 r"""
-Post Stack Inversion - 3D with Nvidia's NCCL
-=========================
-This tutorial is similar to poststack.py but it shows how to run PyLops-MPI in multi-GPU environment
-and have those GPUs communicate via NCCL
+Post Stack Inversion - 3D with NCCL
+============================================
+This tutorial is an extension of the :ref:`sphx_glr_tutorials_poststack.py` tutorial where PyLops-MPI is run in a multi-GPU setting with GPUs communicating via NCCL.
 """

 import numpy as np
@@ -21,11 +20,11 @@

 plt.close("all")
 rank = MPI.COMM_WORLD.Get_rank()
-# size = MPI.COMM_WORLD.Get_size()

 ###############################################################################
-# This section is exactly the same as MPI version. We will keep using MPI for
-# transfering meta data i.e., shapes, dims, etc.
+# Let's start by defining all the parameters required by the
+# :py:func:`pylops.avo.poststack.PoststackLinearModelling` operator.
+# Note that this section is exactly the same as the one in the MPI example, as we will keep using MPI for transferring metadata (i.e., shapes, dims, etc.)

 # Model
 model = np.load("../testdata/avo/poststack_model.npz")
@@ -59,43 +58,50 @@
 ###############################################################################
 # NCCL communication can be easily initialized with
 # :py:func:`pylops_mpi.utils._nccl.initialize_nccl_comm` operator.
-# One can think of this as GPU-counterpart of MPI.COMM_WORLD
+# One can think of this as the GPU counterpart of :code:`MPI.COMM_WORLD`
+
 nccl_comm = pylops_mpi.utils._nccl.initialize_nccl_comm()

+###############################################################################
+# We are now ready to initialize various :py:class:`pylops_mpi.DistributedArray` objects.
+# Compared to the MPI tutorial, we need to make sure that we pass :code:`base_comm_nccl = nccl_comm` and set CuPy as the engine.

-# Initialize DistributedArray with `base_comm_nccl = nccl_comm` and CuPy engine
-# This DistributedArray internally have both MPI.COMM_WORLD (by default) and `cupy.cuda.nccl.NcclCommunicator` to operate on
 m3d_dist = pylops_mpi.DistributedArray(global_shape=ny * nx * nz, base_comm_nccl=nccl_comm, engine="cupy")
 m3d_dist[:] = cp.asarray(m3d_i.flatten())

 # Do the same thing for smooth model
 mback3d_dist = pylops_mpi.DistributedArray(global_shape=ny * nx * nz, base_comm_nccl=nccl_comm, engine="cupy")
 mback3d_dist[:] = cp.asarray(mback3d_i.flatten())

+###############################################################################
 # For PostStackLinearModelling, there is no change needed to have it run with NCCL.
 # This PyLops operator has GPU-support (https://pylops.readthedocs.io/en/stable/gpu.html)
 # so it can run with DistributedArray whose engine is Cupy
+
 PPop = PoststackLinearModelling(wav, nt0=nz, spatdims=(ny_i, nx))
 Top = Transpose((ny_i, nx, nz), (2, 0, 1))
 BDiag = pylops_mpi.basicoperators.MPIBlockDiag(ops=[Top.H @ PPop @ Top, ])

-# This computation will be done in GPU. The call asarray() trigger the NCCL communication (gather result from each GPU).
-# But array `d` and `d_0` still live in GPU memory
+###############################################################################
+# This computation will be done on the GPU. The call :code:`asarray()` triggers the NCCL communication (gather result from each GPU).
+# But the arrays :code:`d` and :code:`d_0` still live in GPU memory
+
 d_dist = BDiag @ m3d_dist
 d_local = d_dist.local_array.reshape((ny_i, nx, nz))
 d = d_dist.asarray().reshape((ny, nx, nz))
 d_0_dist = BDiag @ mback3d_dist
 d_0 = d_dist.asarray().reshape((ny, nx, nz))

-# ###############################################################################
-
+###############################################################################
 # Inversion using CGLS solver - There is no code change to have run on NCCL (it handles though MPI operator and DistributedArray)
 # In this particular case, the local computation will be done in GPU. Collective communication calls
 # will be carried through NCCL GPU-to-GPU.
+
+# Inversion using CGLS solver
 minv3d_iter_dist = pylops_mpi.optimization.basic.cgls(BDiag, d_dist, x0=mback3d_dist, niter=1, show=True)[0]
 minv3d_iter = minv3d_iter_dist.asarray().reshape((ny, nx, nz))

-# ###############################################################################
+###############################################################################

 # Regularized inversion with normal equations
 epsR = 1e2
@@ -106,7 +112,7 @@
 minv3d_ne_dist = pylops_mpi.optimization.basic.cg(NormEqOp, dnorm_dist, x0=mback3d_dist, niter=10, show=True)[0]
 minv3d_ne = minv3d_ne_dist.asarray().reshape((ny, nx, nz))

-# ###############################################################################
+###############################################################################

 # Regularized inversion with regularized equations
 StackOp = pylops_mpi.MPIStackedVStack([BDiag, np.sqrt(epsR) * LapOp])
@@ -118,8 +124,8 @@
 minv3d_reg_dist = pylops_mpi.optimization.basic.cgls(StackOp, dstack_dist, x0=mback3d_dist, niter=10, show=True)[0]
 minv3d_reg = minv3d_reg_dist.asarray().reshape((ny, nx, nz))

-# ###############################################################################
-# To plot the inversion result, the array must be copied back to cpu via `get()`
+###############################################################################
+# To plot the inversion results, the arrays must be copied back to the CPU via :code:`get()`

 if rank == 0:
     # Check the distributed implementation gives the same result
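Since the copy-back step is only described in the new comment above, here is a minimal sketch (not part of this commit) of what it looks like in practice. The array names and :code:`rank` follow the tutorial; the slice and plotting calls are illustrative.

# Minimal sketch (not in this commit): move the gathered CuPy results to host memory before plotting
if rank == 0:
    minv3d_iter_cpu = minv3d_iter.get()   # cupy.ndarray -> numpy.ndarray
    minv3d_ne_cpu = minv3d_ne.get()
    minv3d_reg_cpu = minv3d_reg.get()
    plt.figure(figsize=(9, 3))
    plt.imshow(minv3d_iter_cpu[:, :, nz // 2].T, cmap="gray")
    plt.title("CGLS inversion (NCCL), depth slice")
    plt.show()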

0 commit comments