|
16 | 16 | plt.close("all") |
17 | 17 | np.random.seed(42) |
18 | 18 |
|
| 19 | +# MPI parameters |
| 20 | +size = MPI.COMM_WORLD.Get_size()  # number of MPI ranks (processes)
| 21 | +rank = MPI.COMM_WORLD.Get_rank()  # rank of the current process
| 22 | + |
| 23 | + |
19 | 24 | # Defining the global shape of the distributed array |
20 | 25 | global_shape = (10, 5) |
21 | 26 |
|
22 | 27 | ############################################################################### |
23 | | -# Let's start by defining the |
24 | | -# class with the input parameters ``global_shape``, |
25 | | -# ``partition``, and ``axis``. Here's an example implementation of the class with ``axis=0``. |
| 28 | +# Let's start by instantiating the class with the input parameters
| 29 | +# ``global_shape``, ``partition``, and ``axis``. Here's an example with
| 30 | +# ``axis=0``.
26 | 31 | arr = pylops_mpi.DistributedArray(global_shape=global_shape, |
27 | 32 | partition=pylops_mpi.Partition.SCATTER, |
28 | 33 | axis=0) |
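|  | +
|  | +# Quick sanity check (a sketch, assuming the ``local_shape`` and
|  | +# ``global_shape`` properties of ``pylops_mpi.DistributedArray``): with
|  | +# ``axis=0``, the rows of the global array are split across the ranks.
|  | +print(f"Rank {rank}: local shape {arr.local_shape} "
|  | +      f"of global shape {arr.global_shape}")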
|
72 | 77 | pylops_mpi.plot_local_arrays(arr2, "Distributed Array - 2", vmin=0, vmax=1) |
73 | 78 |
|
74 | 79 | ############################################################################### |
| 80 | +# Let's now consider various operations that one can perform on
| 81 | +# :py:class:`pylops_mpi.DistributedArray` objects. |
| 82 | +# |
75 | 83 | # **Scaling** - Each process operates on its local portion of |
76 | 84 | # the array and scales the corresponding elements by a given scalar. |
77 | 85 | scale_arr = .5 * arr1 |
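|  | +
|  | +# Optional check (a sketch; it relies on ``asarray``, used later in this
|  | +# example, gathering the full array on every rank): scaling the distributed
|  | +# array matches scaling the gathered NumPy array.
|  | +print(f"Scaling check (Rank {rank}): "
|  | +      f"{np.allclose(scale_arr.asarray(), .5 * arr1.asarray())}")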
|
101 | 109 | # of the array and multiplies the corresponding elements together. |
102 | 110 | mult_arr = arr1 * arr2 |
103 | 111 | pylops_mpi.plot_local_arrays(mult_arr, "Multiplication", vmin=0, vmax=1) |
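|  | +
|  | +# Similarly (a sketch, again assuming ``asarray``), the element-wise product
|  | +# of the distributed arrays matches the product of the gathered arrays.
|  | +print(f"Multiplication check (Rank {rank}): "
|  | +      f"{np.allclose(mult_arr.asarray(), arr1.asarray() * arr2.asarray())}")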
| 112 | + |
| 113 | +############################################################################### |
| 114 | +# Finally, let's look at the case where parallelism could be applied over |
| 115 | +# multiple axes - and more specifically one belonging to the model/data and one |
| 116 | +# to the operator. This kind of "2D"-parallelism requires repeating parts of |
| 117 | +# the model/data over groups of ranks. However, when global operations such as |
| 118 | +# ``dot`` or ``norm`` are applied to a ``pylops_mpi.DistributedArray`` of
| 119 | +# this kind, we need to ensure that the repeated portions do not all contribute
| 120 | +# to the computation. This can be achieved via the ``mask`` input parameter: |
| 121 | +# a list of size equal to the number of ranks, whose elements contain the index |
| 122 | +# of the subgroup/subcommunicator (with partial arrays in different groups |
| 123 | +# being identical to each other).
| 124 | + |
| 125 | +# Defining the local and global shape of the distributed array |
| 126 | +local_shape = 5 |
| 127 | +global_shape = local_shape * size |
| 128 | + |
| 129 | +# Create mask |
| 130 | +nsub = 2 |
| 131 | +mask = np.repeat(np.arange(size // nsub), nsub) |
| 132 | +if rank == 0: print(f"Mask: {mask}") |
| 133 | + |
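|  | +# With, for instance, 4 ranks and ``nsub=2`` (an illustrative assumption;
|  | +# ``size`` is taken to be a multiple of ``nsub``), the mask becomes
|  | +# ``[0, 0, 1, 1]``: ranks 0-1 form one subcommunicator and ranks 2-3 another,
|  | +# so ``dot`` and ``norm`` are reduced only within each pair.
|  | +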
| 134 | +# Create and fill the distributed array |
| 135 | +x = pylops_mpi.DistributedArray(global_shape=global_shape, |
| 136 | +                                partition=pylops_mpi.Partition.SCATTER,
| 137 | + mask=mask) |
| 138 | +x[:] = (rank + 1) * np.ones(local_shape)
| 139 | +xloc = x.asarray() |
| 140 | + |
|  | +# Portion of the gathered array that belongs to this rank's subgroup
|  | +isub = slice(local_shape * nsub * (rank // nsub),
|  | +             local_shape * nsub * (rank // nsub + 1))
|  | +
|  | +# Dot product
|  | +dot = x.dot(x)
|  | +dotloc = np.dot(xloc[isub], xloc[isub])
|  | +print(f"Dot check (Rank {rank}): {np.allclose(dot, dotloc)}")
|  | +
|  | +# Norm
|  | +norm = x.norm(ord=2)
|  | +normloc = np.linalg.norm(xloc[isub], ord=2)
|  | +print(f"Norm check (Rank {rank}): {np.allclose(norm, normloc)}")
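|  | +
|  | +###############################################################################
|  | +# To run this example on several ranks, launch the script with your MPI
|  | +# launcher, e.g. ``mpiexec -n 4 python <name_of_this_script>.py`` (both the
|  | +# launcher and the script name are placeholders for those on your system).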