Skip to content

Commit f326ee4

Browse files
authored
Merge pull request #208 from LLNL/multisendrecv
MultiSendRecv operation
2 parents 1812bac + 943466a commit f326ee4

23 files changed

+993
-24
lines changed

benchmark/benchmark_ops.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,18 @@ void run_benchmark(cxxopts::ParseResult& parsed_opts) {
124124
}
125125
}
126126
}
127+
if (Op == Al::AlOperation::multisendrecv) {
128+
// Send to and receive from every other rank.
129+
op_options.srcs = std::vector<int>(comm_wrapper.size() - 1);
130+
op_options.dests = std::vector<int>(comm_wrapper.size() - 1);
131+
for (int i = 0, rank = 0; rank < comm_wrapper.size(); ++rank) {
132+
if (rank != comm_wrapper.rank()) {
133+
op_options.srcs[i] = rank;
134+
op_options.dests[i] = rank;
135+
++i;
136+
}
137+
}
138+
}
127139

128140
size_t num_iters = parsed_opts["num-iters"].as<size_t>();
129141
size_t num_warmup = parsed_opts["num-warmup"].as<size_t>();
@@ -135,6 +147,11 @@ void run_benchmark(cxxopts::ParseResult& parsed_opts) {
135147
op_options.recv_counts = op_options.send_counts;
136148
op_options.recv_displs = op_options.send_displs;
137149
}
150+
if (Op == Al::AlOperation::multisendrecv) {
151+
// Set up to be similar to vector operations.
152+
op_options.send_counts = std::vector<size_t>(comm_wrapper.size() - 1, size);
153+
op_options.recv_counts = op_options.send_counts;
154+
}
138155

139156
OpDispatcher<Backend, T> op_runner(op, op_options);
140157
size_t in_size = op_runner.get_input_size(size, comm_wrapper.comm());

benchmark/run_benchmarks.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,19 +86,20 @@
8686
pt2pt_ops = [OpDesc('send', inplace=False, min_procs=2),
8787
OpDesc('recv', inplace=False, min_procs=2),
8888
OpDesc('sendrecv', inplace=False, min_procs=2)]
89+
other_ops = [OpDesc('multisendrecv')]
8990

9091
# Full set of cases.
9192
test_cases = {
9293
'mpi': {
93-
'ops': coll_ops + vector_coll_ops + pt2pt_ops,
94+
'ops': coll_ops + vector_coll_ops + pt2pt_ops + other_ops,
9495
'datatypes': mpi_datatypes
9596
},
9697
'nccl': {
97-
'ops': coll_ops + vector_coll_ops + pt2pt_ops,
98+
'ops': coll_ops + vector_coll_ops + pt2pt_ops + other_ops,
9899
'datatypes': nccl_datatypes
99100
},
100101
'ht': {
101-
'ops': coll_ops + vector_coll_ops + pt2pt_ops,
102+
'ops': coll_ops + vector_coll_ops + pt2pt_ops + other_ops,
102103
'datatypes': mpi_datatypes
103104
},
104105
}

docs/comm.rst

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,12 @@ Send and Receive
8787

8888
These are the most basic communication operations, and need to be paired: one process sends data and the other process receives the data.
8989

90-
There are some subtleties here that one should be aware of.
91-
Unlike MPI, Aluminum does not support tagged messages.
92-
You should properly order your sends and receives, so that you start your sends and receives in the same order.
93-
Finally, note that sends may (but are not guaranteed) to complete *before* the data has been sent or before it has been received (it is, however, safe to read or modify a send buffer after the send has completed).
90+
There are some subtleties here that one should be aware of:
91+
92+
* Unlike MPI, Aluminum does not support tagged messages.
93+
* Also unlike MPI, a receive will only match a send with the exact same ``count`` (whereas MPI treats the receive count as an upper bound).
94+
* You should properly order your sends and receives, so that you start your sends and receives in the same order.
95+
* Finally, note that sends may (but are not guaranteed to) complete *before* the data has been sent or before it has been received (it is, however, safe to read or modify a send buffer after the send has completed).
9496

9597
There are no special in-place versions of send or receive, as they only have one buffer argument.
9698

@@ -116,6 +118,20 @@ There is an in-place version of ``SendRecv`` (this is similar to ``MPI_Sendrecv_
116118
* :cpp:func:`Al::SendRecv()`
117119
* :cpp:func:`Al::NonblockingSendRecv()`
118120

121+
.. _multisendrecv:
122+
123+
MultiSendRecv
124+
^^^^^^^^^^^^^
125+
126+
This implements an arbitrary sequence of ``Send`` and ``Recv`` operations as a single operation.
127+
This is similar to an ``Alltoall`` collective, but does not require that all ranks in a communicator participate.
128+
Likewise, unlike a sequence of ``SendRecv`` operations, this does not require all ranks to both send and receive; i.e., there may be asymmetric communication.
129+
130+
There is an in-place version of ``MultiSendRecv``, which differs slightly from a regular ``MultiSendRecv``: It functions exactly like a sequence of in-place ``SendRecv`` operations.
131+
132+
* :cpp:func:`Al::MultiSendRecv()`
133+
* :cpp:func:`Al::NonblockingMultiSendRecv()`
134+
119135
Collective Operations
120136
---------------------
121137

include/Al.hpp

Lines changed: 116 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1404,7 +1404,7 @@ void NonblockingScatterv(
14041404
/**
14051405
* Send a point-to-point message.
14061406
*
1407-
* See \verbatim :embed:rst:inline :ref:`Send and Recv <send-and-recv>`. \endverbatim
1407+
* See \verbatim embed:rst:inline :ref:`Send and Recv <send-and-recv>`. \endverbatim
14081408
*
14091409
* @param[in] sendbuf Buffer containing the local data to send.
14101410
* @param[in] count Length of \p sendbuf in elements of type `T`.
@@ -1439,7 +1439,7 @@ void NonblockingSend(const T* sendbuf, size_t count, int dest,
14391439
/**
14401440
* Receive a point-to-point message.
14411441
*
1442-
* See \verbatim :embed:rst:inline :ref:`Send and Recv <send-and-recv>`. \endverbatim
1442+
* See \verbatim embed:rst:inline :ref:`Send and Recv <send-and-recv>`. \endverbatim
14431443
*
14441444
* @param[out] recvbuf Buffer to receive the sent data.
14451445
* @param[in] count Length of \p recvbuf in elements of type `T`.
@@ -1474,7 +1474,7 @@ void NonblockingRecv(T* recvbuf, size_t count, int src,
14741474
/**
14751475
* Perform a simultaneous Send() and Recv().
14761476
*
1477-
* See \verbatim :embed:rst:inline :ref:`SendRecv <sendrecv>`. \endverbatim
1477+
* See \verbatim embed:rst:inline :ref:`SendRecv <sendrecv>`. \endverbatim
14781478
*
14791479
* @param[in] sendbuf Buffer containing the local data to send.
14801480
* @param[in] send_count Length of \p sendbuf in elements of type `T`.
@@ -1497,7 +1497,7 @@ void SendRecv(const T* sendbuf, size_t send_count, int dest,
14971497
/**
14981498
* Perform an \verbatim embed:rst:inline :ref:`in-place <comm-inplace>` \endverbatim SendRecv().
14991499
*
1500-
* @param[in,out] buffer Input and output buffer initially contaiuning the
1500+
* @param[in,out] buffer Input and output buffer initially containing the
15011501
* local data to send. Will be replaced with the received data.
15021502
* @param[in] count Length of data to send and receive. \p buffer should
15031503
* be `count` elements.
@@ -1522,8 +1522,8 @@ void SendRecv(T* buffer, size_t count, int dest, int src,
15221522
* @param[out] recvbuf Buffer to receive the sent data.
15231523
* @param[in] recv_count Length of \p recvbuf in elements of type `T`.
15241524
* @param[in] src Rank in comm to receive from.
1525-
* @param[out] req Request object for the asynchronous operation.
15261525
* @param[in] comm Communicator to send/recv within.
1526+
* @param[out] req Request object for the asynchronous operation.
15271527
*/
15281528
template <typename Backend, typename T>
15291529
void NonblockingSendRecv(const T* sendbuf, size_t send_count, int dest,
@@ -1548,8 +1548,8 @@ void NonblockingSendRecv(const T* sendbuf, size_t send_count, int dest,
15481548
* be `count` elements.
15491549
* @param[in] dest Rank in comm to send to.
15501550
* @param[in] src Rank in comm to receive from.
1551-
* @param[out] req Request object for the asynchronous operation.
15521551
* @param[in] comm Communicator to send/recv within.
1552+
* @param[out] req Request object for the asynchronous operation.
15531553
*/
15541554
template <typename Backend, typename T>
15551555
void NonblockingSendRecv(T* buffer, size_t count, int dest, int src,
@@ -1560,6 +1560,116 @@ void NonblockingSendRecv(T* buffer, size_t count, int dest, int src,
15601560
Backend::template NonblockingSendRecv<T>(buffer, count, dest, src, comm, req);
15611561
}
15621562

1563+
/**
1564+
* Perform an arbitrary sequence of Send() and Recv() operations.
1565+
*
1566+
* See \verbatim embed:rst:inline :ref:`MultiSendRecv <multisendrecv>`. \endverbatim
1567+
*
1568+
* @param[in] send_buffers Vector of buffers containing the local data to send.
1569+
* @param[in] send_counts Vector of the lengths of each buffer in
1570+
* \p send_buffers in elements of type `T`.
1571+
* @param[in] dests Vector of the destination rank to send each buffer to.
1572+
* @param[out] recv_buffers Vector of buffers to receive data in.
1573+
* @param[in] recv_counts Vector of the lengths of each buffer in
1574+
* \p recv_buffers in elements of type `T`.
1575+
* @param[in] srcs Vector of the ranks to receive from.
1576+
* @param[in] comm Communicator to send/recv within.
1577+
*/
1578+
template <typename Backend, typename T>
1579+
void MultiSendRecv(std::vector<const T*> send_buffers,
1580+
std::vector<size_t> send_counts,
1581+
std::vector<int> dests,
1582+
std::vector<T*> recv_buffers,
1583+
std::vector<size_t> recv_counts,
1584+
std::vector<int> srcs,
1585+
typename Backend::comm_type& comm) {
1586+
internal::trace::record_op<Backend, T>("multisendrecv", comm,
1587+
send_buffers, send_counts, dests,
1588+
recv_buffers, recv_counts, srcs);
1589+
Backend::template MultiSendRecv<T>(send_buffers, send_counts, dests,
1590+
recv_buffers, recv_counts, srcs,
1591+
comm);
1592+
}
1593+
1594+
/**
1595+
* Perform an \verbatim embed:rst:inline :ref:`in-place <comm-inplace>` \endverbatim MultiSendRecv().
1596+
*
1597+
* @param[in, out] buffers Vector of input and output buffers initially
1598+
* containing the local data to send. Will be replaced with the received
1599+
* data.
1600+
* @param[in] counts Vector of the lengths of data to send and receive.
1601+
* @param[in] dests Vector of the destination rank to send each buffer to.
1602+
* @param[in] srcs Vector of the ranks to receive from.
1603+
* @param[in] comm Communicator to send/recv within.
1604+
*/
1605+
template <typename Backend, typename T>
1606+
void MultiSendRecv(std::vector<T*> buffers,
1607+
std::vector<size_t> counts,
1608+
std::vector<int> dests,
1609+
std::vector<int> srcs,
1610+
typename Backend::comm_type& comm) {
1611+
internal::trace::record_op<Backend, T>("multisendrecv", comm,
1612+
buffers, counts, dests, srcs);
1613+
Backend::template MultiSendRecv<T>(buffers, counts, dests, srcs, comm);
1614+
}
1615+
1616+
/**
1617+
* Perform a \verbatim embed:rst:inline :ref:`non-blocking <comm-nonblocking>` \endverbatim MultiSendRecv().
1618+
*
1619+
* @param[in] send_buffers Vector of buffers containing the local data to send.
1620+
* @param[in] send_counts Vector of the lengths of each buffer in
1621+
* \p send_buffers in elements of type `T`.
1622+
* @param[in] dests Vector of the destination rank to send each buffer to.
1623+
* @param[out] recv_buffers Vector of buffers to receive data in.
1624+
* @param[in] recv_counts Vector of the lengths of each buffer in
1625+
* \p recv_buffers in elements of type `T`.
1626+
* @param[in] srcs Vector of the ranks to receive from.
1627+
* @param[in] comm Communicator to send/recv within.
1628+
* @param[out] req Request object for the asynchronous operation.
1629+
*/
1630+
template <typename Backend, typename T>
1631+
void NonblockingMultiSendRecv(std::vector<const T*> send_buffers,
1632+
std::vector<size_t> send_counts,
1633+
std::vector<int> dests,
1634+
std::vector<T*> recv_buffers,
1635+
std::vector<size_t> recv_counts,
1636+
std::vector<int> srcs,
1637+
typename Backend::comm_type& comm,
1638+
typename Backend::req_type& req) {
1639+
internal::trace::record_op<Backend, T>("nonblocking-multisendrecv", comm,
1640+
send_buffers, send_counts, dests,
1641+
recv_buffers, recv_counts, srcs);
1642+
Backend::template NonblockingMultiSendRecv<T>(send_buffers, send_counts, dests,
1643+
recv_buffers, recv_counts, srcs,
1644+
comm, req);
1645+
}
1646+
1647+
/**
1648+
* Perform a \verbatim embed:rst:inline :ref:`non-blocking <comm-nonblocking>` \endverbatim
1649+
* \verbatim embed:rst:inline :ref:`in-place <comm-inplace>` \endverbatim MultiSendRecv().
1650+
*
1651+
* @param[in, out] buffers Vector of input and output buffers initially
1652+
* containing the local data to send. Will be replaced with the received
1653+
* data.
1654+
* @param[in] counts Vector of the lengths of data to send and receive.
1655+
* @param[in] dests Vector of the destination rank to send each buffer to.
1656+
* @param[in] srcs Vector of the ranks to receive from.
1657+
* @param[in] comm Communicator to send/recv within.
1658+
* @param[out] req Request object for the asynchronous operation.
1659+
*/
1660+
template <typename Backend, typename T>
1661+
void NonblockingMultiSendRecv(std::vector<T*> buffers,
1662+
std::vector<size_t> counts,
1663+
std::vector<int> dests,
1664+
std::vector<int> srcs,
1665+
typename Backend::comm_type& comm,
1666+
typename Backend::req_type& req) {
1667+
internal::trace::record_op<Backend, T>("nonblocking-multisendrecv", comm,
1668+
buffers, counts, dests, srcs);
1669+
Backend::template NonblockingMultiSendRecv<T>(buffers, counts, dests, srcs,
1670+
comm, req);
1671+
}
1672+
15631673
/**
15641674
* Return true if the asynchronous operation associated with \p req
15651675
* has completed.

include/aluminum/ht/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ set_source_path(THIS_DIR_HEADERS
1010
communicator.hpp
1111
gather.hpp
1212
gatherv.hpp
13+
multisendrecv.hpp
1314
reduce.hpp
1415
reduce_scatter.hpp
1516
reduce_scatterv.hpp

include/aluminum/ht/communicator.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ namespace Al {
3737
namespace internal {
3838
namespace ht {
3939

40+
// Define the tag that point-to-point operations will use.
41+
constexpr int pt2pt_tag = 2;
42+
4043
/** Communicator for host-transfer operations. */
4144
class HostTransferCommunicator: public MPICommAndStreamWrapper<AlGpuStream_t> {
4245
public:

0 commit comments

Comments
 (0)