Skip to content

Commit f1b2a09

Browse files
authored
Merge pull request #6649 from devreal/rdma-fetchop-local
OSC rdma: make sure accumulating in shared memory is safe
2 parents 7005e46 + c67e229 commit f1b2a09

File tree

3 files changed

+16
-3
lines changed

3 files changed

+16
-3
lines changed

ompi/mca/osc/rdma/osc_rdma.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ struct ompi_osc_rdma_module_t {
147147

148148
bool acc_use_amo;
149149

150+
/** whether the group is located on a single node */
151+
bool single_node;
152+
150153
/** flavor of this window */
151154
int flavor;
152155

ompi/mca/osc/rdma/osc_rdma_accumulate.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -893,10 +893,19 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
893893
(void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
894894
}
895895

896+
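/* accumulate in (shared) memory if there is only a single node
897+
* OR if the peer has a local base (ompi_osc_rdma_peer_local_base) and we either hold an exclusive lock
898+
* or other processes won't try to use the network either (acc_single_intrinsic is not set) */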
899+
bool use_shared_mem = module->single_node ||
900+
(ompi_osc_rdma_peer_local_base (peer) &&
901+
(ompi_osc_rdma_peer_is_exclusive (peer) ||
902+
!module->acc_single_intrinsic));
903+
896904
/* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute
897905
* the atomic operation. this should be safe in all cases as either 1) the user has assured us they will
898-
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */
899-
if (origin_extent <= 8 && 1 == origin_count && !ompi_osc_rdma_peer_local_base (peer)) {
906+
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock.
907+
* avoid using the NIC if the operation can be done directly in shared memory. */
908+
if (origin_extent <= 8 && 1 == origin_count && !use_shared_mem) {
900909
if (module->acc_use_amo && ompi_datatype_is_predefined (origin_datatype)) {
901910
if (NULL == result_addr) {
902911
ret = ompi_osc_rdma_acc_single_atomic (sync, origin_addr, origin_datatype, origin_extent, peer, target_address,

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
555555
local_size = ompi_comm_size (shared_comm);
556556

557557
/* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */
558-
module->use_cpu_atomics = local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
558+
module->single_node = local_size == global_size;
559+
module->use_cpu_atomics = module->single_node || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
559560

560561
if (1 == local_size) {
561562
/* no point using a shared segment if there are no other processes on this node */

0 commit comments

Comments
 (0)