Skip to content

Commit 8dcca83

Browse files
committed
osc/rdma: Split OMPI_OSC_RDMA_PEER_CPU_ATOMICS from OMPI_OSC_RDMA_PEER_LOCAL_BASE
We need to distinguish between the ability to use CPU atomics and the ability to access the peer's memory. Signed-off-by: Joseph Schuchart <[email protected]>
1 parent 3687ce0 commit 8dcca83

File tree

3 files changed

+16
-4
lines changed

3 files changed

+16
-4
lines changed

ompi/mca/osc/rdma/osc_rdma_accumulate.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -990,7 +990,7 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
990990
* OR if we have an exclusive lock
991991
* OR if other processes won't try to use the network either */
992992
bool use_shared_mem = module->single_node ||
993-
(ompi_osc_rdma_peer_local_base (peer) &&
993+
(ompi_osc_rdma_peer_cpu_atomics (peer) &&
994994
(ompi_osc_rdma_peer_is_exclusive (peer) ||
995995
!module->acc_single_intrinsic));
996996

@@ -1013,7 +1013,7 @@ int ompi_osc_rdma_compare_and_swap (const void *origin_addr, const void *compare
10131013
lock_acquired = true;
10141014
}
10151015

1016-
if (ompi_osc_rdma_peer_local_base (peer)) {
1016+
if (ompi_osc_rdma_peer_cpu_atomics (peer)) {
10171017
ret = ompi_osc_rdma_cas_local (origin_addr, compare_addr, result_addr, dt,
10181018
peer, target_address, target_handle, module,
10191019
lock_acquired);
@@ -1095,7 +1095,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_win_t *win, const void *origin_
10951095
(void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
10961096
}
10971097

1098-
if (ompi_osc_rdma_peer_local_base (peer)) {
1098+
if (ompi_osc_rdma_peer_cpu_atomics (peer)) {
10991099
/* local/self optimization */
11001100
ret = ompi_osc_rdma_gacc_local (origin_addr, origin_count, origin_datatype, result_addr, result_count,
11011101
result_datatype, peer, target_address, target_handle, target_count,

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,7 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s
530530
module->my_peer = my_peer;
531531
module->free_after = module->rank_array;
532532
my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
533+
my_peer->flags |= OMPI_OSC_RDMA_PEER_CPU_ATOMICS;
533534
my_peer->state = (uint64_t) (uintptr_t) module->state;
534535

535536
if (use_cpu_atomics) {
@@ -839,6 +840,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
839840
if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor) {
840841
if (use_cpu_atomics && peer_rank == my_rank) {
841842
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
843+
peer->flags |= OMPI_OSC_RDMA_PEER_CPU_ATOMICS;
842844
}
843845
/* nothing more to do */
844846
continue;
@@ -853,7 +855,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
853855
ex_peer->size = temp[i].size;
854856
}
855857

856-
if (use_cpu_atomics && (MPI_WIN_FLAVOR_ALLOCATE == module->flavor || peer_rank == my_rank)) {
858+
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor || peer_rank == my_rank) {
857859
/* base is local; cpu atomics are flagged separately below, only if use_cpu_atomics is set */
858860
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
859861
ex_peer->super.base = (uintptr_t) module->segment_base + offset;
@@ -862,6 +864,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
862864
}
863865

864866
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
867+
if (use_cpu_atomics) {
868+
peer->flags |= OMPI_OSC_RDMA_PEER_CPU_ATOMICS;
869+
}
865870
offset += temp[i].size;
866871
} else {
867872
ex_peer->super.base = peer_region->base;

ompi/mca/osc/rdma/osc_rdma_peer.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ enum {
142142
OMPI_OSC_RDMA_PEER_BASE_FREE = 0x40,
143143
/** peer was demand locked as part of lock-all (when in demand locking mode) */
144144
OMPI_OSC_RDMA_PEER_DEMAND_LOCKED = 0x80,
145+
/** we can use CPU atomics on that peer */
146+
OMPI_OSC_RDMA_PEER_CPU_ATOMICS = 0x100,
145147
};
146148

147149
/**
@@ -224,6 +226,11 @@ static inline bool ompi_osc_rdma_peer_local_base (ompi_osc_rdma_peer_t *peer)
224226
return !!(peer->flags & OMPI_OSC_RDMA_PEER_LOCAL_BASE);
225227
}
226228

229+
static inline bool ompi_osc_rdma_peer_cpu_atomics (ompi_osc_rdma_peer_t *peer)
230+
{
231+
return ompi_osc_rdma_peer_local_base(peer) && !!(peer->flags & OMPI_OSC_RDMA_PEER_CPU_ATOMICS);
232+
}
233+
227234
/**
228235
* @brief check if the peer's state pointer is local to this process
229236
*

0 commit comments

Comments
 (0)