Skip to content

Commit c69f441

Browse files
committed
osc/rdma: cleanup local peer setup and fix a bug
The data endpoint was not being set correctly for local peers in some cases. This commit fixes the bug and cleans up the associated code to simplify the logic. Signed-off-by: Nathan Hjelm <[email protected]> (cherry picked from commit 31ab833) Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 4f104be commit c69f441

File tree

2 files changed

+39
-31
lines changed

2 files changed

+39
-31
lines changed

ompi/mca/osc/rdma/osc_rdma.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ struct ompi_osc_rdma_module_t {
128128
/** value of same_size info key for this window */
129129
bool same_size;
130130

131+
/** CPU atomics can be used */
132+
bool use_cpu_atomics;
133+
131134
/** passive-target synchronization will not be used in this window */
132135
bool no_locks;
133136

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 36 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
* University of Stuttgart. All rights reserved.
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
12-
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
12+
* Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
1313
* reserved.
1414
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
1515
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
@@ -446,7 +446,7 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s
446446
my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
447447
my_peer->state = (uint64_t) (uintptr_t) module->state;
448448

449-
if (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB) {
449+
if (module->use_cpu_atomics) {
450450
/* all peers are local or it is safe to mix cpu and nic atomics */
451451
my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE;
452452
} else {
@@ -496,6 +496,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
496496
local_rank = ompi_comm_rank (shared_comm);
497497
local_size = ompi_comm_size (shared_comm);
498498

499+
/* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */
500+
module->use_cpu_atomics = local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
501+
499502
if (1 == local_size) {
500503
/* no point using a shared segment if there are no other processes on this node */
501504
return allocate_state_single (module, base, size);
@@ -625,13 +628,15 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
625628
}
626629
}
627630

628-
/* barrier to make sure all ranks have attached */
631+
/* barrier to make sure all ranks have set up their region data */
629632
shared_comm->c_coll->coll_barrier(shared_comm, shared_comm->c_coll->coll_barrier_module);
630633

631634
offset = data_base;
632635
for (int i = 0 ; i < local_size ; ++i) {
636+
/* local pointer to peer's state */
637+
ompi_osc_rdma_state_t *peer_state = (ompi_osc_rdma_state_t *) ((uintptr_t) module->segment_base + state_base + module->state_size * i);
638+
ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions;
633639
ompi_osc_rdma_peer_extended_t *ex_peer;
634-
ompi_osc_rdma_state_t *peer_state;
635640
ompi_osc_rdma_peer_t *peer;
636641
int peer_rank = temp[i].rank;
637642

@@ -642,13 +647,12 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
642647

643648
ex_peer = (ompi_osc_rdma_peer_extended_t *) peer;
644649

645-
/* peer state local pointer */
646-
peer_state = (ompi_osc_rdma_state_t *) ((uintptr_t) module->segment_base + state_base + module->state_size * i);
647-
648-
if (local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB)) {
650+
/* set up peer state */
651+
if (module->use_cpu_atomics) {
649652
/* all peers are local or it is safe to mix cpu and nic atomics */
650653
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE;
651654
peer->state = (osc_rdma_counter_t) peer_state;
655+
peer->state_endpoint = NULL;
652656
} else {
653657
/* use my endpoint handle to modify the peer's state */
654658
if (module->selected_btl->btl_register_mem) {
@@ -658,38 +662,39 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
658662
peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, temp[0].rank);
659663
}
660664

661-
/* finish setting up the local peer structure */
662-
if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor) {
663-
if (!module->same_disp_unit) {
664-
ex_peer->disp_unit = peer_state->disp_unit;
665-
}
666-
667-
if (!module->same_size) {
668-
ex_peer->size = temp[i].size;
669-
}
665+
if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor || MPI_WIN_FLAVOR_CREATE == module->flavor) {
666+
/* use the peer's BTL endpoint directly */
667+
peer->data_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, peer_rank);
668+
} else if (!module->use_cpu_atomics && temp[i].size) {
669+
/* use the local leader's endpoint */
670+
peer->data_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, temp[0].rank);
671+
}
670672

671-
if (my_rank == peer_rank) {
672-
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
673-
}
673+
ompi_osc_module_add_peer (module, peer);
674674

675-
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
676-
if (temp[i].size) {
677-
ex_peer->super.base = state_region->base + offset;
678-
offset += temp[i].size;
679-
} else {
680-
ex_peer->super.base = 0;
681-
}
682-
}
675+
if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor || 0 == temp[i].size) {
676+
/* nothing more to do */
677+
continue;
678+
}
683679

684-
ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions;
680+
/* finish setting up the local peer structure for win allocate/create */
681+
if (!(module->same_disp_unit && module->same_size)) {
682+
ex_peer->disp_unit = peer_state->disp_unit;
683+
ex_peer->size = temp[i].size;
684+
}
685685

686+
if (module->use_cpu_atomics && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
687+
/* base is local and cpu atomics are available */
688+
ex_peer->super.base = (uintptr_t) module->segment_base + offset;
689+
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
690+
offset += temp[i].size;
691+
} else {
686692
ex_peer->super.base = peer_region->base;
693+
687694
if (module->selected_btl->btl_register_mem) {
688695
ex_peer->super.base_handle = (mca_btl_base_registration_handle_t *) peer_region->btl_handle_data;
689696
}
690697
}
691-
692-
ompi_osc_module_add_peer (module, peer);
693698
}
694699
} while (0);
695700

0 commit comments

Comments
 (0)