Skip to content

Commit 31ab833

Browse files
committed
osc/rdma: cleanup local peer setup and fix a bug
The data endpoint was not being set correctly for local peers in some cases. This commit fixes the bug and cleans the associated code to simplify the logic. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent c38866e commit 31ab833

File tree

2 files changed

+39
-31
lines changed

2 files changed

+39
-31
lines changed

ompi/mca/osc/rdma/osc_rdma.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ struct ompi_osc_rdma_module_t {
128128
/** value of same_size info key for this window */
129129
bool same_size;
130130

131+
/** CPU atomics can be used */
132+
bool use_cpu_atomics;
133+
131134
/** passive-target synchronization will not be used in this window */
132135
bool no_locks;
133136

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 36 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
* University of Stuttgart. All rights reserved.
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
12-
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
12+
* Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
1313
* reserved.
1414
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
1515
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
@@ -452,7 +452,7 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s
452452
my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
453453
my_peer->state = (uint64_t) (uintptr_t) module->state;
454454

455-
if (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB) {
455+
if (module->use_cpu_atomics) {
456456
/* all peers are local or it is safe to mix cpu and nic atomics */
457457
my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE;
458458
} else {
@@ -502,6 +502,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
502502
local_rank = ompi_comm_rank (shared_comm);
503503
local_size = ompi_comm_size (shared_comm);
504504

505+
/* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */
506+
module->use_cpu_atomics = local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
507+
505508
if (1 == local_size) {
506509
/* no point using a shared segment if there are no other processes on this node */
507510
return allocate_state_single (module, base, size);
@@ -631,13 +634,15 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
631634
}
632635
}
633636

634-
/* barrier to make sure all ranks have attached */
637+
/* barrier to make sure all ranks have set up their region data */
635638
shared_comm->c_coll->coll_barrier(shared_comm, shared_comm->c_coll->coll_barrier_module);
636639

637640
offset = data_base;
638641
for (int i = 0 ; i < local_size ; ++i) {
642+
/* local pointer to peer's state */
643+
ompi_osc_rdma_state_t *peer_state = (ompi_osc_rdma_state_t *) ((uintptr_t) module->segment_base + state_base + module->state_size * i);
644+
ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions;
639645
ompi_osc_rdma_peer_extended_t *ex_peer;
640-
ompi_osc_rdma_state_t *peer_state;
641646
ompi_osc_rdma_peer_t *peer;
642647
int peer_rank = temp[i].rank;
643648

@@ -648,13 +653,12 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
648653

649654
ex_peer = (ompi_osc_rdma_peer_extended_t *) peer;
650655

651-
/* peer state local pointer */
652-
peer_state = (ompi_osc_rdma_state_t *) ((uintptr_t) module->segment_base + state_base + module->state_size * i);
653-
654-
if (local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB)) {
656+
/* set up peer state */
657+
if (module->use_cpu_atomics) {
655658
/* all peers are local or it is safe to mix cpu and nic atomics */
656659
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE;
657660
peer->state = (osc_rdma_counter_t) peer_state;
661+
peer->state_endpoint = NULL;
658662
} else {
659663
/* use my endpoint handle to modify the peer's state */
660664
if (module->selected_btl->btl_register_mem) {
@@ -664,38 +668,39 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
664668
peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, temp[0].rank);
665669
}
666670

667-
/* finish setting up the local peer structure */
668-
if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor) {
669-
if (!module->same_disp_unit) {
670-
ex_peer->disp_unit = peer_state->disp_unit;
671-
}
672-
673-
if (!module->same_size) {
674-
ex_peer->size = temp[i].size;
675-
}
671+
if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor || MPI_WIN_FLAVOR_CREATE == module->flavor) {
672+
/* use the peer's BTL endpoint directly */
673+
peer->data_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, peer_rank);
674+
} else if (!module->use_cpu_atomics && temp[i].size) {
675+
/* use the local leader's endpoint */
676+
peer->data_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, temp[0].rank);
677+
}
676678

677-
if (my_rank == peer_rank) {
678-
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
679-
}
679+
ompi_osc_module_add_peer (module, peer);
680680

681-
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
682-
if (temp[i].size) {
683-
ex_peer->super.base = state_region->base + offset;
684-
offset += temp[i].size;
685-
} else {
686-
ex_peer->super.base = 0;
687-
}
688-
}
681+
if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor || 0 == temp[i].size) {
682+
/* nothing more to do */
683+
continue;
684+
}
689685

690-
ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions;
686+
/* finish setting up the local peer structure for win allocate/create */
687+
if (!(module->same_disp_unit && module->same_size)) {
688+
ex_peer->disp_unit = peer_state->disp_unit;
689+
ex_peer->size = temp[i].size;
690+
}
691691

692+
if (module->use_cpu_atomics && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
693+
/* base is local and cpu atomics are available */
694+
ex_peer->super.base = (uintptr_t) module->segment_base + offset;
695+
peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
696+
offset += temp[i].size;
697+
} else {
692698
ex_peer->super.base = peer_region->base;
699+
693700
if (module->selected_btl->btl_register_mem) {
694701
ex_peer->super.base_handle = (mca_btl_base_registration_handle_t *) peer_region->btl_handle_data;
695702
}
696703
}
697-
698-
ompi_osc_module_add_peer (module, peer);
699704
}
700705
} while (0);
701706

0 commit comments

Comments
 (0)