  *                         University of Stuttgart.  All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
- * Copyright (c) 2007-2016 Los Alamos National Security, LLC.  All rights
+ * Copyright (c) 2007-2017 Los Alamos National Security, LLC.  All rights
  *                         reserved.
  * Copyright (c) 2006-2008 University of Houston.  All rights reserved.
  * Copyright (c) 2010      Oracle and/or its affiliates.  All rights reserved.
@@ -452,7 +452,7 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s
     my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
     my_peer->state = (uint64_t) (uintptr_t) module->state;

-    if (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB) {
+    if (module->use_cpu_atomics) {
         /* all peers are local or it is safe to mix cpu and nic atomics */
         my_peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE;
     } else {
@@ -502,6 +502,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
     local_rank = ompi_comm_rank (shared_comm);
     local_size = ompi_comm_size (shared_comm);

+    /* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */
+    module->use_cpu_atomics = local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB);
+
     if (1 == local_size) {
         /* no point using a shared segment if there are no other processes on this node */
         return allocate_state_single (module, base, size);
@@ -631,13 +634,15 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
             }
         }

-        /* barrier to make sure all ranks have attached */
+        /* barrier to make sure all ranks have set up their region data */
         shared_comm->c_coll->coll_barrier (shared_comm, shared_comm->c_coll->coll_barrier_module);

         offset = data_base;
         for (int i = 0 ; i < local_size ; ++i) {
+            /* local pointer to peer's state */
+            ompi_osc_rdma_state_t *peer_state = (ompi_osc_rdma_state_t *) ((uintptr_t) module->segment_base + state_base + module->state_size * i);
+            ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions;
             ompi_osc_rdma_peer_extended_t *ex_peer;
-            ompi_osc_rdma_state_t *peer_state;
             ompi_osc_rdma_peer_t *peer;
             int peer_rank = temp[i].rank;

@@ -648,13 +653,12 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s

             ex_peer = (ompi_osc_rdma_peer_extended_t *) peer;

-            /* peer state local pointer */
-            peer_state = (ompi_osc_rdma_state_t *) ((uintptr_t) module->segment_base + state_base + module->state_size * i);
-
-            if (local_size == global_size || (module->selected_btl->btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB)) {
+            /* set up peer state */
+            if (module->use_cpu_atomics) {
                 /* all peers are local or it is safe to mix cpu and nic atomics */
                 peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE;
                 peer->state = (osc_rdma_counter_t) peer_state;
+                peer->state_endpoint = NULL;
             } else {
                 /* use my endpoint handle to modify the peer's state */
                 if (module->selected_btl->btl_register_mem) {
@@ -664,38 +668,39 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
                 peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, temp[0].rank);
             }

-            /* finish setting up the local peer structure */
-            if (MPI_WIN_FLAVOR_DYNAMIC != module->flavor) {
-                if (!module->same_disp_unit) {
-                    ex_peer->disp_unit = peer_state->disp_unit;
-                }
-
-                if (!module->same_size) {
-                    ex_peer->size = temp[i].size;
-                }
+            if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor || MPI_WIN_FLAVOR_CREATE == module->flavor) {
+                /* use the peer's BTL endpoint directly */
+                peer->data_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, peer_rank);
+            } else if (!module->use_cpu_atomics && temp[i].size) {
+                /* use the local leader's endpoint */
+                peer->data_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, temp[0].rank);
+            }

-                if (my_rank == peer_rank) {
-                    peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
-                }
+            ompi_osc_module_add_peer (module, peer);

-                if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
-                    if (temp[i].size) {
-                        ex_peer->super.base = state_region->base + offset;
-                        offset += temp[i].size;
-                    } else {
-                        ex_peer->super.base = 0;
-                    }
-                }
+            if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor || 0 == temp[i].size) {
+                /* nothing more to do */
+                continue;
+            }

-                ompi_osc_rdma_region_t *peer_region = (ompi_osc_rdma_region_t *) peer_state->regions;
+            /* finish setting up the local peer structure for win allocate/create */
+            if (!(module->same_disp_unit && module->same_size)) {
+                ex_peer->disp_unit = peer_state->disp_unit;
+                ex_peer->size = temp[i].size;
+            }

+            if (module->use_cpu_atomics && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
+                /* base is local and cpu atomics are available */
+                ex_peer->super.base = (uintptr_t) module->segment_base + offset;
+                peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE;
+                offset += temp[i].size;
+            } else {
                 ex_peer->super.base = peer_region->base;
+
                 if (module->selected_btl->btl_register_mem) {
                     ex_peer->super.base_handle = (mca_btl_base_registration_handle_t *) peer_region->btl_handle_data;
                 }
             }
-
-            ompi_osc_module_add_peer (module, peer);
         }
     } while (0);

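Note on the shape of the change: the patch computes the "may CPU atomics touch the window state?" decision once per module, caches it in the new use_cpu_atomics flag in allocate_state_shared, and has every later branch (including allocate_state_single, which is called from allocate_state_shared after the flag is set) consult that flag instead of re-deriving it from the BTL capability bits. The following is a minimal, self-contained sketch of that pattern; the types, flag values, and helper names below are simplified stand-ins for illustration, not the actual Open MPI osc/rdma definitions.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Simplified stand-ins for the osc/rdma structures; fields and flag values
 * here are illustrative only. */
typedef struct { uint32_t btl_flags; } btl_t;
typedef struct { btl_t *selected_btl; bool use_cpu_atomics; } module_t;
typedef struct { uint32_t flags; uintptr_t state; void *state_endpoint; } peer_t;

#define BTL_ATOMIC_SUPPORTS_GLOB 0x1   /* stand-in for MCA_BTL_ATOMIC_SUPPORTS_GLOB */
#define PEER_LOCAL_STATE         0x2   /* stand-in for OMPI_OSC_RDMA_PEER_LOCAL_STATE */

/* Decide once, at window setup time, whether CPU atomics may operate on the
 * window state: either every rank lives on this node, or the BTL says CPU
 * and NIC atomics can safely be mixed. */
void set_use_cpu_atomics (module_t *module, int local_size, int global_size)
{
    module->use_cpu_atomics = (local_size == global_size) ||
        (module->selected_btl->btl_flags & BTL_ATOMIC_SUPPORTS_GLOB);
}

/* Per-peer setup then only consults the cached flag: address the peer's
 * state through a local pointer when CPU atomics are allowed, otherwise go
 * through the local leader's registered endpoint. */
void setup_peer_state (module_t *module, peer_t *peer, void *local_state,
                       uintptr_t remote_state, void *leader_endpoint)
{
    if (module->use_cpu_atomics) {
        peer->flags |= PEER_LOCAL_STATE;
        peer->state = (uintptr_t) local_state;
        peer->state_endpoint = NULL;   /* no endpoint needed for local access */
    } else {
        peer->state = remote_state;
        peer->state_endpoint = leader_endpoint;
    }
}

Caching the decision in one place is what lets the diff also explicitly clear state_endpoint on the local path and pick the data_endpoint (peer's own endpoint vs. the local leader's) from the same flag.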