9
9
* University of Stuttgart. All rights reserved.
10
10
* Copyright (c) 2004-2005 The Regents of the University of California.
11
11
* All rights reserved.
12
- * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
12
+ * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
13
13
* reserved.
14
14
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
15
15
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
@@ -452,7 +452,7 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s
452
452
my_peer -> flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE ;
453
453
my_peer -> state = (uint64_t ) (uintptr_t ) module -> state ;
454
454
455
- if (module -> selected_btl -> btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB ) {
455
+ if (module -> use_cpu_atomics ) {
456
456
/* all peers are local or it is safe to mix cpu and nic atomics */
457
457
my_peer -> flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE ;
458
458
} else {
@@ -502,6 +502,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
502
502
local_rank = ompi_comm_rank (shared_comm );
503
503
local_size = ompi_comm_size (shared_comm );
504
504
505
+ /* CPU atomics can be used if every process is on the same node or the NIC allows mixing CPU and NIC atomics */
506
+ module -> use_cpu_atomics = local_size == global_size || (module -> selected_btl -> btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB );
507
+
505
508
if (1 == local_size ) {
506
509
/* no point using a shared segment if there are no other processes on this node */
507
510
return allocate_state_single (module , base , size );
@@ -631,13 +634,15 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
631
634
}
632
635
}
633
636
634
- /* barrier to make sure all ranks have attached */
637
+ /* barrier to make sure all ranks have set up their region data */
635
638
shared_comm -> c_coll -> coll_barrier (shared_comm , shared_comm -> c_coll -> coll_barrier_module );
636
639
637
640
offset = data_base ;
638
641
for (int i = 0 ; i < local_size ; ++ i ) {
642
+ /* local pointer to peer's state */
643
+ ompi_osc_rdma_state_t * peer_state = (ompi_osc_rdma_state_t * ) ((uintptr_t ) module -> segment_base + state_base + module -> state_size * i );
644
+ ompi_osc_rdma_region_t * peer_region = (ompi_osc_rdma_region_t * ) peer_state -> regions ;
639
645
ompi_osc_rdma_peer_extended_t * ex_peer ;
640
- ompi_osc_rdma_state_t * peer_state ;
641
646
ompi_osc_rdma_peer_t * peer ;
642
647
int peer_rank = temp [i ].rank ;
643
648
@@ -648,13 +653,12 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
648
653
649
654
ex_peer = (ompi_osc_rdma_peer_extended_t * ) peer ;
650
655
651
- /* peer state local pointer */
652
- peer_state = (ompi_osc_rdma_state_t * ) ((uintptr_t ) module -> segment_base + state_base + module -> state_size * i );
653
-
654
- if (local_size == global_size || (module -> selected_btl -> btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB )) {
656
+ /* set up peer state */
657
+ if (module -> use_cpu_atomics ) {
655
658
/* all peers are local or it is safe to mix cpu and nic atomics */
656
659
peer -> flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE ;
657
660
peer -> state = (osc_rdma_counter_t ) peer_state ;
661
+ peer -> state_endpoint = NULL ;
658
662
} else {
659
663
/* use my endpoint handle to modify the peer's state */
660
664
if (module -> selected_btl -> btl_register_mem ) {
@@ -664,38 +668,39 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
664
668
peer -> state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module , temp [0 ].rank );
665
669
}
666
670
667
- /* finish setting up the local peer structure */
668
- if (MPI_WIN_FLAVOR_DYNAMIC != module -> flavor ) {
669
- if (!module -> same_disp_unit ) {
670
- ex_peer -> disp_unit = peer_state -> disp_unit ;
671
- }
672
-
673
- if (!module -> same_size ) {
674
- ex_peer -> size = temp [i ].size ;
675
- }
671
+ if (MPI_WIN_FLAVOR_DYNAMIC == module -> flavor || MPI_WIN_FLAVOR_CREATE == module -> flavor ) {
672
+ /* use the peer's BTL endpoint directly */
673
+ peer -> data_endpoint = ompi_osc_rdma_peer_btl_endpoint (module , peer_rank );
674
+ } else if (!module -> use_cpu_atomics && temp [i ].size ) {
675
+ /* use the local leader's endpoint */
676
+ peer -> data_endpoint = ompi_osc_rdma_peer_btl_endpoint (module , temp [0 ].rank );
677
+ }
676
678
677
- if (my_rank == peer_rank ) {
678
- peer -> flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE ;
679
- }
679
+ ompi_osc_module_add_peer (module , peer );
680
680
681
- if (MPI_WIN_FLAVOR_ALLOCATE == module -> flavor ) {
682
- if (temp [i ].size ) {
683
- ex_peer -> super .base = state_region -> base + offset ;
684
- offset += temp [i ].size ;
685
- } else {
686
- ex_peer -> super .base = 0 ;
687
- }
688
- }
681
+ if (MPI_WIN_FLAVOR_DYNAMIC == module -> flavor || 0 == temp [i ].size ) {
682
+ /* nothing more to do */
683
+ continue ;
684
+ }
689
685
690
- ompi_osc_rdma_region_t * peer_region = (ompi_osc_rdma_region_t * ) peer_state -> regions ;
686
+ /* finish setting up the local peer structure for win allocate/create */
687
+ if (!(module -> same_disp_unit && module -> same_size )) {
688
+ ex_peer -> disp_unit = peer_state -> disp_unit ;
689
+ ex_peer -> size = temp [i ].size ;
690
+ }
691
691
692
+ if (module -> use_cpu_atomics && MPI_WIN_FLAVOR_ALLOCATE == module -> flavor ) {
693
+ /* base is local and cpu atomics are available */
694
+ ex_peer -> super .base = (uintptr_t ) module -> segment_base + offset ;
695
+ peer -> flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE ;
696
+ offset += temp [i ].size ;
697
+ } else {
692
698
ex_peer -> super .base = peer_region -> base ;
699
+
693
700
if (module -> selected_btl -> btl_register_mem ) {
694
701
ex_peer -> super .base_handle = (mca_btl_base_registration_handle_t * ) peer_region -> btl_handle_data ;
695
702
}
696
703
}
697
-
698
- ompi_osc_module_add_peer (module , peer );
699
704
}
700
705
} while (0 );
701
706
0 commit comments