@@ -326,7 +326,7 @@ static int ompi_osc_rdma_initialize_region (ompi_osc_rdma_module_t *module, void
326326 region -> len = size ;
327327
328328 if (module -> selected_btl -> btl_register_mem && size ) {
329- if (MPI_WIN_FLAVOR_ALLOCATE != module -> flavor ) {
329+ if (MPI_WIN_FLAVOR_ALLOCATE != module -> flavor || NULL == module -> state_handle ) {
330330 ret = ompi_osc_rdma_register (module , MCA_BTL_ENDPOINT_ANY , * base , size , MCA_BTL_REG_FLAG_ACCESS_ANY ,
331331 & module -> base_handle );
332332 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
@@ -450,6 +450,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
450450 size_t local_rank_array_size , leader_peer_data_size ;
451451 int my_rank = ompi_comm_rank (module -> comm );
452452 int global_size = ompi_comm_size (module -> comm );
453+ ompi_osc_rdma_region_t * state_region ;
453454 int my_base_offset = 0 ;
454455 struct _local_data * temp ;
455456 char * data_file ;
@@ -470,8 +471,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
470471 leader_peer_data_size = module -> region_size * module -> node_count ;
471472
472473 /* calculate base offsets */
473- module -> state_offset = state_base = local_rank_array_size ;
474- data_base = local_rank_array_size + leader_peer_data_size + module -> state_size * local_size ;
474+ module -> state_offset = state_base = local_rank_array_size + module -> region_size ;
475+ data_base = state_base + leader_peer_data_size + module -> state_size * local_size ;
475476
476477 do {
477478 temp = calloc (local_size , sizeof (temp [0 ]));
@@ -533,12 +534,13 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
533534 break ;
534535 }
535536
536- module -> rank_array = (ompi_osc_rdma_rank_data_t * ) module -> segment_base ;
537-
538537 if (size && MPI_WIN_FLAVOR_ALLOCATE == module -> flavor ) {
539538 * base = (void * )((intptr_t ) module -> segment_base + my_base_offset );
540539 }
541540
541+ module -> rank_array = (ompi_osc_rdma_rank_data_t * ) module -> segment_base ;
542+ /* put local state region data after the rank array */
543+ state_region = (ompi_osc_rdma_region_t * ) ((uintptr_t ) module -> segment_base + local_rank_array_size );
542544 module -> state = (ompi_osc_rdma_state_t * ) ((uintptr_t ) module -> segment_base + state_base + module -> state_size * local_rank );
543545
544546 /* all local ranks share the array containing the peer data of leader ranks */
@@ -547,11 +549,18 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
547549 /* initialize my state */
548550 memset (module -> state , 0 , module -> state_size );
549551
550- /* just go ahead and register the whole segment */
551- ret = ompi_osc_rdma_register (module , MCA_BTL_ENDPOINT_ANY , module -> segment_base , total_size , MCA_BTL_REG_FLAG_ACCESS_ANY ,
552- & module -> state_handle );
553- if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
554- break ;
552+ if (0 == local_rank ) {
553+ /* just go ahead and register the whole segment */
554+ ret = ompi_osc_rdma_register (module , MCA_BTL_ENDPOINT_ANY , module -> segment_base , total_size , MCA_BTL_REG_FLAG_ACCESS_ANY ,
555+ & module -> state_handle );
556+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
557+ break ;
558+ }
559+
560+ state_region -> base = (intptr_t ) module -> segment_base ;
561+ if (module -> state_handle ) {
562+ memcpy (state_region -> btl_handle_data , module -> state_handle , module -> selected_btl -> btl_registration_handle_size );
563+ }
555564 }
556565
557566 if (MPI_WIN_FLAVOR_DYNAMIC != module -> flavor ) {
@@ -572,6 +581,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
572581 offset = data_base ;
573582 for (int i = 0 ; i < local_size ; ++ i ) {
574583 ompi_osc_rdma_peer_extended_t * ex_peer ;
584+ ompi_osc_rdma_state_t * peer_state ;
575585 ompi_osc_rdma_peer_t * peer ;
576586 int peer_rank = temp [i ].rank ;
577587
@@ -582,21 +592,24 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
582592
583593 ex_peer = (ompi_osc_rdma_peer_extended_t * ) peer ;
584594
585- peer -> state = (osc_rdma_counter_t ) ((uintptr_t ) module -> segment_base + state_base + module -> state_size * i );
595+ /* peer state local pointer */
596+ peer_state = (ompi_osc_rdma_state_t * ) ((uintptr_t ) module -> segment_base + state_base + module -> state_size * i );
586597
587598 if (local_size == global_size || (module -> selected_btl -> btl_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB )) {
588599 /* all peers are local or it is safe to mix cpu and nic atomics */
589600 peer -> flags |= OMPI_OSC_RDMA_PEER_LOCAL_STATE ;
601+ peer -> state = (osc_rdma_counter_t ) peer_state ;
590602 } else {
591603 /* use my endpoint handle to modify the peer's state */
592- peer -> state_handle = module -> state_handle ;
593- peer -> state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module , my_rank );
604+ if (module -> selected_btl -> btl_register_mem ) {
605+ peer -> state_handle = (mca_btl_base_registration_handle_t * ) state_region -> btl_handle_data ;
606+ }
607+ peer -> state = (osc_rdma_counter_t ) ((uintptr_t ) state_region -> base + state_base + module -> state_size * i );
608+ peer -> state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module , peer_rank );
594609 }
595610
596611 /* finish setting up the local peer structure */
597612 if (MPI_WIN_FLAVOR_DYNAMIC != module -> flavor ) {
598- ompi_osc_rdma_state_t * peer_state = (ompi_osc_rdma_state_t * ) (intptr_t ) peer -> state ;
599-
600613 if (!module -> same_disp_unit ) {
601614 ex_peer -> disp_unit = peer_state -> disp_unit ;
602615 }
@@ -1050,8 +1063,6 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base,
10501063
10511064 /* calculate and store various structure sizes */
10521065
1053- /* the following two structures have similar usage but the later is meant to be a small as possible. they may
1054- * be merged into a single structure in a later version of this component. */
10551066 module -> region_size = module -> selected_btl -> btl_registration_handle_size + sizeof (ompi_osc_rdma_region_t );
10561067
10571068 module -> state_size = sizeof (ompi_osc_rdma_state_t );
0 commit comments