11/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22/*
3- * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
3+ * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
44 * reserved.
55 * Copyright (c) 2015 Research Organization for Information Science
66 * and Technology (RIST). All rights reserved.
1919
2020#include "ompi/mca/bml/base/base.h"
2121
22- #define NODE_ID_TO_RANK (module , node_id ) (( node_id) * ((ompi_comm_size ((module)->comm) + (module )->node_count - 1) / (module)->node_count) )
22+ #define NODE_ID_TO_RANK (module , peer_data , node_id ) ((int)(peer_data )->len )
2323
2424/**
2525 * @brief find the btl endpoint for a process
@@ -99,7 +99,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
9999 ompi_osc_rdma_rank_data_t rank_data ;
100100 int registration_handle_size = 0 ;
101101 int node_id , node_rank , array_index ;
102- int ret , disp_unit ;
102+ int ret , disp_unit , comm_size ;
103103 char * peer_data ;
104104
105105 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_DEBUG , "configuring peer for rank %d" , peer -> rank );
@@ -108,13 +108,18 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
108108 registration_handle_size = module -> selected_btl -> btl_registration_handle_size ;
109109 }
110110
111+ comm_size = ompi_comm_size (module -> comm );
112+
111113 /* each node is responsible for holding a part of the rank -> node/local rank mapping array. this code
112114 * calculates the node and offset at which the mapping can be found. once the mapping has been read the state
113115 * part of the peer structure can be initialized. */
114- node_id = (peer -> rank * module -> node_count ) / ompi_comm_size (module -> comm );
115- node_rank = NODE_ID_TO_RANK (module , node_id );
116- array_index = peer -> rank - node_rank ;
116+ node_id = (peer -> rank * module -> node_count ) / comm_size ;
117117 array_peer_data = (ompi_osc_rdma_region_t * ) ((intptr_t ) module -> node_comm_info + node_id * module -> region_size );
118+
119+ /* the node leader rank is stored in the length field */
120+ node_rank = NODE_ID_TO_RANK (module , array_peer_data , node_id );
121+ array_index = peer -> rank % ((comm_size + module -> node_count - 1 ) / module -> node_count );
122+
118123 array_pointer = array_peer_data -> base + array_index * sizeof (rank_data );
119124
120125 /* lookup the btl endpoint needed to retrieve the mapping */
@@ -123,8 +128,8 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
123128 return OMPI_ERR_UNREACH ;
124129 }
125130
126- OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_DEBUG , "reading region data from rank: %d pointer: 0x%" PRIx64
127- ", size: %lu" , node_rank , array_pointer , sizeof (rank_data ));
131+ OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_DEBUG , "reading region data for %d from rank: %d, index: %d, pointer: 0x%" PRIx64
132+ ", size: %lu" , peer -> rank , node_rank , array_index , array_pointer , sizeof (rank_data ));
128133
129134 ret = ompi_osc_get_data_blocking (module , array_endpoint , array_pointer , (mca_btl_base_registration_handle_t * ) array_peer_data -> btl_handle_data ,
130135 & rank_data , sizeof (rank_data ));
@@ -143,7 +148,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
143148 peer -> state_handle = (mca_btl_base_registration_handle_t * ) node_peer_data -> btl_handle_data ;
144149 }
145150
146- peer -> state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module , NODE_ID_TO_RANK (module , rank_data .node_id ));
151+ peer -> state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module , NODE_ID_TO_RANK (module , node_peer_data , rank_data .node_id ));
147152 if (OPAL_UNLIKELY (NULL == peer -> state_endpoint )) {
148153 return OPAL_ERR_UNREACH ;
149154 }
0 commit comments