Skip to content

Commit 7bda3eb

Browse files
committed
osc/rdma: fix global index array calculation
This commit fixes a bug that occurs when ranks are either not mapped evenly or are mapped by something other than core. Fixes #1599. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 01c90d4 commit 7bda3eb

File tree

2 files changed

+19
-10
lines changed

2 files changed

+19
-10
lines changed

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,8 @@ static int ompi_osc_rdma_share_data (ompi_osc_rdma_module_t *module)
845845
module->region_size);
846846

847847
my_data->base = (uint64_t) (intptr_t) module->rank_array;
848+
/* store my rank in the length field */
849+
my_data->len = (osc_rdma_size_t) my_rank;
848850

849851
if (module->selected_btl->btl_register_mem) {
850852
memcpy (my_data->btl_handle_data, module->state_handle, module->selected_btl->btl_registration_handle_size);
@@ -861,9 +863,11 @@ static int ompi_osc_rdma_share_data (ompi_osc_rdma_module_t *module)
861863
}
862864
}
863865

866+
int base_rank = ompi_comm_rank (module->local_leaders) * ((comm_size + module->node_count - 1) / module->node_count);
867+
864868
/* fill in the local part of the rank -> node map */
865869
for (int i = 0 ; i < RANK_ARRAY_COUNT(module) ; ++i) {
866-
int save_rank = my_rank + i;
870+
int save_rank = base_rank + i;
867871
if (save_rank >= comm_size) {
868872
break;
869873
}

ompi/mca/osc/rdma/osc_rdma_peer.c

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
22
/*
3-
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
3+
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
44
* reserved.
55
* Copyright (c) 2015 Research Organization for Information Science
66
* and Technology (RIST). All rights reserved.
@@ -19,7 +19,7 @@
1919

2020
#include "ompi/mca/bml/base/base.h"
2121

22-
#define NODE_ID_TO_RANK(module, node_id) ((node_id) * ((ompi_comm_size ((module)->comm) + (module)->node_count - 1) / (module)->node_count))
22+
#define NODE_ID_TO_RANK(module, peer_data, node_id) ((int)(peer_data)->len)
2323

2424
/**
2525
* @brief find the btl endpoint for a process
@@ -99,7 +99,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
9999
ompi_osc_rdma_rank_data_t rank_data;
100100
int registration_handle_size = 0;
101101
int node_id, node_rank, array_index;
102-
int ret, disp_unit;
102+
int ret, disp_unit, comm_size;
103103
char *peer_data;
104104

105105
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "configuring peer for rank %d", peer->rank);
@@ -108,13 +108,18 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
108108
registration_handle_size = module->selected_btl->btl_registration_handle_size;
109109
}
110110

111+
comm_size = ompi_comm_size (module->comm);
112+
111113
/* each node is responsible for holding a part of the rank -> node/local rank mapping array. this code
112114
* calculates the node and offset where the mapping can be found. once the mapping has been read the state
113115
* part of the peer structure can be initialized. */
114-
node_id = (peer->rank * module->node_count) / ompi_comm_size (module->comm);
115-
node_rank = NODE_ID_TO_RANK(module, node_id);
116-
array_index = peer->rank - node_rank;
116+
node_id = (peer->rank * module->node_count) / comm_size;
117117
array_peer_data = (ompi_osc_rdma_region_t *) ((intptr_t) module->node_comm_info + node_id * module->region_size);
118+
119+
/* the node leader rank is stored in the length field */
120+
node_rank = NODE_ID_TO_RANK(module, array_peer_data, node_id);
121+
array_index = peer->rank % ((comm_size + module->node_count - 1) / module->node_count);
122+
118123
array_pointer = array_peer_data->base + array_index * sizeof (rank_data);
119124

120125
/* lookup the btl endpoint needed to retrieve the mapping */
@@ -123,8 +128,8 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
123128
return OMPI_ERR_UNREACH;
124129
}
125130

126-
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "reading region data from rank: %d pointer: 0x%" PRIx64
127-
", size: %lu", node_rank, array_pointer, sizeof (rank_data));
131+
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "reading region data for %d from rank: %d, index: %d, pointer: 0x%" PRIx64
132+
", size: %lu", peer->rank, node_rank, array_index, array_pointer, sizeof (rank_data));
128133

129134
ret = ompi_osc_get_data_blocking (module, array_endpoint, array_pointer, (mca_btl_base_registration_handle_t *) array_peer_data->btl_handle_data,
130135
&rank_data, sizeof (rank_data));
@@ -143,7 +148,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd
143148
peer->state_handle = (mca_btl_base_registration_handle_t *) node_peer_data->btl_handle_data;
144149
}
145150

146-
peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, NODE_ID_TO_RANK(module, rank_data.node_id));
151+
peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, NODE_ID_TO_RANK(module, node_peer_data, rank_data.node_id));
147152
if (OPAL_UNLIKELY(NULL == peer->state_endpoint)) {
148153
return OPAL_ERR_UNREACH;
149154
}

0 commit comments

Comments
 (0)