Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit 4bdd64b

Browse files
committed
btl/openib: update rdmacm for dynamic add_procs
This commit adds the data necessary for supporting dynamic add_procs to the rdma message (opal_process_name_t). The endpoint lookup function has been updated to match the code in udcm. Closes open-mpi/ompi#1468. Signed-off-by: Nathan Hjelm <[email protected]> (cherry picked from open-mpi/ompi@645bd9d) Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 3c4f09c commit 4bdd64b

File tree

1 file changed

+18
-58
lines changed

1 file changed

+18
-58
lines changed

opal/mca/btl/openib/connect/btl_openib_connect_rdmacm.c

Lines changed: 18 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* Copyright (c) 2008 Mellanox Technologies. All rights reserved.
66
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
77
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
8-
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
8+
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
99
* reserved.
1010
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
1111
* Copyright (c) 2014 The University of Tennessee and The University
@@ -185,6 +185,7 @@ typedef struct {
185185
#endif
186186
uint32_t rem_index;
187187
uint8_t qpnum;
188+
opal_process_name_t rem_name;
188189
} __opal_attribute_packed__ private_data_t;
189190

190191
#if !BTL_OPENIB_RDMACM_IB_ADDR
@@ -376,68 +377,23 @@ static char *stringify(uint32_t addr)
376377
* the rdma_cm event id
377378
*/
378379
static mca_btl_openib_endpoint_t *rdmacm_find_endpoint(rdmacm_contents_t *contents,
379-
struct rdma_cm_id *id,
380-
#if BTL_OPENIB_RDMACM_IB_ADDR
381-
uint64_t rem_port)
382-
#else
383-
uint16_t rem_port)
384-
#endif
380+
opal_process_name_t rem_name)
385381
{
386-
int i;
382+
mca_btl_openib_module_t *btl = contents->openib_btl;
387383
mca_btl_openib_endpoint_t *ep = NULL;
388-
opal_pointer_array_t *endpoints = contents->openib_btl->device->endpoints;
389-
390-
struct sockaddr *peeraddr = rdma_get_peer_addr(id);
391-
#if BTL_OPENIB_RDMACM_IB_ADDR
392-
union ibv_gid *ep_gid, peer_gid;
393-
memcpy(peer_gid.raw, ((struct sockaddr_ib *) peeraddr)->sib_addr.sib_raw, sizeof peer_gid);
394-
#else
395-
uint32_t peeripaddr = ((struct sockaddr_in *) peeraddr)->sin_addr.s_addr;
396-
397-
#if OPAL_ENABLE_DEBUG
398-
char *a;
399-
#endif
400-
401-
OPAL_OUTPUT((-1, "remote peer requesting connection: %s port %d",
402-
a = stringify(peeripaddr), rem_port));
403-
#if OPAL_ENABLE_DEBUG
404-
free(a);
405-
#endif
406-
#endif
384+
opal_proc_t *opal_proc;
407385

408-
for (i = 0; i < opal_pointer_array_get_size(endpoints); i++) {
409-
mca_btl_openib_endpoint_t *endpoint;
410-
modex_message_t *message;
411-
412-
endpoint = (mca_btl_openib_endpoint_t *) opal_pointer_array_get_item(endpoints, i);
413-
if (NULL == endpoint) {
414-
continue;
415-
}
416-
417-
message = (modex_message_t *) endpoint->endpoint_remote_cpc_data->cbm_modex_message;
418-
#if !BTL_OPENIB_RDMACM_IB_ADDR
419-
OPAL_OUTPUT((-1, "message ipaddr = %s port %d",
420-
a = stringify(message->ipaddr), message->tcp_port));
421-
#if OPAL_ENABLE_DEBUG
422-
free(a);
423-
#endif
424-
#endif
425-
426-
#if BTL_OPENIB_RDMACM_IB_ADDR
427-
ep_gid = (union ibv_gid *) message->gid;
428-
if (ep_gid->global.interface_id == peer_gid.global.interface_id &&
429-
ep_gid->global.subnet_prefix == peer_gid.global.subnet_prefix &&
430-
message->service_id == rem_port) {
431-
#else
432-
if (message->ipaddr == peeripaddr && message->tcp_port == rem_port) {
433-
#endif
434-
ep = endpoint;
435-
break;
436-
}
386+
opal_proc = opal_proc_for_name (rem_name);
387+
if (NULL == opal_proc) {
388+
BTL_ERROR(("could not get proc associated with remote peer %s",
389+
opal_process_name_print (rem_name)));
390+
return NULL;
437391
}
438392

393+
ep = mca_btl_openib_get_ep (&btl->super, opal_proc);
439394
if (NULL == ep) {
440-
BTL_ERROR(("can't find suitable endpoint for this peer"));
395+
BTL_ERROR(("could not find endpoint for peer %s",
396+
opal_process_name_print (rem_name)));
441397
}
442398

443399
return ep;
@@ -986,6 +942,7 @@ static int handle_connect_request(struct rdma_cm_event *event)
986942
rdmacm_contents_t *contents = listener_context->contents;
987943
mca_btl_openib_endpoint_t *endpoint;
988944
struct rdma_conn_param conn_param;
945+
opal_process_name_t rem_name;
989946
modex_message_t *message;
990947
private_data_t msg;
991948
int rc = -1, qpnum;
@@ -999,10 +956,11 @@ static int handle_connect_request(struct rdma_cm_event *event)
999956
qpnum = ((private_data_t *)event->param.conn.private_data)->qpnum;
1000957
rem_port = ((private_data_t *)event->param.conn.private_data)->rem_port;
1001958
rem_index = ((private_data_t *)event->param.conn.private_data)->rem_index;
959+
rem_name = ((private_data_t *)event->param.conn.private_data)->rem_name;
1002960

1003961
/* Determine which endpoint the remote side is trying to connect
1004962
to; use the listener's context->contents to figure it out */
1005-
endpoint = rdmacm_find_endpoint(contents, event->id, rem_port);
963+
endpoint = rdmacm_find_endpoint(contents, rem_name);
1006964
if (NULL == endpoint) {
1007965
#if !BTL_OPENIB_RDMACM_IB_ADDR
1008966
struct sockaddr *peeraddr = rdma_get_peer_addr(event->id);
@@ -1145,6 +1103,7 @@ static int handle_connect_request(struct rdma_cm_event *event)
11451103
/* Fill the private data being sent to the other side */
11461104
msg.qpnum = qpnum;
11471105
msg.rem_index = endpoint->index;
1106+
msg.rem_name = OPAL_PROC_MY_NAME;
11481107

11491108
/* Accepting the connection will result in a
11501109
RDMA_CM_EVENT_ESTABLISHED event on both the client and server
@@ -1617,6 +1576,7 @@ static int finish_connect(id_context_t *context)
16171576

16181577
msg.qpnum = context->qpnum;
16191578
msg.rem_index = contents->endpoint->index;
1579+
msg.rem_name = OPAL_PROC_MY_NAME;
16201580
#if BTL_OPENIB_RDMACM_IB_ADDR
16211581
memset(msg.librdmacm_header, 0, sizeof(msg.librdmacm_header));
16221582
msg.rem_port = contents->service_id;

0 commit comments

Comments
 (0)