Skip to content

Commit c78d676

Browse files
committed
usnic: set MCA_BTL_FLAGS_SINGLE_ADD_PROCS
The btl_recv.h:lookup_sender() function uses the hashed ORTE proc name to determine the sender of the packet. With add_procs_cutoff>0, the usnic BTL may not have knowledge of all the senders. Until the usNIC BTL can be adjusted to do something like the openib/ugni BTLs (i.e., use opal_proc_for_name() to lookup unknown sender proc names), set MCA_BTL_FLAGS_SINGLE_ADD_PROCS, which means that ob1 will only call add_procs() once -- with all the procs in it. Also in this commit, adapt the connectivity checker to not rely on knowing all the senders (which is a bit easier than adapting the main BTL send path): the receiving connectivity agent will simply echo back the same PING message (which contains the sender's IP address+UDP port) back to the sender without checking that it knows who the sender is. If the sender receives the echoed PING back on the expected interface, it will find a match in the pending pings list. If the sender receives the echoed PING back on an unexpected interface, a match will not be found, and the incoming PING message will be dropped. Fixes open-mpi#1440 (cherry picked from commit open-mpi/ompi@584b801)
1 parent 98ac7a8 commit c78d676

File tree

2 files changed

+19
-62
lines changed

2 files changed

+19
-62
lines changed

opal/mca/btl/usnic/btl_usnic_cagent.c

Lines changed: 14 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
2+
* Copyright (c) 2014-2016 Cisco Systems, Inc. All rights reserved.
33
* Copyright (c) 2015 Research Organization for Information Science
44
* and Technology (RIST). All rights reserved.
55
* $COPYRIGHT$
@@ -327,48 +327,6 @@ static void agent_sendto(int fd, char *buffer, ssize_t numbytes,
327327
* All of the following functions run in agent thread
328328
**************************************************************************/
329329

330-
/*
331-
* Check to ensure that we expected to receive a ping from this sender
332-
* on the interface in which it was received (i.e., did the usnic
333-
* module corresponding to the received interface choose to pair
334-
* itself with the sender's interface). If not, discard it.
335-
*
336-
* Note that there may be a race condition here. We may get a ping
337-
* before we've setup endpoints on the module in question. It's no
338-
* problem -- if we don't find it, we'll drop the PING and let the
339-
* sender try again later.
340-
*/
341-
static bool agent_thread_is_ping_expected(opal_btl_usnic_module_t *module,
342-
uint32_t src_ipv4_addr)
343-
{
344-
bool found = false;
345-
opal_list_item_t *item;
346-
347-
/* If we have a NULL value for the module, it means that the MPI
348-
process that is the agent hasn't submitted the LISTEN command
349-
yet (which can happen for a fast sender / slow receiver). So
350-
just return "ping is not [yet] expected". */
351-
if (NULL == module) {
352-
return false;
353-
}
354-
355-
opal_mutex_lock(&module->all_endpoints_lock);
356-
if (module->all_endpoints_constructed) {
357-
OPAL_LIST_FOREACH(item, &module->all_endpoints, opal_list_item_t) {
358-
opal_btl_usnic_endpoint_t *ep;
359-
ep = container_of(item, opal_btl_usnic_endpoint_t,
360-
endpoint_endpoint_li);
361-
if (src_ipv4_addr == ep->endpoint_remote_modex.ipv4_addr) {
362-
found = true;
363-
break;
364-
}
365-
}
366-
}
367-
opal_mutex_unlock(&module->all_endpoints_lock);
368-
369-
return found;
370-
}
371-
372330
/*
373331
* Handle an incoming PING message (send an ACK)
374332
*/
@@ -411,29 +369,20 @@ static void agent_thread_handle_ping(agent_udp_port_listener_t *listener,
411369
return;
412370
}
413371

414-
/* Finally, check that the ping is from an interface that the
415-
module expects */
416-
if (!agent_thread_is_ping_expected(listener->module,
417-
src_addr_in->sin_addr.s_addr)) {
418-
opal_output_verbose(20, USNIC_OUT,
419-
"usNIC connectivity got bad ping (from unexpected address: listener %s not paired with peer interface %s, discarded)",
420-
listener->ipv4_addr_str,
421-
real_ipv4_addr_str);
422-
return;
423-
}
424-
425-
/* Ok, this is a good ping. Send the ACK back */
372+
/* Ok, this is a good ping. Send the ACK back. The PING sender
373+
will verify that the ACK came back from the IP address that it
374+
expected. */
426375

427376
opal_output_verbose(20, USNIC_OUT,
428377
"usNIC connectivity got PING (size=%ld) from %s; sending ACK",
429378
numbytes, msg_ipv4_addr_str);
430379

431380
/* Send back an ACK. No need to allocate a new buffer; just
432381
re-use the same buffer we just got. Note that msg->size is
433-
already set. */
382+
already set. We simply echo back the sender's IP address/port
383+
in the msg (the sender will use the msg fields and the
384+
recvfrom() src_addr to check for a match). */
434385
msg->message_type = AGENT_MSG_TYPE_ACK;
435-
msg->src_ipv4_addr = listener->ipv4_addr;
436-
msg->src_udp_port = listener->udp_port;
437386

438387
agent_sendto(listener->fd, (char*) listener->buffer, sizeof(*msg), from);
439388
}
@@ -458,11 +407,15 @@ static void agent_thread_handle_ack(agent_udp_port_listener_t *listener,
458407
return;
459408
}
460409

461-
/* Find the pending ping request that this ACK is for */
410+
/* Find the pending ping request (on this interface) for this ACK.
411+
If we don't find a match, we'll drop it. */
462412
agent_ping_t *ap;
413+
uint32_t src_in_port = ntohs(src_addr_in->sin_port);
463414
OPAL_LIST_FOREACH(ap, &pings_pending, agent_ping_t) {
464-
if (ap->dest_ipv4_addr == msg->src_ipv4_addr &&
465-
ap->dest_udp_port == msg->src_udp_port) {
415+
if (ap->dest_ipv4_addr == src_addr_in->sin_addr.s_addr &&
416+
ap->dest_udp_port == src_in_port &&
417+
ap->src_ipv4_addr == msg->src_ipv4_addr &&
418+
ap->src_udp_port == msg->src_udp_port) {
466419
/* Found it -- indicate that it has been acked */
467420
for (int i = 0; i < NUM_PING_SIZES; ++i) {
468421
if (ap->sizes[i] == msg->size) {

opal/mca/btl/usnic/btl_usnic_module.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2424,7 +2424,11 @@ opal_btl_usnic_module_t opal_btl_usnic_module_template = {
24242424
.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT,
24252425
.btl_flags =
24262426
MCA_BTL_FLAGS_SEND |
2427-
MCA_BTL_FLAGS_SEND_INPLACE,
2427+
MCA_BTL_FLAGS_SEND_INPLACE |
2428+
/* Need to set FLAGS_SINGLE_ADD_PROCS until
2429+
btl_recv.h:lookup_sender() can handle an incoming
2430+
message with an unknown sender. */
2431+
MCA_BTL_FLAGS_SINGLE_ADD_PROCS,
24282432

24292433
.btl_add_procs = usnic_add_procs,
24302434
.btl_del_procs = usnic_del_procs,

0 commit comments

Comments
 (0)