Skip to content

Commit 8ad6a1e

Browse files
committed
Merge pull request open-mpi#1012 from jsquyres/pr/v2.0.0/usnic-fix-add-procs-cutoff=0
v2.0.0: usnic: set MCA_BTL_FLAGS_SINGLE_ADD_PROCS
2 parents 98ac7a8 + 5ca23fa commit 8ad6a1e

File tree

2 files changed

+49
-58
lines changed

2 files changed

+49
-58
lines changed

opal/mca/btl/usnic/btl_usnic_cagent.c

Lines changed: 44 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
2+
* Copyright (c) 2014-2016 Cisco Systems, Inc. All rights reserved.
33
* Copyright (c) 2015 Research Organization for Information Science
44
* and Technology (RIST). All rights reserved.
55
* $COPYRIGHT$
@@ -97,6 +97,10 @@ typedef enum {
9797
AGENT_MSG_TYPE_ACK
9898
} agent_udp_message_type_t;
9999

100+
// Arbitrary 64-bit numbers
101+
#define MAGIC_ORIGINATOR 0x9a9e2fbce63a11e5
102+
#define MAGIC_TARGET 0x60735c68f368aace
103+
100104
/*
101105
* Ping and ACK messages
102106
*/
@@ -110,6 +114,11 @@ typedef struct {
110114
uint32_t src_ipv4_addr;
111115
uint32_t src_udp_port;
112116

117+
/* A magic number that helps determine that the sender was Open
118+
MPI */
119+
uint64_t magic_number;
120+
uint32_t major_version, minor_version;
121+
113122
/* If this is a PING, the message should be this size.
114123
If this is an ACK, we are ACKing a ping of this size. */
115124
uint32_t size;
@@ -327,48 +336,6 @@ static void agent_sendto(int fd, char *buffer, ssize_t numbytes,
327336
* All of the following functions run in agent thread
328337
**************************************************************************/
329338

330-
/*
331-
* Check to ensure that we expected to receive a ping from this sender
332-
* on the interface in which it was received (i.e., did the usnic
333-
* module corresponding to the received interface choose to pair
334-
* itself with the sender's interface). If not, discard it.
335-
*
336-
* Note that there may be a race condition here. We may get a ping
337-
* before we've setup endpoints on the module in question. It's no
338-
* problem -- if we don't find it, we'll drop the PING and let the
339-
* sender try again later.
340-
*/
341-
static bool agent_thread_is_ping_expected(opal_btl_usnic_module_t *module,
342-
uint32_t src_ipv4_addr)
343-
{
344-
bool found = false;
345-
opal_list_item_t *item;
346-
347-
/* If we have a NULL value for the module, it means that the MPI
348-
process that is the agent hasn't submitted the LISTEN command
349-
yet (which can happen for a fast sender / slow receiver). So
350-
just return "ping is not [yet] expected". */
351-
if (NULL == module) {
352-
return false;
353-
}
354-
355-
opal_mutex_lock(&module->all_endpoints_lock);
356-
if (module->all_endpoints_constructed) {
357-
OPAL_LIST_FOREACH(item, &module->all_endpoints, opal_list_item_t) {
358-
opal_btl_usnic_endpoint_t *ep;
359-
ep = container_of(item, opal_btl_usnic_endpoint_t,
360-
endpoint_endpoint_li);
361-
if (src_ipv4_addr == ep->endpoint_remote_modex.ipv4_addr) {
362-
found = true;
363-
break;
364-
}
365-
}
366-
}
367-
opal_mutex_unlock(&module->all_endpoints_lock);
368-
369-
return found;
370-
}
371-
372339
/*
373340
* Handle an incoming PING message (send an ACK)
374341
*/
@@ -411,29 +378,36 @@ static void agent_thread_handle_ping(agent_udp_port_listener_t *listener,
411378
return;
412379
}
413380

414-
/* Finally, check that the ping is from an interface that the
415-
module expects */
416-
if (!agent_thread_is_ping_expected(listener->module,
417-
src_addr_in->sin_addr.s_addr)) {
381+
if (msg->magic_number != MAGIC_ORIGINATOR) {
418382
opal_output_verbose(20, USNIC_OUT,
419-
"usNIC connectivity got bad ping (from unexpected address: listener %s not paired with peer interface %s, discarded)",
420-
listener->ipv4_addr_str,
421-
real_ipv4_addr_str);
383+
"usNIC connectivity got bad ping (magic number: %" PRIu64 ", discarded)",
384+
msg->magic_number);
385+
return;
386+
}
387+
if (msg->major_version != OPAL_MAJOR_VERSION ||
388+
msg->minor_version != OPAL_MINOR_VERSION) {
389+
opal_output_verbose(20, USNIC_OUT,
390+
"usNIC connectivity got bad ping (originator version: %d.%d, expected %d.%d, discarded)",
391+
msg->major_version, msg->minor_version,
392+
OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION);
422393
return;
423394
}
424395

425-
/* Ok, this is a good ping. Send the ACK back */
396+
/* Ok, this is a good ping. Send the ACK back. The PING sender
397+
will verify that the ACK came back from the IP address that it
398+
expected. */
426399

427400
opal_output_verbose(20, USNIC_OUT,
428401
"usNIC connectivity got PING (size=%ld) from %s; sending ACK",
429402
numbytes, msg_ipv4_addr_str);
430403

431404
/* Send back an ACK. No need to allocate a new buffer; just
432405
re-use the same buffer we just got. Note that msg->size is
433-
already set. */
406+
already set. We simply echo back the sender's IP address/port
407+
in the msg (the sender will use the msg fields and the
408+
recvfrom() src_addr to check for a match). */
434409
msg->message_type = AGENT_MSG_TYPE_ACK;
435-
msg->src_ipv4_addr = listener->ipv4_addr;
436-
msg->src_udp_port = listener->udp_port;
410+
msg->magic_number = MAGIC_TARGET;
437411

438412
agent_sendto(listener->fd, (char*) listener->buffer, sizeof(*msg), from);
439413
}
@@ -457,12 +431,22 @@ static void agent_thread_handle_ack(agent_udp_port_listener_t *listener,
457431
(int) numbytes, str, (int) sizeof(*msg));
458432
return;
459433
}
434+
if (msg->magic_number != MAGIC_TARGET) {
435+
opal_output_verbose(20, USNIC_OUT,
436+
"usNIC connectivity got bad ACK (magic number: %" PRIu64 ", discarded)",
437+
msg->magic_number);
438+
return;
439+
}
460440

461-
/* Find the pending ping request that this ACK is for */
441+
/* Find the pending ping request (on this interface) for this ACK.
442+
If we don't find a match, we'll drop it. */
462443
agent_ping_t *ap;
444+
uint32_t src_in_port = ntohs(src_addr_in->sin_port);
463445
OPAL_LIST_FOREACH(ap, &pings_pending, agent_ping_t) {
464-
if (ap->dest_ipv4_addr == msg->src_ipv4_addr &&
465-
ap->dest_udp_port == msg->src_udp_port) {
446+
if (ap->dest_ipv4_addr == src_addr_in->sin_addr.s_addr &&
447+
ap->dest_udp_port == src_in_port &&
448+
ap->src_ipv4_addr == msg->src_ipv4_addr &&
449+
ap->src_udp_port == msg->src_udp_port) {
466450
/* Found it -- indicate that it has been acked */
467451
for (int i = 0; i < NUM_PING_SIZES; ++i) {
468452
if (ap->sizes[i] == msg->size) {
@@ -913,6 +897,9 @@ static void agent_thread_cmd_ping(agent_ipc_listener_t *ipc_listener)
913897
msg->message_type = AGENT_MSG_TYPE_PING;
914898
msg->src_ipv4_addr = ap->src_ipv4_addr;
915899
msg->src_udp_port = ap->src_udp_port;
900+
msg->magic_number = MAGIC_ORIGINATOR;
901+
msg->major_version = OPAL_MAJOR_VERSION;
902+
msg->minor_version = OPAL_MINOR_VERSION;
916903
msg->size = ap->sizes[i];
917904
}
918905

opal/mca/btl/usnic/btl_usnic_module.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2424,7 +2424,11 @@ opal_btl_usnic_module_t opal_btl_usnic_module_template = {
24242424
.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT,
24252425
.btl_flags =
24262426
MCA_BTL_FLAGS_SEND |
2427-
MCA_BTL_FLAGS_SEND_INPLACE,
2427+
MCA_BTL_FLAGS_SEND_INPLACE |
2428+
/* Need to set FLAGS_SINGLE_ADD_PROCS until
2429+
btl_recv.h:lookup_sender() can handle an incoming
2430+
message with an unknown sender. */
2431+
MCA_BTL_FLAGS_SINGLE_ADD_PROCS,
24282432

24292433
.btl_add_procs = usnic_add_procs,
24302434
.btl_del_procs = usnic_del_procs,

0 commit comments

Comments
 (0)