Skip to content

Commit a0d29ac

Browse files
committed
btl/tcp: use show_help to print the dropped-TCP warning
Make the message more friendly / more detailed, and de-duplicate it (just in case it happens a lot).

Signed-off-by: Jeff Squyres <jsquyres@cisco.com>
(cherry picked from commit 5b484c9)
1 parent aec1235 commit a0d29ac

File tree

2 files changed

+25
-7
lines changed

2 files changed

+25
-7
lines changed

opal/mca/btl/tcp/btl_tcp_proc.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
* and Technology (RIST). All rights reserved.
1717
* Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights
1818
* reserved.
19-
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
19+
* Copyright (c) 2015-2017 Cisco Systems, Inc. All rights reserved
2020
* $COPYRIGHT$
2121
*
2222
* Additional copyrights may follow
@@ -41,6 +41,7 @@
4141
#include "opal/util/if.h"
4242
#include "opal/util/net.h"
4343
#include "opal/util/proc.h"
44+
#include "opal/util/show_help.h"
4445

4546
#include "btl_tcp.h"
4647
#include "btl_tcp_proc.h"
@@ -833,11 +834,13 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr
833834
len = 1024 - strlen(addr_str);
834835
}
835836
}
836-
opal_output(0, "btl: tcp: Incoming connection from %s does not match known addresses for peer %s [hostname=%s addr=%s]. Drop !\n",
837-
opal_net_get_hostname((struct sockaddr*)addr),
838-
OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name),
839-
btl_proc->proc_opal->proc_hostname,
840-
addr_str);
837+
opal_show_help("help-mpi-btl-tcp.txt", "dropped inbound connection",
838+
true, opal_process_info.nodename,
839+
getpid(),
840+
btl_proc->proc_opal->proc_hostname,
841+
OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name),
842+
opal_net_get_hostname((struct sockaddr*)addr),
843+
addr_str);
841844
free(addr_str);
842845
}
843846
OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);

opal/mca/btl/tcp/help-mpi-btl-tcp.txt

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- text -*-
22
#
3-
# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
3+
# Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved
44
# Copyright (c) 2015-2016 The University of Tennessee and The University
55
# of Tennessee Research Foundation. All rights
66
# reserved.
@@ -91,3 +91,18 @@ or other external events.
9191
Local PID: %d
9292
Peer host: %s
9393
#
94+
[dropped inbound connection]
95+
Open MPI detected an inbound MPI TCP connection request from a peer
96+
that appears to be part of this MPI job (i.e., it identified itself as
97+
part of this Open MPI job), but it is from an IP address that is
98+
unexpected. This is highly unusual.
99+
100+
The inbound connection has been dropped, and the peer should simply
101+
try again with a different IP interface (i.e., the job should
102+
hopefully be able to continue).
103+
104+
Local host: %s
105+
Local PID: %d
106+
Peer hostname: %s (%s)
107+
Source IP of socket: %s
108+
Known IPs of peer: %s

0 commit comments

Comments
 (0)