|
16 | 16 | * and Technology (RIST). All rights reserved. |
17 | 17 | * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights |
18 | 18 | * reserved. |
19 | | - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. |
| 19 | + * Copyright (c) 2015-2017 Cisco Systems, Inc. All rights reserved. |
20 | 20 | * $COPYRIGHT$ |
21 | 21 | * |
22 | 22 | * Additional copyrights may follow |
|
41 | 41 | #include "opal/util/if.h" |
42 | 42 | #include "opal/util/net.h" |
43 | 43 | #include "opal/util/proc.h" |
| 44 | +#include "opal/util/show_help.h" |
44 | 45 |
|
45 | 46 | #include "btl_tcp.h" |
46 | 47 | #include "btl_tcp_proc.h" |
@@ -122,16 +123,18 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc) |
122 | 123 | return btl_proc; |
123 | 124 | } |
124 | 125 |
|
125 | | - do { |
| 126 | + do { /* This loop is only necessary so that we can break out of the serial code */ |
126 | 127 | btl_proc = OBJ_NEW(mca_btl_tcp_proc_t); |
127 | 128 | if(NULL == btl_proc) { |
128 | 129 | rc = OPAL_ERR_OUT_OF_RESOURCE; |
129 | 130 | break; |
130 | 131 | } |
131 | 132 |
|
132 | | - btl_proc->proc_opal = proc; |
133 | | - |
134 | | - OBJ_RETAIN(btl_proc->proc_opal); |
| 133 | + /* Retain the proc, but don't store the ref into the btl_proc just yet. This |
| 134 | + * provides a way to release the btl_proc in case of failure without having to |
| 135 | + * unlock the mutex. |
| 136 | + */ |
| 137 | + OBJ_RETAIN(proc); |
135 | 138 |
|
136 | 139 | /* lookup tcp parameters exported by this proc */ |
137 | 140 | OPAL_MODEX_RECV(rc, &mca_btl_tcp_component.super.btl_version, |
@@ -181,12 +184,14 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc) |
181 | 184 | } while (0); |
182 | 185 |
|
183 | 186 | if (OPAL_SUCCESS == rc) { |
| 187 | + btl_proc->proc_opal = proc; /* link with the proc */ |
184 | 188 | /* add to the hash table of all proc instances. */ |
185 | 189 | opal_proc_table_set_value(&mca_btl_tcp_component.tcp_procs, |
186 | 190 | proc->proc_name, btl_proc); |
187 | 191 | } else { |
188 | 192 | if (btl_proc) { |
189 | | - OBJ_RELEASE(btl_proc); |
| 193 | + OBJ_RELEASE(btl_proc); /* release the local proc */ |
| 194 | + OBJ_RELEASE(proc); /* and the ref on the OMPI proc */ |
190 | 195 | btl_proc = NULL; |
191 | 196 | } |
192 | 197 | } |
@@ -823,9 +828,38 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr |
823 | 828 | OPAL_THREAD_UNLOCK(&btl_proc->proc_lock); |
824 | 829 | return; |
825 | 830 | } |
826 | | - OPAL_THREAD_UNLOCK(&btl_proc->proc_lock); |
827 | 831 | /* No further use of this socket. Close it */ |
828 | 832 | CLOSE_THE_SOCKET(sd); |
| 833 | + { |
| 834 | + size_t len = 1024; |
| 835 | + char* addr_str = (char*)malloc(len); |
| 836 | + if( NULL != addr_str ) { |
| 837 | + memset(addr_str, 0, len); |
| 838 | + for (size_t i = 0; i < btl_proc->proc_endpoint_count; i++) { |
| 839 | + mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i]; |
| 840 | + if (btl_endpoint->endpoint_addr->addr_family != addr->sa_family) { |
| 841 | + continue; |
| 842 | + } |
| 843 | + |
| 844 | + if (addr_str[0] != '\0') { |
| 845 | + strncat(addr_str, ", ", len); |
| 846 | + len -= 2; |
| 847 | + } |
| 848 | + strncat(addr_str, inet_ntop(AF_INET6, (void*)(struct in6_addr*)&btl_endpoint->endpoint_addr->addr_inet, |
| 849 | + addr_str + 1024 - len, INET6_ADDRSTRLEN), len); |
| 850 | + len = 1024 - strlen(addr_str); |
| 851 | + } |
| 852 | + } |
| 853 | + opal_show_help("help-mpi-btl-tcp.txt", "dropped inbound connection", |
| 854 | + true, opal_process_info.nodename, |
| 855 | + getpid(), |
| 856 | + btl_proc->proc_opal->proc_hostname, |
| 857 | + OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name), |
| 858 | + opal_net_get_hostname((struct sockaddr*)addr), |
| 859 | + addr_str); |
| 860 | + free(addr_str); |
| 861 | + } |
| 862 | + OPAL_THREAD_UNLOCK(&btl_proc->proc_lock); |
829 | 863 | } |
830 | 864 |
|
831 | 865 | /* |
|
0 commit comments