@@ -964,8 +964,13 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family)
964964 } else
965965#endif
966966 {
967+ char str [16 ];
967968 mca_btl_tcp_component .tcp_listen_port = ((struct sockaddr_in * ) & inaddr )-> sin_port ;
968969 mca_btl_tcp_component .tcp_listen_sd = sd ;
970+ inet_ntop (AF_INET , & (((struct sockaddr_in * )& inaddr )-> sin_addr ), str , sizeof (str ));
971+ opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
972+ "btl:tcp: my listening v4 socket is %s:%u" ,
973+ str , ntohs (mca_btl_tcp_component .tcp_listen_port ));
969974 }
970975
971976 /* setup listen backlog to maximum allowed by kernel */
@@ -1104,6 +1109,7 @@ static int mca_btl_tcp_component_exchange(void)
11041109 size_t current_addr = 0 ;
11051110
11061111 if (mca_btl_tcp_component .tcp_num_btls != 0 ) {
1112+ char ifn [32 ];
11071113 mca_btl_tcp_addr_t * addrs = (mca_btl_tcp_addr_t * )malloc (size );
11081114 memset (addrs , 0 , size );
11091115
@@ -1121,6 +1127,9 @@ static int mca_btl_tcp_component_exchange(void)
11211127 continue ;
11221128 }
11231129
1130+ opal_ifindextoname (index , ifn , sizeof (ifn ));
1131+ opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
1132+ "btl:tcp: examining interface %s" , ifn );
11241133 if (OPAL_SUCCESS !=
11251134 opal_ifindextoaddr (index , (struct sockaddr * ) & my_ss ,
11261135 sizeof (my_ss ))) {
@@ -1144,6 +1153,8 @@ static int mca_btl_tcp_component_exchange(void)
11441153 addrs [current_addr ].addr_ifkindex =
11451154 opal_ifindextokindex (index );
11461155 current_addr ++ ;
1156+ opal_output_verbose (30 , opal_btl_base_framework .framework_output ,
1157+ "btl:tcp: using ipv4 interface %s" , ifn );
11471158 } else
11481159#endif
11491160 if ((AF_INET == my_ss .ss_family ) &&
@@ -1382,38 +1393,76 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
13821393 /* recv the process identifier */
13831394 retval = mca_btl_tcp_recv_blocking (sd , (char * )& guid , sizeof (guid ));
13841395 if (retval != sizeof (guid )) {
1396+ opal_show_help ("help-mpi-btl-tcp.txt" , "server did not get guid" ,
1397+ true, opal_process_info .nodename ,
1398+ getpid ());
13851399 CLOSE_THE_SOCKET (sd );
13861400 return ;
13871401 }
13881402 OPAL_PROCESS_NAME_NTOH (guid );
13891403
13901404 /* now set socket up to be non-blocking */
13911405 if ((flags = fcntl (sd , F_GETFL , 0 )) < 0 ) {
1392- BTL_ERROR (("fcntl(F_GETFL) failed: %s (%d)" ,
1393- strerror (opal_socket_errno ), opal_socket_errno ));
1406+ opal_show_help ("help-mpi-btl-tcp.txt" , "socket flag fail" ,
1407+ true, opal_process_info .nodename ,
1408+ getpid (), "fcntl(sd, F_GETFL, 0)" ,
1409+ strerror (opal_socket_errno ), opal_socket_errno );
1410+ CLOSE_THE_SOCKET (sd );
13941411 } else {
13951412 flags |= O_NONBLOCK ;
13961413 if (fcntl (sd , F_SETFL , flags ) < 0 ) {
1397- BTL_ERROR (("fcntl(F_SETFL) failed: %s (%d)" ,
1398- strerror (opal_socket_errno ), opal_socket_errno ));
1414+ opal_show_help ("help-mpi-btl-tcp.txt" , "socket flag fail" ,
1415+ true, opal_process_info .nodename ,
1416+ getpid (),
1417+ "fcntl(sd, F_SETFL, flags & O_NONBLOCK)" ,
1418+ strerror (opal_socket_errno ), opal_socket_errno );
1419+ CLOSE_THE_SOCKET (sd );
13991420 }
14001421 }
14011422
14021423 /* lookup the corresponding process */
14031424 btl_proc = mca_btl_tcp_proc_lookup (& guid );
14041425 if (NULL == btl_proc ) {
1426+ opal_show_help ("help-mpi-btl-tcp.txt" ,
1427+ "server accept cannot find guid" ,
1428+ true, opal_process_info .nodename ,
1429+ getpid ());
14051430 CLOSE_THE_SOCKET (sd );
14061431 return ;
14071432 }
14081433
14091434 /* lookup peer address */
14101435 if (getpeername (sd , (struct sockaddr * )& addr , & addr_len ) != 0 ) {
1411- BTL_ERROR (("getpeername() failed: %s (%d)" ,
1412- strerror (opal_socket_errno ), opal_socket_errno ));
1436+ opal_show_help ("help-mpi-btl-tcp.txt" ,
1437+ "server getpeername failed" ,
1438+ true, opal_process_info .nodename ,
1439+ getpid (),
1440+ strerror (opal_socket_errno ), opal_socket_errno );
14131441 CLOSE_THE_SOCKET (sd );
14141442 return ;
14151443 }
14161444
14171445 /* are there any existing peer instances willing to accept this connection */
14181446 (void )mca_btl_tcp_proc_accept (btl_proc , (struct sockaddr * )& addr , sd );
1447+
1448+ switch (addr .ss_family ) {
1449+ case AF_INET :
1450+ inet_ntop (AF_INET , & (((struct sockaddr_in * ) & addr )-> sin_addr ), str , sizeof (str ));
1451+ break ;
1452+
1453+ #if OPAL_ENABLE_IPV6
1454+ case AF_INET6 :
1455+ inet_ntop (AF_INET6 , & (((struct sockaddr_in6 * ) & addr )-> sin6_addr ), str , sizeof (str ));
1456+ break ;
1457+ #endif
1458+
1459+ default :
1460+ BTL_ERROR (("Got an accept() from an unknown address family -- this shouldn't happen" ));
1461+ CLOSE_THE_SOCKET (sd );
1462+ return ;
1463+
1464+ }
1465+ opal_output_verbose (10 , opal_btl_base_framework .framework_output ,
1466+ "btl:tcp: now connected to %s, process %s" , str ,
1467+ OPAL_NAME_PRINT (btl_proc -> proc_opal -> proc_name ));
14191468}
0 commit comments