@@ -721,13 +721,23 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo
721721
722722 /* setup the socket as non-blocking */
723723 if ((flags = fcntl (btl_endpoint -> endpoint_sd , F_GETFL , 0 )) < 0 ) {
724- BTL_ERROR (("fcntl(F_GETFL) failed: %s (%d)" ,
725- strerror (opal_socket_errno ), opal_socket_errno ));
724+ opal_show_help ("help-mpi-btl-tcp.txt" , "socket flag fail" ,
725+ true, opal_process_info .nodename ,
726+ getpid (), "fcntl(sd, F_GETFL, 0)" ,
727+ strerror (opal_socket_errno ), opal_socket_errno );
728+ /* Upper layer will handle the error */
729+ return OPAL_ERR_UNREACH ;
726730 } else {
727731 flags |= O_NONBLOCK ;
728- if (fcntl (btl_endpoint -> endpoint_sd , F_SETFL , flags ) < 0 )
729- BTL_ERROR (("fcntl(F_SETFL) failed: %s (%d)" ,
730- strerror (opal_socket_errno ), opal_socket_errno ));
732+ if (fcntl (btl_endpoint -> endpoint_sd , F_SETFL , flags ) < 0 ) {
733+ opal_show_help ("help-mpi-btl-tcp.txt" , "socket flag fail" ,
734+ true, opal_process_info .nodename ,
735+ getpid (),
736+ "fcntl(sd, F_SETFL, flags & O_NONBLOCK)" ,
737+ strerror (opal_socket_errno ), opal_socket_errno );
738+ /* Upper layer will handle the error */
739+ return OPAL_ERR_UNREACH ;
740+ }
731741 }
732742
733743 /* start the connect - will likely fail with EINPROGRESS */
@@ -778,7 +788,7 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo
778788 * later. Otherwise, send this processes identifier to the endpoint on the
779789 * newly connected socket.
780790 */
781- static void mca_btl_tcp_endpoint_complete_connect (mca_btl_base_endpoint_t * btl_endpoint )
791+ static int mca_btl_tcp_endpoint_complete_connect (mca_btl_base_endpoint_t * btl_endpoint )
782792{
783793 int so_error = 0 ;
784794 opal_socklen_t so_length = sizeof (so_error );
@@ -794,32 +804,49 @@ static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_e
794804
795805 /* check connect completion status */
796806 if (getsockopt (btl_endpoint -> endpoint_sd , SOL_SOCKET , SO_ERROR , (char * )& so_error , & so_length ) < 0 ) {
797- BTL_ERROR (("getsockopt() to %s failed: %s (%d)" ,
807+ opal_show_help ("help-mpi-btl-tcp.txt" , "socket flag fail" ,
808+ true, opal_process_info .nodename ,
809+ getpid (), "fcntl(sd, F_GETFL, 0)" ,
810+ strerror (opal_socket_errno ), opal_socket_errno );
811+ BTL_ERROR (("getsockopt() to %s:%d failed: %s (%d)" ,
798812 opal_net_get_hostname ((struct sockaddr * ) & endpoint_addr ),
813+ ((struct sockaddr_in * ) & endpoint_addr )-> sin_port ,
799814 strerror (opal_socket_errno ), opal_socket_errno ));
800815 mca_btl_tcp_endpoint_close (btl_endpoint );
801- return ;
816+ return OPAL_ERROR ;
802817 }
803818 if (so_error == EINPROGRESS || so_error == EWOULDBLOCK ) {
804- return ;
819+ return OPAL_SUCCESS ;
805820 }
806821 if (so_error != 0 ) {
807- BTL_ERROR (("connect() to %s failed: %s (%d)" ,
808- opal_net_get_hostname ((struct sockaddr * ) & endpoint_addr ),
809- strerror (so_error ), so_error ));
822+ char * msg ;
823+ asprintf (& msg , "connect() to %s:%d failed" ,
824+ opal_net_get_hostname ((struct sockaddr * ) & endpoint_addr ),
825+ ntohs (((struct sockaddr_in * ) & endpoint_addr )-> sin_port ));
826+ opal_show_help ("help-mpi-btl-tcp.txt" , "client connect fail" ,
827+ true, opal_process_info .nodename ,
828+ getpid (), msg ,
829+ strerror (opal_socket_errno ), opal_socket_errno );
830+ free (msg );
810831 mca_btl_tcp_endpoint_close (btl_endpoint );
811- return ;
832+ return OPAL_ERROR ;
812833 }
813834
835+ opal_output_verbose (10 , opal_btl_base_framework .framework_output ,
836+ "btl:tcp: connect() to %s:%d completed (complete_connect), sending connect ACK" ,
837+ opal_net_get_hostname ((struct sockaddr * ) & endpoint_addr ),
838+ ntohs (((struct sockaddr_in * ) & endpoint_addr )-> sin_port ));
839+
814840 if (mca_btl_tcp_endpoint_send_connect_ack (btl_endpoint ) == OPAL_SUCCESS ) {
815841 btl_endpoint -> endpoint_state = MCA_BTL_TCP_CONNECT_ACK ;
816842 opal_event_add (& btl_endpoint -> endpoint_recv_event , 0 );
817843 MCA_BTL_TCP_ENDPOINT_DUMP (10 , btl_endpoint , false, "event_add(recv) [complete_connect]" );
818- return ;
844+ return OPAL_SUCCESS ;
819845 }
820846 MCA_BTL_TCP_ENDPOINT_DUMP (1 , btl_endpoint , false, " [complete_connect]" );
821847 btl_endpoint -> endpoint_state = MCA_BTL_TCP_FAILED ;
822848 mca_btl_tcp_endpoint_close (btl_endpoint );
849+ return OPAL_ERROR ;
823850}
824851
825852
0 commit comments