22 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33 * University Research and Technology
44 * Corporation. All rights reserved.
5- * Copyright (c) 2004-2017 The University of Tennessee and The University
5+ * Copyright (c) 2004-2016 The University of Tennessee and The University
66 * of Tennessee Research Foundation. All rights
77 * reserved.
88 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -387,7 +387,6 @@ mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint,
387387{
388388 int ret = mca_btl_tcp_send_blocking (btl_endpoint -> endpoint_sd , data , size );
389389 if (ret < 0 ) {
390- btl_endpoint -> endpoint_state = MCA_BTL_TCP_FAILED ;
391390 mca_btl_tcp_endpoint_close (btl_endpoint );
392391 }
393392 return ret ;
@@ -537,30 +536,20 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
537536 btl_endpoint -> endpoint_sd = -1 ;
538537 /**
539538 * If we keep failing to connect to the peer let the caller know about
540- * this situation by triggering the callback on all pending fragments and
541- * reporting the error. The upper layer has then the opportunity to
542- * re-route or re-schedule the fragments.
539+ * this situation by triggering the callback of every pending fragment and
540+ * reporting the error.
543541 */
544542 if ( MCA_BTL_TCP_FAILED == btl_endpoint -> endpoint_state ) {
545543 mca_btl_tcp_frag_t * frag = btl_endpoint -> endpoint_send_frag ;
546544 if ( NULL == frag )
547545 frag = (mca_btl_tcp_frag_t * )opal_list_remove_first (& btl_endpoint -> endpoint_frags );
548546 while (NULL != frag ) {
549547 frag -> base .des_cbfunc (& frag -> btl -> super , frag -> endpoint , & frag -> base , OPAL_ERR_UNREACH );
550- if ( frag -> base .des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ) {
551- MCA_BTL_TCP_FRAG_RETURN (frag );
552- }
548+
553549 frag = (mca_btl_tcp_frag_t * )opal_list_remove_first (& btl_endpoint -> endpoint_frags );
554550 }
555- btl_endpoint -> endpoint_send_frag = NULL ;
556- /* Let's report the error upstream */
557- if (NULL != btl_endpoint -> endpoint_btl -> tcp_error_cb ) {
558- btl_endpoint -> endpoint_btl -> tcp_error_cb ((mca_btl_base_module_t * )btl_endpoint -> endpoint_btl , 0 ,
559- btl_endpoint -> endpoint_proc -> proc_opal , "Socket closed" );
560- }
561- } else {
562- btl_endpoint -> endpoint_state = MCA_BTL_TCP_CLOSED ;
563551 }
552+ btl_endpoint -> endpoint_state = MCA_BTL_TCP_CLOSED ;
564553}
565554
566555/*
@@ -617,6 +606,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
617606 opal_show_help ("help-mpi-btl-tcp.txt" , "client handshake fail" ,
618607 true, opal_process_info .nodename ,
619608 getpid (), "did not receive entire connect ACK from peer" );
609+
620610 return OPAL_ERR_BAD_PARAM ;
621611 }
622612 if (0 != strncmp (hs_msg .magic_id , mca_btl_tcp_magic_id_string , len )) {
@@ -636,7 +626,6 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
636626 if (0 != opal_compare_proc (btl_proc -> proc_opal -> proc_name , guid )) {
637627 BTL_ERROR (("received unexpected process identifier %s" ,
638628 OPAL_NAME_PRINT (guid )));
639- btl_endpoint -> endpoint_state = MCA_BTL_TCP_FAILED ;
640629 mca_btl_tcp_endpoint_close (btl_endpoint );
641630 return OPAL_ERR_UNREACH ;
642631 }
@@ -843,7 +832,6 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_en
843832 opal_net_get_hostname ((struct sockaddr * ) & endpoint_addr ),
844833 ((struct sockaddr_in * ) & endpoint_addr )-> sin_port ,
845834 strerror (opal_socket_errno ), opal_socket_errno ));
846- btl_endpoint -> endpoint_state = MCA_BTL_TCP_FAILED ;
847835 mca_btl_tcp_endpoint_close (btl_endpoint );
848836 return OPAL_ERROR ;
849837 }
@@ -860,7 +848,6 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_en
860848 getpid (), msg ,
861849 strerror (opal_socket_errno ), opal_socket_errno );
862850 free (msg );
863- btl_endpoint -> endpoint_state = MCA_BTL_TCP_FAILED ;
864851 mca_btl_tcp_endpoint_close (btl_endpoint );
865852 return OPAL_ERROR ;
866853 }
@@ -932,15 +919,12 @@ static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user)
932919 OPAL_THREAD_UNLOCK (& btl_endpoint -> endpoint_send_lock );
933920 MCA_BTL_TCP_ENDPOINT_DUMP (10 , btl_endpoint , true, "connected" );
934921 }
935- else if (OPAL_ERR_BAD_PARAM == rc
936- || OPAL_ERROR == rc ) {
922+ else if (OPAL_ERR_BAD_PARAM == rc ) {
937923 /* If we get a BAD_PARAM, it means that it probably wasn't
938924 an OMPI process on the other end of the socket (e.g.,
939- the magic string ID failed). recv_connect_ack already cleaned
940- up the socket. */
941- /* If we get OPAL_ERROR, the other end closed the connection
941- * because it has initiated a symmetrical connection on its end.
943- * recv_connect_ack already cleaned up the socket. */
925+ the magic string ID failed). So we can probably just
926+ close the socket and ignore this connection. */
927+ CLOSE_THE_SOCKET (sd );
944928 }
945929 else {
946930 /* Otherwise, it probably *was* an OMPI peer process on
@@ -1079,8 +1063,6 @@ static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user)
10791063 opal_event_del (& btl_endpoint -> endpoint_send_event );
10801064 }
10811065 break ;
1082- case MCA_BTL_TCP_FAILED :
1083- break ;
10841066 default :
10851067 BTL_ERROR (("invalid connection state (%d)" , btl_endpoint -> endpoint_state ));
10861068 MCA_BTL_TCP_ENDPOINT_DUMP (1 , btl_endpoint , true, "event_del(send) [endpoint_send_handler:error]" );
0 commit comments