22 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33 * University Research and Technology
44 * Corporation. All rights reserved.
5- * Copyright (c) 2004-2017 The University of Tennessee and The University
5+ * Copyright (c) 2004-2016 The University of Tennessee and The University
66 * of Tennessee Research Foundation. All rights
77 * reserved.
88 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -388,7 +388,6 @@ mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint,
388388{
389389 int ret = mca_btl_tcp_send_blocking (btl_endpoint -> endpoint_sd , data , size );
390390 if (ret < 0 ) {
391- btl_endpoint -> endpoint_state = MCA_BTL_TCP_FAILED ;
392391 mca_btl_tcp_endpoint_close (btl_endpoint );
393392 }
394393 return ret ;
@@ -539,30 +538,20 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
539538 btl_endpoint -> endpoint_sd = -1 ;
540539 /**
541540 * If we keep failing to connect to the peer let the caller know about
542- * this situation by triggering the callback on all pending fragments and
543- * reporting the error. The upper layer has then the opportunity to
544- * re-route or re-schedule the fragments.
541+ * this situation by triggering all the pending fragments callback and
542+ * reporting the error.
545543 */
546544 if ( MCA_BTL_TCP_FAILED == btl_endpoint -> endpoint_state ) {
547545 mca_btl_tcp_frag_t * frag = btl_endpoint -> endpoint_send_frag ;
548546 if ( NULL == frag )
549547 frag = (mca_btl_tcp_frag_t * )opal_list_remove_first (& btl_endpoint -> endpoint_frags );
550548 while (NULL != frag ) {
551549 frag -> base .des_cbfunc (& frag -> btl -> super , frag -> endpoint , & frag -> base , OPAL_ERR_UNREACH );
552- if ( frag -> base .des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ) {
553- MCA_BTL_TCP_FRAG_RETURN (frag );
554- }
550+
555551 frag = (mca_btl_tcp_frag_t * )opal_list_remove_first (& btl_endpoint -> endpoint_frags );
556552 }
557- btl_endpoint -> endpoint_send_frag = NULL ;
558- /* Let's report the error upstream */
559- if (NULL != btl_endpoint -> endpoint_btl -> tcp_error_cb ) {
560- btl_endpoint -> endpoint_btl -> tcp_error_cb ((mca_btl_base_module_t * )btl_endpoint -> endpoint_btl , 0 ,
561- btl_endpoint -> endpoint_proc -> proc_opal , "Socket closed" );
562- }
563- } else {
564- btl_endpoint -> endpoint_state = MCA_BTL_TCP_CLOSED ;
565553 }
554+ btl_endpoint -> endpoint_state = MCA_BTL_TCP_CLOSED ;
566555}
567556
568557/*
@@ -619,6 +608,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
619608 opal_show_help ("help-mpi-btl-tcp.txt" , "client handshake fail" ,
620609 true, opal_process_info .nodename ,
621610 getpid (), "did not receive entire connect ACK from peer" );
611+
622612 return OPAL_ERR_BAD_PARAM ;
623613 }
624614 if (0 != strncmp (hs_msg .magic_id , mca_btl_tcp_magic_id_string , len )) {
@@ -638,7 +628,6 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
638628 if (0 != opal_compare_proc (btl_proc -> proc_opal -> proc_name , guid )) {
639629 BTL_ERROR (("received unexpected process identifier %s" ,
640630 OPAL_NAME_PRINT (guid )));
641- btl_endpoint -> endpoint_state = MCA_BTL_TCP_FAILED ;
642631 mca_btl_tcp_endpoint_close (btl_endpoint );
643632 return OPAL_ERR_UNREACH ;
644633 }
@@ -845,7 +834,6 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_en
845834 opal_net_get_hostname ((struct sockaddr * ) & endpoint_addr ),
846835 ((struct sockaddr_in * ) & endpoint_addr )-> sin_port ,
847836 strerror (opal_socket_errno ), opal_socket_errno ));
848- btl_endpoint -> endpoint_state = MCA_BTL_TCP_FAILED ;
849837 mca_btl_tcp_endpoint_close (btl_endpoint );
850838 return OPAL_ERROR ;
851839 }
@@ -862,7 +850,6 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_en
862850 getpid (), msg ,
863851 strerror (opal_socket_errno ), opal_socket_errno );
864852 free (msg );
865- btl_endpoint -> endpoint_state = MCA_BTL_TCP_FAILED ;
866853 mca_btl_tcp_endpoint_close (btl_endpoint );
867854 return OPAL_ERROR ;
868855 }
@@ -934,15 +921,12 @@ static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user)
934921 OPAL_THREAD_UNLOCK (& btl_endpoint -> endpoint_send_lock );
935922 MCA_BTL_TCP_ENDPOINT_DUMP (10 , btl_endpoint , true, "connected" );
936923 }
937- else if (OPAL_ERR_BAD_PARAM == rc
938- || OPAL_ERROR == rc ) {
924+ else if (OPAL_ERR_BAD_PARAM == rc ) {
939925 /* If we get a BAD_PARAM, it means that it probably wasn't
940926 an OMPI process on the other end of the socket (e.g.,
941- the magic string ID failed). recv_connect_ack already cleaned
942- up the socket. */
943- /* If we get OPAL_ERROR, the other end closed the connection
944- * because it has initiated a symetrical connexion on its end.
945- * recv_connect_ack already cleaned up the socket. */
927+ the magic string ID failed). So we can probably just
928+ close the socket and ignore this connection. */
929+ CLOSE_THE_SOCKET (sd );
946930 }
947931 else {
948932 /* Otherwise, it probably *was* an OMPI peer process on
@@ -1081,8 +1065,6 @@ static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user)
10811065 opal_event_del (& btl_endpoint -> endpoint_send_event );
10821066 }
10831067 break ;
1084- case MCA_BTL_TCP_FAILED :
1085- break ;
10861068 default :
10871069 BTL_ERROR (("invalid connection state (%d)" , btl_endpoint -> endpoint_state ));
10881070 MCA_BTL_TCP_ENDPOINT_DUMP (1 , btl_endpoint , true, "event_del(send) [endpoint_send_handler:error]" );
0 commit comments