@@ -486,7 +486,7 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
486486 }
487487
488488 /*
489- * in case the peer address has all intended connections,
489+ * in case the peer address has created all intended connections,
490490 * mark the complete peer interface as 'not available'
491491 */
492492 if (endpoint_addr -> addr_inuse >= mca_btl_tcp_component .tcp_num_links ) {
@@ -810,12 +810,15 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name)
810810void mca_btl_tcp_proc_accept (mca_btl_tcp_proc_t * btl_proc , struct sockaddr * addr , int sd )
811811{
812812 OPAL_THREAD_LOCK (& btl_proc -> proc_lock );
813+ int found_match = 0 ;
814+ mca_btl_base_endpoint_t * match_btl_endpoint ;
815+
813816 for ( size_t i = 0 ; i < btl_proc -> proc_endpoint_count ; i ++ ) {
814817 mca_btl_base_endpoint_t * btl_endpoint = btl_proc -> proc_endpoints [i ];
815818 /* We are not here to make a decision about what is good socket
816819 * and what is not. We simply check that this socket fits the endpoint
817- * end we prepare for the real decision function mca_btl_tcp_endpoint_accept. */
818- if ( btl_endpoint -> endpoint_addr -> addr_family != addr -> sa_family ) {
820+ * and we prepare for the real decision function mca_btl_tcp_endpoint_accept. */
821+ if ( btl_endpoint -> endpoint_addr -> addr_family != addr -> sa_family ) {
819822 continue ;
820823 }
821824 switch (addr -> sa_family ) {
@@ -833,6 +836,10 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr
833836 tmp [1 ], 16 ),
834837 (int )i , (int )btl_proc -> proc_endpoint_count );
835838 continue ;
839+ } else if (btl_endpoint -> endpoint_state != MCA_BTL_TCP_CLOSED ) {
840+ found_match = 1 ;
841+ match_btl_endpoint = btl_endpoint ;
842+ continue ;
836843 }
837844 break ;
838845#if OPAL_ENABLE_IPV6
@@ -857,10 +864,20 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr
857864 ;
858865 }
859866
867+ /* Set state to CONNECTING to ensure that subsequent connections do not attempt to re-use the endpoint in the num_links > 1 case */
868+ btl_endpoint -> endpoint_state = MCA_BTL_TCP_CONNECTING ;
860869 (void )mca_btl_tcp_endpoint_accept (btl_endpoint , addr , sd );
861870 OPAL_THREAD_UNLOCK (& btl_proc -> proc_lock );
862871 return ;
863872 }
873+ /* In this case the connection was inbound to an exported address, but the matching endpoint was not in a CLOSED state.
874+ * mca_btl_tcp_endpoint_accept() has logic to deal with the race condition that has likely caused this
875+ * scenario, so call it here. */
876+ if (found_match ) {
877+ (void )mca_btl_tcp_endpoint_accept (match_btl_endpoint , addr , sd );
878+ OPAL_THREAD_UNLOCK (& btl_proc -> proc_lock );
879+ return ;
880+ }
864881 /* No further use of this socket. Close it */
865882 CLOSE_THE_SOCKET (sd );
866883 {
0 commit comments