@@ -970,10 +970,7 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata)
970970 ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
971971 ORTE_NAME_PRINT (& pop -> peer ));
972972
973- /* if we are terminating, or recovery isn't enabled, then don't attempt to reconnect */
974- if (!orte_enable_recovery || orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered ) {
975- goto cleanup ;
976- }
973+ MCA_OOB_TCP_CHECK_SHUTDOWN (pop );
977974
978975 /* Mark that we no longer support this peer */
979976 memcpy (& ui64 , (char * )& pop -> peer , sizeof (uint64_t ));
@@ -987,7 +984,6 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata)
987984 ORTE_ERROR_LOG (rc );
988985 }
989986
990- cleanup :
991987 /* activate the proc state */
992988 if (ORTE_SUCCESS != orte_routed .route_lost (& pop -> peer )) {
993989 ORTE_ACTIVATE_PROC_STATE (& pop -> peer , ORTE_PROC_STATE_LIFELINE_LOST );
@@ -1010,6 +1006,8 @@ void mca_oob_tcp_component_no_route(int fd, short args, void *cbdata)
10101006 ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
10111007 ORTE_NAME_PRINT (& mop -> hop ));
10121008
1009+ MCA_OOB_TCP_CHECK_SHUTDOWN (mop );
1010+
10131011 /* mark that we cannot reach this hop */
10141012 memcpy (& ui64 , (char * )& (mop -> hop ), sizeof (uint64_t ));
10151013 if (OPAL_SUCCESS != opal_hash_table_get_value_uint64 (& orte_oob_base .peers ,
@@ -1022,16 +1020,11 @@ void mca_oob_tcp_component_no_route(int fd, short args, void *cbdata)
10221020 ORTE_ERROR_LOG (rc );
10231021 }
10241022
1025- /* report the error back to the OOB and let it try other components
1026- * or declare a problem
1027- */
1028- if (!orte_finalizing && !orte_abnormal_term_ordered ) {
1029- /* if this was a lifeline, then alert */
1030- if (ORTE_SUCCESS != orte_routed .route_lost (& mop -> hop )) {
1031- ORTE_ACTIVATE_PROC_STATE (& mop -> hop , ORTE_PROC_STATE_LIFELINE_LOST );
1032- } else {
1033- ORTE_ACTIVATE_PROC_STATE (& mop -> hop , ORTE_PROC_STATE_COMM_FAILED );
1034- }
1023+ /* if this was a lifeline, then alert */
1024+ if (ORTE_SUCCESS != orte_routed .route_lost (& mop -> hop )) {
1025+ ORTE_ACTIVATE_PROC_STATE (& mop -> hop , ORTE_PROC_STATE_LIFELINE_LOST );
1026+ } else {
1027+ ORTE_ACTIVATE_PROC_STATE (& mop -> hop , ORTE_PROC_STATE_COMM_FAILED );
10351028 }
10361029
10371030 OBJ_RELEASE (mop );
@@ -1049,11 +1042,7 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata)
10491042 ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
10501043 ORTE_NAME_PRINT (& mop -> hop ));
10511044
1052- if (orte_finalizing || orte_abnormal_term_ordered ) {
1053- /* just ignore the problem */
1054- OBJ_RELEASE (mop );
1055- return ;
1056- }
1045+ MCA_OOB_TCP_CHECK_SHUTDOWN (mop );
10571046
10581047 /* mark that this component cannot reach this hop */
10591048 memcpy (& ui64 , (char * )& (mop -> hop ), sizeof (uint64_t ));
@@ -1121,11 +1110,7 @@ void mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata)
11211110 ORTE_NAME_PRINT (ORTE_PROC_MY_NAME ),
11221111 ORTE_NAME_PRINT (& pop -> peer ));
11231112
1124- /* if we are terminating, then don't attempt to reconnect */
1125- if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered ) {
1126- OBJ_RELEASE (pop );
1127- return ;
1128- }
1113+ MCA_OOB_TCP_CHECK_SHUTDOWN (pop );
11291114
11301115 /* activate the proc state */
11311116 opal_output_verbose (OOB_TCP_DEBUG_CONNECT , orte_oob_base_framework .framework_output ,
0 commit comments