@@ -267,7 +267,7 @@ static int ucp_ep_shall_use_indirect_id(ucp_context_h context,
267267 return !(ep_init_flags & UCP_EP_INIT_FLAG_INTERNAL ) &&
268268 ((context -> config .ext .proto_indirect_id == UCS_CONFIG_ON ) ||
269269 ((context -> config .ext .proto_indirect_id == UCS_CONFIG_AUTO ) &&
270- (ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE )));
270+ (ep_init_flags & UCP_EP_INIT_ERR_MODE_FAILOVER_MASK )));
271271}
272272
273273void ucp_ep_peer_mem_destroy (ucp_context_h context ,
@@ -573,8 +573,13 @@ void ucp_ep_release_id(ucp_ep_h ep)
/*
 * Diff hunk for ucp_ep_config_key_set_err_mode(): stores an error handling
 * mode in key->err_mode, selected from the bits of ep_init_flags.
 *
 * Removed lines ('-'): UCP_ERR_HANDLING_MODE_PEER when
 * UCP_EP_INIT_ERR_MODE_PEER_FAILURE is set, otherwise
 * UCP_ERR_HANDLING_MODE_NONE.
 *
 * Added lines ('+'): a three-way selection that additionally maps
 * UCP_EP_INIT_ERR_MODE_FAILOVER to UCP_ERR_HANDLING_MODE_FAILOVER.
 *
 * key           - endpoint configuration key whose err_mode field is written.
 * ep_init_flags - endpoint init flags; only the two error mode bits tested
 *                 below are examined here.
 */
573573void ucp_ep_config_key_set_err_mode (ucp_ep_config_key_t * key ,
574574 unsigned ep_init_flags )
575575{
576- key -> err_mode = (ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE ) ?
577- UCP_ERR_HANDLING_MODE_PEER : UCP_ERR_HANDLING_MODE_NONE ;
/* The failover bit is tested first, so when both the failover and the
 * peer-failure bits are set the failover mode is the one selected. */
576+ if (ep_init_flags & UCP_EP_INIT_ERR_MODE_FAILOVER ) {
577+ key -> err_mode = UCP_ERR_HANDLING_MODE_FAILOVER ;
578+ } else if (ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE ) {
579+ key -> err_mode = UCP_ERR_HANDLING_MODE_PEER ;
580+ } else {
/* Neither error mode bit is set: no error handling. */
581+ key -> err_mode = UCP_ERR_HANDLING_MODE_NONE ;
582+ }
578583}
579584
580585void ucp_ep_config_key_init_flags (ucp_ep_config_key_t * key ,
@@ -790,7 +795,7 @@ static ucs_status_t ucp_ep_init_create_wireup(ucp_ep_h ep,
790795 if (ucp_ep_init_flags_has_cm (ep_init_flags )) {
791796 key .cm_lane = 0 ;
792797 /* Send keepalive on wireup_ep (which will send on aux_ep) */
793- if (ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE ) {
798+ if (ep_init_flags & UCP_EP_INIT_ERR_MODE_FAILOVER_MASK ) {
794799 key .keepalive_lane = 0 ;
795800 }
796801 } else {
@@ -931,8 +936,7 @@ ucp_sa_data_v1_unpack(const ucp_wireup_sockaddr_data_base_t *sa_data,
931936 return UCS_ERR_UNSUPPORTED ;
932937 }
933938
934- * ep_init_flags_p = (sa_data -> header == UCP_ERR_HANDLING_MODE_PEER ) ?
935- UCP_EP_INIT_ERR_MODE_PEER_FAILURE : 0 ;
939+ * ep_init_flags_p = ucp_ep_err_mode_init_flags (sa_data -> header );
936940 * worker_addr_p = sa_data_v1 + 1 ;
937941 return UCS_OK ;
938942}
@@ -942,8 +946,15 @@ ucp_sa_data_v2_unpack(const ucp_wireup_sockaddr_data_base_t *sa_data,
942946 unsigned * ep_init_flags_p ,
943947 const void * * worker_addr_p )
944948{
945- * ep_init_flags_p = (sa_data -> header & UCP_SA_DATA_FLAG_ERR_MODE_PEER ) ?
946- UCP_EP_INIT_ERR_MODE_PEER_FAILURE : 0 ;
949+ if (sa_data -> header & UCP_SA_DATA_FLAG_ERR_MODE_FAILOVER ) {
950+ ucs_assert (sa_data -> header & UCP_SA_DATA_FLAG_ERR_MODE_PEER );
951+ * ep_init_flags_p = UCP_EP_INIT_ERR_MODE_FAILOVER_MASK ;
952+ } else if (sa_data -> header & UCP_SA_DATA_FLAG_ERR_MODE_PEER ) {
953+ * ep_init_flags_p = UCP_EP_INIT_ERR_MODE_PEER_FAILURE ;
954+ } else {
955+ * ep_init_flags_p = 0 ;
956+ }
957+
947958 * worker_addr_p = sa_data + 1 ;
948959 return UCS_OK ;
949960}
@@ -1142,7 +1153,7 @@ ucp_ep_create_api_to_worker_addr(ucp_worker_h worker,
11421153out_resolve_remote_id :
11431154 if ((context -> config .ext .resolve_remote_ep_id == UCS_CONFIG_ON ) ||
11441155 ((context -> config .ext .resolve_remote_ep_id == UCS_CONFIG_AUTO ) &&
1145- (ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE ) &&
1156+ (ep_init_flags & UCP_EP_INIT_ERR_MODE_FAILOVER_MASK ) &&
11461157 ucp_worker_keepalive_is_enabled (worker ))) {
11471158 /* If resolving remote ID forced by configuration or PEER_FAILURE
11481159 * and keepalive were requested, resolve remote endpoint ID prior to
@@ -1169,11 +1180,7 @@ ucp_ep_create_api_to_worker_addr(ucp_worker_h worker,
11691180static void ucp_ep_params_check_err_handling (ucp_ep_h ep ,
11701181 const ucp_ep_params_t * params )
11711182{
1172- ucp_err_handling_mode_t err_mode =
1173- UCP_PARAM_VALUE (EP , params , err_mode , ERR_HANDLING_MODE ,
1174- UCP_ERR_HANDLING_MODE_NONE );
1175-
1176- if (err_mode == UCP_ERR_HANDLING_MODE_NONE ) {
1183+ if (ucp_ep_params_err_handling_mode (params ) == UCP_ERR_HANDLING_MODE_NONE ) {
11771184 return ;
11781185 }
11791186
@@ -1193,13 +1200,6 @@ ucs_status_t ucp_ep_create(ucp_worker_h worker, const ucp_ep_params_t *params,
11931200 unsigned flags = UCP_PARAM_VALUE (EP , params , flags , FLAGS , 0 );
11941201 ucs_status_t status ;
11951202
1196- /* TODO: Implement failover error handling mode */
1197- if (UCP_PARAM_VALUE (EP , params , err_mode , ERR_HANDLING_MODE ,
1198- UCP_ERR_HANDLING_MODE_NONE ) == UCP_ERR_HANDLING_MODE_FAILOVER ) {
1199- ucs_error ("failover error handling mode is not implemented" );
1200- return UCS_ERR_NOT_IMPLEMENTED ;
1201- }
1202-
12031203 UCS_ASYNC_BLOCK (& worker -> async );
12041204
12051205 if (flags & UCP_EP_PARAMS_FLAGS_CLIENT_SERVER ) {
@@ -1413,10 +1413,9 @@ static void ucp_ep_failed_destroy(uct_ep_h ep)
14131413
14141414static void ucp_ep_discard_lanes (ucp_ep_h ep , ucs_status_t discard_status )
14151415{
1416- unsigned ep_flush_flags = (ucp_ep_config (ep )-> key .err_mode ==
1417- UCP_ERR_HANDLING_MODE_NONE ) ?
1418- UCT_FLUSH_FLAG_LOCAL :
1419- UCT_FLUSH_FLAG_CANCEL ;
1416+ unsigned ep_flush_flags = ucp_ep_config_err_handling_enabled (ep ) ?
1417+ UCT_FLUSH_FLAG_CANCEL :
1418+ UCT_FLUSH_FLAG_LOCAL ;
14201419 uct_ep_h uct_eps [UCP_MAX_LANES ] = { NULL };
14211420 ucp_ep_discard_lanes_arg_t * discard_arg ;
14221421 ucs_status_t status ;
@@ -1475,7 +1474,6 @@ ucp_ep_set_failed(ucp_ep_h ucp_ep, ucp_lane_index_t lane, ucs_status_t status)
14751474{
14761475 UCS_STRING_BUFFER_ONSTACK (lane_info_strb , 64 );
14771476 ucp_ep_ext_t * ep_ext = ucp_ep -> ext ;
1478- ucp_err_handling_mode_t err_mode ;
14791477 ucs_log_level_t log_level ;
14801478 ucp_request_t * close_req ;
14811479
@@ -1516,10 +1514,8 @@ ucp_ep_set_failed(ucp_ep_h ucp_ep, ucp_lane_index_t lane, ucs_status_t status)
15161514 } else if (ep_ext -> err_cb == NULL ) {
15171515 /* Print error if user requested error handling support but did not
15181516 install a valid error handling callback */
1519- err_mode = ucp_ep_config (ucp_ep )-> key .err_mode ;
1520- log_level = (err_mode == UCP_ERR_HANDLING_MODE_NONE ) ?
1521- UCS_LOG_LEVEL_DIAG :
1522- UCS_LOG_LEVEL_ERROR ;
1517+ log_level = ucp_ep_config_err_handling_enabled (ucp_ep ) ?
1518+ UCS_LOG_LEVEL_ERROR : UCS_LOG_LEVEL_DIAG ;
15231519
15241520 ucp_ep_get_lane_info_str (ucp_ep , lane , & lane_info_strb );
15251521 ucs_log (log_level ,
@@ -1755,7 +1751,7 @@ ucs_status_ptr_t ucp_ep_close_nbx(ucp_ep_h ep, const ucp_request_param_t *param)
17551751 ucp_request_t * close_req ;
17561752
17571753 if ((ucp_request_param_flags (param ) & UCP_EP_CLOSE_FLAG_FORCE ) &&
1758- ( ucp_ep_config ( ep ) -> key . err_mode != UCP_ERR_HANDLING_MODE_PEER )) {
1754+ ! ucp_ep_config_err_handling_enabled ( ep )) {
17591755 return UCS_STATUS_PTR (UCS_ERR_INVALID_PARAM );
17601756 }
17611757
@@ -2683,7 +2679,8 @@ ucs_status_t ucp_ep_config_init(ucp_worker_h worker, ucp_ep_config_t *config,
26832679 config -> md_index [lane ] = context -> tl_rscs [rsc_index ].md_index ;
26842680 if (ucp_ep_config_connect_p2p (worker , & config -> key , rsc_index )) {
26852681 config -> p2p_lanes |= UCS_BIT (lane );
2686- } else if (config -> key .err_mode == UCP_ERR_HANDLING_MODE_PEER ) {
2682+ } else if ((config -> key .err_mode == UCP_ERR_HANDLING_MODE_PEER ) ||
2683+ (config -> key .err_mode == UCP_ERR_HANDLING_MODE_FAILOVER )) {
26872684 config -> uct_rkey_pack_flags |= UCT_MD_MKEY_PACK_FLAG_INVALIDATE_RMA ;
26882685 }
26892686
@@ -3969,3 +3966,17 @@ void ucp_ep_set_cfg_index(ucp_ep_h ep, ucp_worker_cfg_index_t cfg_index)
39693966 ucp_ep_config_activate_worker_ifaces (ep -> worker , cfg_index );
39703967 ucp_ep_config_proto_init (ep -> worker , cfg_index );
39713968}
3969+
/*
 * ucp_ep_err_mode_init_flags(): translates an error handling mode into the
 * matching endpoint init flags (function added by this diff).
 *
 *   UCP_ERR_HANDLING_MODE_NONE     -> 0
 *   UCP_ERR_HANDLING_MODE_PEER     -> UCP_EP_INIT_ERR_MODE_PEER_FAILURE
 *   UCP_ERR_HANDLING_MODE_FAILOVER -> UCP_EP_INIT_ERR_MODE_FAILOVER_MASK
 *
 * Any other value reaches the default case and calls ucs_fatal(). There is
 * no return statement after that call, so the code presumably relies on
 * ucs_fatal() not returning - TODO confirm.
 *
 * NOTE(review): ucp_sa_data_v1_unpack() passes sa_data->header straight into
 * this function. If that field can carry a value outside the three modes
 * above (e.g. data supplied by a remote peer - TODO confirm), the default
 * case is hit instead of an error status being returned; the expression
 * this call replaced yielded 0 flags for every non-PEER header value.
 */
3970+ unsigned ucp_ep_err_mode_init_flags (ucp_err_handling_mode_t err_mode )
3971+ {
3972+ switch (err_mode ) {
3973+ case UCP_ERR_HANDLING_MODE_NONE :
3974+ return 0 ;
3975+ case UCP_ERR_HANDLING_MODE_PEER :
3976+ return UCP_EP_INIT_ERR_MODE_PEER_FAILURE ;
3977+ case UCP_ERR_HANDLING_MODE_FAILOVER :
3978+ return UCP_EP_INIT_ERR_MODE_FAILOVER_MASK ;
3979+ default :
/* Unrecognized mode value: reported through ucs_fatal(). */
3980+ ucs_fatal ("invalid error handling mode: %d" , err_mode );
3981+ }
3982+ }
0 commit comments