Skip to content

Commit f0e513e

Browse files
committed
DAOS-17861 cart: fix err handling in corpc (#17299)
1. Some fail cases already set the rc by crt_corpc_fail_parent_rpc()/ crt_corpc_fail_child_rpc(), or called crt_corpc_complete() that will reply parent already, so need to reset rc to 0 to avoid call crt_hg_reply_error_send() again or drop refcount. 2. Fix a refcount leak in a case when need not call local RPC handler in middle node. Signed-off-by: Xuezhao Liu <xuezhao.liu@hpe.com>
1 parent f455361 commit f0e513e

File tree

2 files changed

+28
-14
lines changed

2 files changed

+28
-14
lines changed

src/cart/crt_corpc.c

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -790,8 +790,13 @@ crt_corpc_req_hdlr(struct crt_rpc_priv *rpc_priv)
790790
opc_info = rpc_priv->crp_opc_info;
791791
co_ops = opc_info->coi_co_ops;
792792

793-
if (rpc_priv->crp_fail_hlc)
794-
D_GOTO(forward_done, rc = -DER_HLC_SYNC);
793+
if (rpc_priv->crp_fail_hlc) {
794+
rc = -DER_HLC_SYNC;
795+
RPC_ERROR(rpc_priv, "crp_fail_hlc (group %s) failed: " DF_RC "\n",
796+
co_info->co_grp_priv->gp_pub.cg_grpid, DP_RC(rc));
797+
crt_corpc_fail_parent_rpc(rpc_priv, rc);
798+
D_GOTO(forward_done, rc);
799+
}
795800

796801
/* Invoke pre-forward callback first if it is registered */
797802
if (co_ops && co_ops->co_pre_forward) {
@@ -908,20 +913,28 @@ crt_corpc_req_hdlr(struct crt_rpc_priv *rpc_priv)
908913
}
909914

910915
forward_done:
911-
if (rc != 0 && rpc_priv->crp_flags & CRT_RPC_FLAG_CO_FAILOUT)
912-
co_failout = true;
916+
if (rc != 0) {
917+
/* reset rc to 0 as it already failed the parent/child RPC and
918+
* will be replied/completed by crt_corpc_complete().
919+
*/
920+
rc = 0;
921+
if (rpc_priv->crp_flags & CRT_RPC_FLAG_CO_FAILOUT)
922+
co_failout = true;
923+
}
913924

914-
/* NOOP bcast (no child and root excluded) */
915-
if (co_info->co_child_num == 0 && (co_info->co_root_excluded || co_failout))
916-
crt_corpc_complete(rpc_priv);
925+
/* need not call local RPC handler */
926+
if (co_info->co_root_excluded || co_failout) {
927+
/* NOOP bcast (no child and root excluded) */
928+
if (co_info->co_child_num == 0)
929+
crt_corpc_complete(rpc_priv);
917930

918-
if (co_info->co_root_excluded == 1 || co_failout) {
919-
if (co_info->co_grp_priv->gp_self == co_info->co_root) {
920-
/* don't return error for root to avoid RPC_DECREF in
921-
* fail case in crt_req_send.
922-
*/
923-
rc = 0;
924-
}
931+
/* Corresponding the initial ref 1 in crt_rpc_handler_common() ->
932+
* crt_rpc_priv_init(rpc_priv, crt_ctx, true).
933+
* That ref commonly will be released by crt_rpc_common_hdlr() -> crt_handle_rpc(),
934+
* here as will not call crt_rpc_common_hdlr() so drop it explicitly.
935+
*/
936+
if (rpc_priv->crp_srv)
937+
RPC_DECREF(rpc_priv);
925938
D_GOTO(out, rc);
926939
}
927940

src/cart/crt_hg.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1470,6 +1470,7 @@ crt_hg_reply_send(struct crt_rpc_priv *rpc_priv)
14701470

14711471
D_ASSERT(rpc_priv != NULL);
14721472

1473+
/* corresponds to decref in crt_hg_reply_send_cb */
14731474
RPC_ADDREF(rpc_priv);
14741475
hg_ret = HG_Respond(rpc_priv->crp_hg_hdl, crt_hg_reply_send_cb,
14751476
rpc_priv, &rpc_priv->crp_pub.cr_output);

0 commit comments

Comments
 (0)