Skip to content

Commit 009fec2

Browse files
committed
DAOS-17535 cart: do not exclude self when creating corpc for IV
Otherwise, excluding self may mislead the subsequent crt_get_filtered_grp_rank_list() call into treating the IV root as absent from the rank list, which then fails the related IV operation with -DER_NONEXIST. This may not be a perfect solution for the current cart IV logic, but as a temporary measure it keeps CR workable when some ranks are dead. Add a new test case to verify this corner case. Test-tag: pr,cat_recov Signed-off-by: Fan Yong <[email protected]>
1 parent 8126520 commit 009fec2

File tree

2 files changed

+70
-34
lines changed

2 files changed

+70
-34
lines changed

src/cart/crt_iv.c

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2210,15 +2210,13 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id,
22102210
crt_iv_comp_cb_t update_comp_cb, void *cb_arg,
22112211
void *user_priv, int update_rc)
22122212
{
2213-
crt_rpc_t *corpc_req = NULL;
2214-
struct crt_iv_sync_in *input;
2215-
int rc = 0;
2216-
bool delay_completion = false;
2217-
struct iv_sync_cb_info *iv_sync_cb = NULL;
2218-
struct crt_iv_ops *iv_ops;
2219-
crt_bulk_t local_bulk = CRT_BULK_NULL;
2220-
d_rank_list_t excluded_list;
2221-
d_rank_t excluded_ranks[1]; /* Excluding self */
2213+
struct crt_iv_sync_in *input;
2214+
struct crt_iv_ops *iv_ops;
2215+
crt_rpc_t *corpc_req = NULL;
2216+
struct iv_sync_cb_info *iv_sync_cb = NULL;
2217+
crt_bulk_t local_bulk = CRT_BULK_NULL;
2218+
int rc = 0;
2219+
bool delay_completion = false;
22222220

22232221
iv_ops = crt_iv_ops_get(ivns_internal, class_id);
22242222
D_ASSERT(iv_ops != NULL);
@@ -2243,24 +2241,6 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id,
22432241
D_GOTO(exit, rc = -DER_INVAL);
22442242
}
22452243

2246-
/* Exclude self from corpc */
2247-
excluded_list.rl_nr = 1;
2248-
excluded_list.rl_ranks = excluded_ranks;
2249-
excluded_ranks[0] = ivns_internal->cii_grp_priv->gp_self;
2250-
/* Perform refresh on local node */
2251-
if (sync_type->ivs_event == CRT_IV_SYNC_EVENT_UPDATE)
2252-
rc = iv_ops->ivo_on_refresh(ivns_internal, iv_key, 0,
2253-
iv_value, iv_value ? false : true,
2254-
0, user_priv);
2255-
else if (sync_type->ivs_event == CRT_IV_SYNC_EVENT_NOTIFY)
2256-
rc = iv_ops->ivo_on_refresh(ivns_internal, iv_key, 0,
2257-
NULL, iv_value ? false : true,
2258-
0, user_priv);
2259-
else {
2260-
D_ERROR("Unknown ivs_event %d\n", sync_type->ivs_event);
2261-
D_GOTO(exit, rc = -DER_INVAL);
2262-
}
2263-
22642244
local_bulk = CRT_BULK_NULL;
22652245
if (iv_value != NULL && d_sgl_buf_size(iv_value) > crt_gdata.cg_iv_inline_limit) {
22662246
IV_DBG(iv_key, "bulk transfer with size %zu\n",
@@ -2273,13 +2253,9 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id,
22732253
}
22742254
}
22752255

2276-
rc = crt_corpc_req_create(ivns_internal->cii_ctx,
2277-
&ivns_internal->cii_grp_priv->gp_pub,
2278-
&excluded_list,
2279-
CRT_OPC_IV_SYNC,
2280-
local_bulk, NULL, 0,
2281-
ivns_internal->cii_gns.gn_tree_topo,
2282-
&corpc_req);
2256+
rc = crt_corpc_req_create(ivns_internal->cii_ctx, &ivns_internal->cii_grp_priv->gp_pub,
2257+
NULL, CRT_OPC_IV_SYNC, local_bulk, NULL, 0,
2258+
ivns_internal->cii_gns.gn_tree_topo, &corpc_req);
22832259
if (rc != 0) {
22842260
D_ERROR("crt_corpc_req_create(): "DF_RC"\n", DP_RC(rc));
22852261
D_GOTO(exit, rc);

src/tests/suite/daos_cr.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/**
22
* (C) Copyright 2023-2024 Intel Corporation.
3+
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
34
*
45
* SPDX-License-Identifier: BSD-2-Clause-Patent
56
*/
@@ -3785,6 +3786,62 @@ cr_maintenance_mode(void **state)
37853786
cr_cleanup(arg, &pool, 1);
37863787
}
37873788

3789+
/*
3790+
* 1. Exclude rank 0.
3791+
* 2. Create pool without inconsistency.
3792+
* 3. Start checker without options.
3793+
* 4. Query checker, it should be completed instead of being blocked.
3794+
* 5. Switch to normal mode and cleanup.
3795+
*/
3796+
static void
3797+
cr_lost_rank0(void **state)
3798+
{
3799+
test_arg_t *arg = *state;
3800+
struct test_pool pool = {0};
3801+
struct daos_check_info dci = {0};
3802+
int rc;
3803+
3804+
print_message("CR29: run CR with rank 0 excluded at the beginning\n");
3805+
3806+
print_message("CR: excluding the rank 0 ...\n");
3807+
rc = dmg_system_exclude_rank(dmg_config_file, 0);
3808+
assert_rc_equal(rc, 0);
3809+
3810+
rc = cr_pool_create(state, &pool, false, TCC_NONE);
3811+
assert_rc_equal(rc, 0);
3812+
3813+
rc = cr_system_stop(false);
3814+
assert_rc_equal(rc, 0);
3815+
3816+
rc = cr_mode_switch(true);
3817+
assert_rc_equal(rc, 0);
3818+
3819+
rc = cr_check_start(TCSF_RESET, 0, NULL, NULL);
3820+
assert_rc_equal(rc, 0);
3821+
3822+
cr_ins_wait(1, &pool.pool_uuid, &dci);
3823+
3824+
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
3825+
assert_rc_equal(rc, 0);
3826+
3827+
rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 0, NULL, NULL, NULL);
3828+
assert_rc_equal(rc, 0);
3829+
3830+
/* Reint the rank for subsequent test. */
3831+
rc = cr_rank_reint(0, true);
3832+
assert_rc_equal(rc, 0);
3833+
3834+
rc = cr_mode_switch(false);
3835+
assert_rc_equal(rc, 0);
3836+
3837+
rc = cr_system_start();
3838+
assert_rc_equal(rc, 0);
3839+
3840+
cr_dci_fini(&dci);
3841+
cr_cleanup(arg, &pool, 1);
3842+
}
3843+
3844+
/* clang-format off */
37883845
static const struct CMUnitTest cr_tests[] = {
37893846
{ "CR1: start checker for specified pools",
37903847
cr_start_specified, async_disable, test_case_teardown},
@@ -3842,7 +3899,10 @@ static const struct CMUnitTest cr_tests[] = {
38423899
cr_handle_fail_pool2, async_disable, test_case_teardown},
38433900
{ "CR28: maintenance mode after dry-run check",
38443901
cr_maintenance_mode, async_disable, test_case_teardown},
3902+
{ "CR29: run CR with rank 0 excluded at the beginning",
3903+
cr_lost_rank0, async_disable, test_case_teardown},
38453904
};
3905+
/* clang-format on */
38463906

38473907
static int
38483908
cr_setup(void **state)

0 commit comments

Comments
 (0)