Skip to content

Commit 498e164

Browse files
committed
DAOS-17135 cart: do not exclude self when creating corpc for IV
Otherwise, it may mislead a subsequent crt_get_filtered_grp_rank_list() call into treating the IV root as absent from the rank list, which then fails the related IV operation with -DER_NONEXIST. This may not be a perfect solution for the current cart IV logic, but as a temporary measure it makes CR workable when some ranks are dead. Add a new test case to verify this corner case. Signed-off-by: Fan Yong <fan.yong@hpe.com>
1 parent cfdd691 commit 498e164

File tree

2 files changed

+62
-13
lines changed

2 files changed

+62
-13
lines changed

src/cart/crt_iv.c

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2217,8 +2217,6 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id,
22172217
struct iv_sync_cb_info *iv_sync_cb = NULL;
22182218
struct crt_iv_ops *iv_ops;
22192219
crt_bulk_t local_bulk = CRT_BULK_NULL;
2220-
d_rank_list_t excluded_list;
2221-
d_rank_t excluded_ranks[1]; /* Excluding self */
22222220

22232221
iv_ops = crt_iv_ops_get(ivns_internal, class_id);
22242222
D_ASSERT(iv_ops != NULL);
@@ -2243,10 +2241,6 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id,
22432241
D_GOTO(exit, rc = -DER_INVAL);
22442242
}
22452243

2246-
/* Exclude self from corpc */
2247-
excluded_list.rl_nr = 1;
2248-
excluded_list.rl_ranks = excluded_ranks;
2249-
excluded_ranks[0] = ivns_internal->cii_grp_priv->gp_self;
22502244
/* Perform refresh on local node */
22512245
if (sync_type->ivs_event == CRT_IV_SYNC_EVENT_UPDATE)
22522246
rc = iv_ops->ivo_on_refresh(ivns_internal, iv_key, 0,
@@ -2273,13 +2267,9 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id,
22732267
}
22742268
}
22752269

2276-
rc = crt_corpc_req_create(ivns_internal->cii_ctx,
2277-
&ivns_internal->cii_grp_priv->gp_pub,
2278-
&excluded_list,
2279-
CRT_OPC_IV_SYNC,
2280-
local_bulk, NULL, 0,
2281-
ivns_internal->cii_gns.gn_tree_topo,
2282-
&corpc_req);
2270+
rc = crt_corpc_req_create(ivns_internal->cii_ctx, &ivns_internal->cii_grp_priv->gp_pub,
2271+
NULL, CRT_OPC_IV_SYNC, local_bulk, NULL, 0,
2272+
ivns_internal->cii_gns.gn_tree_topo, &corpc_req);
22832273
if (rc != 0) {
22842274
D_ERROR("crt_corpc_req_create(): "DF_RC"\n", DP_RC(rc));
22852275
D_GOTO(exit, rc);

src/tests/suite/daos_cr.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3753,6 +3753,63 @@ cr_maintenance_mode(void **state)
37533753
cr_cleanup(arg, &pool, 1);
37543754
}
37553755

3756+
/*
3757+
* 1. Exclude rank 0.
3758+
* 2. Create pool without inconsistency.
3759+
* 3. Start checker without options.
3760+
* 4. Query checker, it should be completed instead of being blocked.
3761+
* 5. Switch to normal mode and cleanup.
3762+
*/
3763+
static void
3764+
cr_lost_rank0(void **state)
3765+
{
3766+
test_arg_t *arg = *state;
3767+
struct test_pool pool = { 0 };
3768+
struct daos_check_info dci = { 0 };
3769+
int rc;
3770+
3771+
FAULT_INJECTION_REQUIRED();
3772+
3773+
print_message("CR29: run CR with rank 0 excluded at the beginning\n");
3774+
3775+
print_message("CR: excluding the rank 0 ...\n");
3776+
rc = dmg_system_exclude_rank(dmg_config_file, 0);
3777+
assert_rc_equal(rc, 0);
3778+
3779+
rc = cr_pool_create(state, &pool, false, TCC_NONE);
3780+
assert_rc_equal(rc, 0);
3781+
3782+
rc = cr_system_stop(false);
3783+
assert_rc_equal(rc, 0);
3784+
3785+
rc = cr_mode_switch(true);
3786+
assert_rc_equal(rc, 0);
3787+
3788+
rc = cr_check_start(TCSF_RESET, 0, NULL, NULL);
3789+
assert_rc_equal(rc, 0);
3790+
3791+
cr_ins_wait(1, &pool.pool_uuid, &dci);
3792+
3793+
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
3794+
assert_rc_equal(rc, 0);
3795+
3796+
rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 0, NULL, NULL, NULL);
3797+
assert_rc_equal(rc, 0);
3798+
3799+
/* Reint the rank for subsequent test. */
3800+
rc = cr_rank_reint(0);
3801+
assert_rc_equal(rc, 0);
3802+
3803+
rc = cr_mode_switch(false);
3804+
assert_rc_equal(rc, 0);
3805+
3806+
rc = cr_system_start();
3807+
assert_rc_equal(rc, 0);
3808+
3809+
cr_dci_fini(&dci);
3810+
cr_cleanup(arg, &pool, 1);
3811+
}
3812+
37563813
static const struct CMUnitTest cr_tests[] = {
37573814
{ "CR1: start checker for specified pools",
37583815
cr_start_specified, async_disable, test_case_teardown},
@@ -3810,6 +3867,8 @@ static const struct CMUnitTest cr_tests[] = {
38103867
cr_handle_fail_pool2, async_disable, test_case_teardown},
38113868
{ "CR28: maintenance mode after dry-run check",
38123869
cr_maintenance_mode, async_disable, test_case_teardown},
3870+
{ "CR29: run CR with rank 0 excluded at the beginning",
3871+
cr_lost_rank0, async_disable, test_case_teardown},
38133872
};
38143873

38153874
static int

0 commit comments

Comments
 (0)