From 2fc77aeab144a2643946476de1fdab0031276524 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Fri, 18 Apr 2025 18:01:25 +0800 Subject: [PATCH] DAOS-17135 cart: not exclude self when create corpc for IV Otherwise, it may misguide subsequent crt_get_filtered_grp_rank_list() and regard the IV root as non-existent in the rank list, then fail the related IV operation with -DER_NONEXIST. It may not be a perfect solution for the current cart IV logic, but as a temporary option it makes CR workable when some ranks are dead. Add a new test case to verify such a corner case. Signed-off-by: Fan Yong --- src/cart/crt_iv.c | 30 +++++++------------ src/tests/suite/daos_cr.c | 62 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 20 deletions(-) diff --git a/src/cart/crt_iv.c b/src/cart/crt_iv.c index c1d9c9e85cb..4b249a157d6 100644 --- a/src/cart/crt_iv.c +++ b/src/cart/crt_iv.c @@ -2210,15 +2210,13 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id, crt_iv_comp_cb_t update_comp_cb, void *cb_arg, void *user_priv, int update_rc) { - crt_rpc_t *corpc_req = NULL; - struct crt_iv_sync_in *input; - int rc = 0; - bool delay_completion = false; - struct iv_sync_cb_info *iv_sync_cb = NULL; - struct crt_iv_ops *iv_ops; - crt_bulk_t local_bulk = CRT_BULK_NULL; - d_rank_list_t excluded_list; - d_rank_t excluded_ranks[1]; /* Excluding self */ + struct crt_iv_sync_in *input; + struct crt_iv_ops *iv_ops; + crt_rpc_t *corpc_req = NULL; + struct iv_sync_cb_info *iv_sync_cb = NULL; + crt_bulk_t local_bulk = CRT_BULK_NULL; + int rc = 0; + bool delay_completion = false; iv_ops = crt_iv_ops_get(ivns_internal, class_id); D_ASSERT(iv_ops != NULL); @@ -2243,10 +2241,6 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id, D_GOTO(exit, rc = -DER_INVAL); } - /* Exclude self from corpc */ - excluded_list.rl_nr = 1; - excluded_list.rl_ranks = excluded_ranks; - excluded_ranks[0] = ivns_internal->cii_grp_priv->gp_self; /*
Perform refresh on local node */ if (sync_type->ivs_event == CRT_IV_SYNC_EVENT_UPDATE) rc = iv_ops->ivo_on_refresh(ivns_internal, iv_key, 0, @@ -2273,13 +2267,9 @@ crt_ivsync_rpc_issue(struct crt_ivns_internal *ivns_internal, uint32_t class_id, } } - rc = crt_corpc_req_create(ivns_internal->cii_ctx, - &ivns_internal->cii_grp_priv->gp_pub, - &excluded_list, - CRT_OPC_IV_SYNC, - local_bulk, NULL, 0, - ivns_internal->cii_gns.gn_tree_topo, - &corpc_req); + rc = crt_corpc_req_create(ivns_internal->cii_ctx, &ivns_internal->cii_grp_priv->gp_pub, + NULL, CRT_OPC_IV_SYNC, local_bulk, NULL, 0, + ivns_internal->cii_gns.gn_tree_topo, &corpc_req); if (rc != 0) { D_ERROR("crt_corpc_req_create(): "DF_RC"\n", DP_RC(rc)); D_GOTO(exit, rc); diff --git a/src/tests/suite/daos_cr.c b/src/tests/suite/daos_cr.c index 12b4a9958dc..b59a093b79d 100644 --- a/src/tests/suite/daos_cr.c +++ b/src/tests/suite/daos_cr.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2023-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -3785,6 +3786,64 @@ cr_maintenance_mode(void **state) cr_cleanup(arg, &pool, 1); } +/* + * 1. Exclude rank 0. + * 2. Create pool without inconsistency. + * 3. Start checker without options. + * 4. Query checker, it should be completed instead of being blocked. + * 5. Switch to normal mode and cleanup. 
+ */ +static void +cr_lost_rank0(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pool = {0}; + struct daos_check_info dci = {0}; + int rc; + + FAULT_INJECTION_REQUIRED(); + + print_message("CR29: run CR with rank 0 excluded at the beginning\n"); + + print_message("CR: excluding the rank 0 ...\n"); + rc = dmg_system_exclude_rank(dmg_config_file, 0); + assert_rc_equal(rc, 0); + + rc = cr_pool_create(state, &pool, false, TCC_NONE); + assert_rc_equal(rc, 0); + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, NULL); + assert_rc_equal(rc, 0); + + cr_ins_wait(1, &pool.pool_uuid, &dci); + + rc = cr_ins_verify(&dci, TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 0, NULL, NULL, NULL); + assert_rc_equal(rc, 0); + + /* Reint the rank for subsequent test. */ + rc = cr_rank_reint(0, true); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + cr_dci_fini(&dci); + cr_cleanup(arg, &pool, 1); +} + +/* clang-format off */ static const struct CMUnitTest cr_tests[] = { { "CR1: start checker for specified pools", cr_start_specified, async_disable, test_case_teardown}, @@ -3842,7 +3901,10 @@ static const struct CMUnitTest cr_tests[] = { cr_handle_fail_pool2, async_disable, test_case_teardown}, { "CR28: maintenance mode after dry-run check", cr_maintenance_mode, async_disable, test_case_teardown}, + { "CR29: run CR with rank 0 excluded at the beginning", + cr_lost_rank0, async_disable, test_case_teardown}, }; +/* clang-format on */ static int cr_setup(void **state)