Skip to content

Commit eb750f9

Browse files
committed
Merge branch 'master' into liw/pool-query-out-pad
Features: control pool
2 parents 1a4d6cb + 8a24f84 commit eb750f9

File tree

9 files changed

+271
-101
lines changed

9 files changed

+271
-101
lines changed

src/chk/chk_common.c

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2022-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -1234,6 +1234,20 @@ chk_ins_merge_info(uint32_t *status_dst, uint32_t status_src, uint32_t *phase_ds
12341234
*status_dst = status_src;
12351235
}
12361236

1237+
void
1238+
chk_ins_cleanup(struct chk_instance *ins)
1239+
{
1240+
chk_stop_sched(ins);
1241+
ins->ci_inited = 0;
1242+
1243+
chk_iv_ns_cleanup(&ins->ci_iv_ns);
1244+
1245+
if (ins->ci_iv_group != NULL) {
1246+
crt_group_secondary_destroy(ins->ci_iv_group);
1247+
ins->ci_iv_group = NULL;
1248+
}
1249+
}
1250+
12371251
int
12381252
chk_ins_init(struct chk_instance **p_ins)
12391253
{
@@ -1300,11 +1314,8 @@ chk_ins_fini(struct chk_instance **p_ins)
13001314
if (ins == NULL)
13011315
return;
13021316

1303-
ins->ci_inited = 0;
1304-
chk_iv_ns_cleanup(&ins->ci_iv_ns);
1305-
1306-
if (ins->ci_iv_group != NULL)
1307-
crt_group_secondary_destroy(ins->ci_iv_group);
1317+
D_ASSERT(ins->ci_iv_ns == NULL);
1318+
D_ASSERT(ins->ci_iv_group == NULL);
13081319

13091320
d_rank_list_free(ins->ci_ranks);
13101321
D_ASSERT(d_list_empty(&ins->ci_dead_ranks));

src/chk/chk_engine.c

Lines changed: 58 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2345,6 +2345,7 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic
23452345
if (rc != 0)
23462346
goto out_stop;
23472347

2348+
ins->ci_pause = 0;
23482349
ins->ci_sched_running = 1;
23492350

23502351
rc = dss_ult_create(chk_engine_sched, ins, DSS_XS_SYS, 0, DSS_DEEP_STACK_SZ,
@@ -2407,6 +2408,8 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags)
24072408
int i;
24082409
int active = false;
24092410

2411+
CHK_IS_READY(ins);
2412+
24102413
if (gen != 0 && gen != cbk->cb_gen)
24112414
D_GOTO(log, rc = -DER_NOTAPPLICABLE);
24122415

@@ -2596,6 +2599,8 @@ chk_engine_query(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *ins_status
25962599
int rc = 0;
25972600
int i;
25982601

2602+
CHK_IS_READY(ins);
2603+
25992604
/*
26002605
* We support check queries from a new check leader in the case where the old leader
26012606
* crashed; the new leader may have a different check generation. So do not check "cb_gen" here,
@@ -2648,6 +2653,8 @@ chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version)
26482653
d_rank_list_t *rank_list = NULL;
26492654
int rc = 0;
26502655

2656+
CHK_IS_READY(ins);
2657+
26512658
if (cbk->cb_gen != gen)
26522659
D_GOTO(out, rc = -DER_NOTAPPLICABLE);
26532660

@@ -2745,6 +2752,8 @@ chk_engine_act(uint64_t gen, uint64_t seq, uint32_t act)
27452752
struct chk_instance *ins = chk_engine;
27462753
int rc;
27472754

2755+
CHK_IS_READY(ins);
2756+
27482757
if (ins->ci_bk.cb_gen != gen)
27492758
D_GOTO(out, rc = -DER_NOTAPPLICABLE);
27502759

@@ -2874,6 +2883,8 @@ chk_engine_cont_list(uint64_t gen, uuid_t pool_uuid, uuid_t **conts, uint32_t *c
28742883
int i = 0;
28752884
int rc = 0;
28762885

2886+
CHK_IS_READY(ins);
2887+
28772888
if (cbk->cb_gen != gen)
28782889
D_GOTO(out, rc = -DER_NOTAPPLICABLE);
28792890

@@ -2929,6 +2940,8 @@ chk_engine_pool_start(uint64_t gen, uuid_t uuid, uint32_t phase, uint32_t flags)
29292940
d_iov_t kiov;
29302941
int rc;
29312942

2943+
CHK_IS_READY(ins);
2944+
29322945
if (ins->ci_bk.cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING)
29332946
D_GOTO(out, rc = -DER_SHUTDOWN);
29342947

@@ -3047,6 +3060,8 @@ chk_engine_pool_mbs(uint64_t gen, uuid_t uuid, uint32_t phase, const char *label
30473060
int rc;
30483061
int i;
30493062

3063+
CHK_IS_READY(ins);
3064+
30503065
if (ins->ci_bk.cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING)
30513066
D_GOTO(out, rc = -DER_SHUTDOWN);
30523067

@@ -3158,6 +3173,8 @@ chk_engine_set_policy(uint64_t gen, uint32_t policy_nr, struct chk_policy *polic
31583173
struct chk_property *prop = &ins->ci_prop;
31593174
int rc = 0;
31603175

3176+
CHK_IS_READY(ins);
3177+
31613178
/* Do nothing if no (engine) check instance is running. */
31623179
if (cbk->cb_magic != CHK_BK_MAGIC_ENGINE || cbk->cb_gen != gen ||
31633180
cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING)
@@ -3295,6 +3312,8 @@ chk_engine_notify(struct chk_iv *iv)
32953312
struct chk_pool_rec *cpr;
32963313
int rc = 0;
32973314

3315+
CHK_IS_READY(ins);
3316+
32983317
if (cbk->cb_gen != iv->ci_gen)
32993318
D_GOTO(out, rc = -DER_NOTAPPLICABLE);
33003319

@@ -3512,31 +3531,16 @@ chk_engine_rejoin(void *args)
35123531
DF_ENGINE" rejoin on rank %u with iv "DF_UUIDF": "DF_RC"\n",
35133532
DP_ENGINE(ins), myrank, DP_UUID(cbk->cb_iv_uuid), DP_RC(rc));
35143533
ins->ci_rejoining = 0;
3515-
ins->ci_starting = 0;
3516-
ins->ci_inited = 1;
3517-
}
3518-
3519-
void
3520-
chk_engine_pause(void)
3521-
{
3522-
struct chk_instance *ins = chk_engine;
3523-
3524-
chk_stop_sched(ins);
3525-
D_ASSERT(d_list_empty(&ins->ci_pool_list));
3534+
ins->ci_starting = 0;
35263535
}
35273536

35283537
int
3529-
chk_engine_init(void)
3538+
chk_engine_setup(void)
35303539
{
3531-
struct chk_traverse_pools_args ctpa = { 0 };
3532-
struct chk_bookmark *cbk;
3533-
int rc;
3534-
3535-
rc = chk_ins_init(&chk_engine);
3536-
if (rc != 0)
3537-
goto fini;
3538-
3539-
chk_report_seq_init(chk_engine);
3540+
struct chk_instance *ins = chk_engine;
3541+
struct chk_bookmark *cbk = &ins->ci_bk;
3542+
struct chk_traverse_pools_args ctpa = {0};
3543+
int rc;
35403544

35413545
/*
35423546
* DAOS global consistency check depends on all related engines' local
@@ -3545,7 +3549,6 @@ chk_engine_init(void)
35453549
* related local inconsistency firstly.
35463550
*/
35473551

3548-
cbk = &chk_engine->ci_bk;
35493552
rc = chk_bk_fetch_engine(cbk);
35503553
if (rc == -DER_NONEXIST)
35513554
goto prop;
@@ -3569,37 +3572,46 @@ chk_engine_init(void)
35693572
cbk->cb_time.ct_stop_time = time(NULL);
35703573
rc = chk_bk_update_engine(cbk);
35713574
if (rc != 0) {
3572-
D_ERROR(DF_ENGINE" failed to reset status as 'PAUSED': "DF_RC"\n",
3573-
DP_ENGINE(chk_engine), DP_RC(rc));
3575+
D_ERROR(DF_ENGINE " failed to reset status as 'PAUSED': " DF_RC "\n",
3576+
DP_ENGINE(ins), DP_RC(rc));
35743577
goto fini;
35753578
}
35763579

35773580
ctpa.ctpa_gen = cbk->cb_gen;
3578-
ctpa.ctpa_ins = chk_engine;
3581+
ctpa.ctpa_ins = ins;
35793582
rc = chk_traverse_pools(chk_pools_pause_cb, &ctpa);
35803583
/*
35813584
* Failing to reset the pool status will not affect the next check start, so it is not fatal,
35823585
* but the related check query result may be confusing for the user.
35833586
*/
35843587
if (rc != 0)
3585-
D_WARN(DF_ENGINE" failed to reset pools status as 'PAUSED': "DF_RC"\n",
3586-
DP_ENGINE(chk_engine), DP_RC(rc));
3588+
D_WARN(DF_ENGINE " failed to reset pools status as 'PAUSED': " DF_RC "\n",
3589+
DP_ENGINE(ins), DP_RC(rc));
35873590
}
35883591

35893592
prop:
3590-
rc = chk_prop_fetch(&chk_engine->ci_prop, &chk_engine->ci_ranks);
3593+
rc = chk_prop_fetch(&ins->ci_prop, &ins->ci_ranks);
35913594
if (rc == -DER_NONEXIST)
35923595
rc = 0;
35933596
fini:
3594-
if (rc != 0)
3595-
chk_ins_fini(&chk_engine);
3597+
if (rc != 0) {
3598+
chk_ins_fini(&ins);
3599+
} else {
3600+
chk_report_seq_init(ins);
3601+
ins->ci_inited = 1;
3602+
ins->ci_pause = 0;
3603+
}
3604+
35963605
return rc;
35973606
}
35983607

35993608
void
3600-
chk_engine_fini(void)
3609+
chk_engine_cleanup(void)
36013610
{
3602-
chk_ins_fini(&chk_engine);
3611+
struct chk_instance *ins = chk_engine;
3612+
3613+
chk_ins_cleanup(ins);
3614+
D_ASSERT(d_list_empty(&ins->ci_pool_list));
36033615
}
36043616

36053617
int
@@ -3609,6 +3621,8 @@ chk_engine_pool_stop(uuid_t pool_uuid, bool destroy)
36093621
uint32_t phase;
36103622
int rc = 0;
36113623

3624+
CHK_IS_READY(chk_engine);
3625+
36123626
if (destroy) {
36133627
status = CHK__CHECK_POOL_STATUS__CPS_CHECKED;
36143628
phase = CHK__CHECK_SCAN_PHASE__CSP_DONE;
@@ -3624,3 +3638,15 @@ chk_engine_pool_stop(uuid_t pool_uuid, bool destroy)
36243638

36253639
return rc;
36263640
}
3641+
3642+
int
3643+
chk_engine_init(void)
3644+
{
3645+
return chk_ins_init(&chk_engine);
3646+
}
3647+
3648+
void
3649+
chk_engine_fini(void)
3650+
{
3651+
chk_ins_fini(&chk_engine);
3652+
}

src/chk/chk_internal.h

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,8 @@ uint32_t chk_pool_merge_status(uint32_t status_a, uint32_t status_b);
771771
void chk_ins_merge_info(uint32_t *status_dst, uint32_t status_src, uint32_t *phase_dst,
772772
uint32_t phase_src, uint64_t *gen_dst, uint64_t gen_src);
773773

774+
void chk_ins_cleanup(struct chk_instance *ins);
775+
774776
int chk_ins_init(struct chk_instance **p_ins);
775777

776778
void chk_ins_fini(struct chk_instance **p_ins);
@@ -806,7 +808,9 @@ int chk_engine_notify(struct chk_iv *iv);
806808

807809
void chk_engine_rejoin(void *args);
808810

809-
void chk_engine_pause(void);
811+
int chk_engine_setup(void);
812+
813+
void chk_engine_cleanup(void);
810814

811815
int chk_engine_init(void);
812816

@@ -833,7 +837,9 @@ int chk_leader_notify(struct chk_iv *iv);
833837
int chk_leader_rejoin(uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, int *pool_nr,
834838
uuid_t **pools);
835839

836-
void chk_leader_pause(void);
840+
int chk_leader_setup(void);
841+
842+
void chk_leader_cleanup(void);
837843

838844
int chk_leader_init(void);
839845

@@ -912,9 +918,16 @@ int chk_prop_update(struct chk_property *cpp, d_rank_list_t *rank_list);
912918

913919
int chk_traverse_pools(sys_db_trav_cb_t cb, void *args);
914920

915-
void chk_vos_init(void);
921+
void chk_vos_setup(void);
922+
923+
void chk_vos_cleanup(void);
924+
925+
#define CHK_IS_READY(ins) \
926+
do { \
927+
if (unlikely((ins)->ci_inited == 0)) \
928+
return -DER_UNINIT; \
929+
} while (0)
916930

917-
void chk_vos_fini(void);
918931
/* clang-format on */
919932

920933
static inline bool
@@ -1228,7 +1241,9 @@ chk_stop_sched(struct chk_instance *ins)
12281241
static inline int
12291242
chk_ins_can_start(struct chk_instance *ins)
12301243
{
1231-
if (unlikely(!ins->ci_inited))
1244+
CHK_IS_READY(ins);
1245+
1246+
if (!ins->ci_is_leader && ins->ci_rejoining)
12321247
return -DER_AGAIN;
12331248

12341249
if (ins->ci_starting)

0 commit comments

Comments
 (0)