Skip to content

Commit cd97fbb

Browse files
authored
DAOS-18355 chk: check leader waits for all check engines before exiting (#17315)
In the old implementation, when the PS leader notifies the check leader that the related pool has been checked, the check leader marks that pool as 'done'. Once all required pools have been marked as 'done', the check leader exits. But at that time, the check engine on the related PS leader may not have completed yet: there is still something to be processed (such as restarting the pool server) after checking the pool. The check engine notifies the check leader via CHK IV when it exits, but the check leader does not wait for that notification. In that case, if someone triggers a new check instance, it will create a new IV namespace. That causes some check engines and the check leader to use different IV namespaces, so the CHK IV logic cannot recognize the leadership correctly. This patch adjusts the leader exit logic: the leader scheduler needs to wait for all check engines' notifications before it exits. Signed-off-by: Fan Yong <[email protected]>
1 parent e9c8283 commit cd97fbb

File tree

4 files changed

+41
-41
lines changed

4 files changed

+41
-41
lines changed

src/chk/chk_engine.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2022-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -1981,7 +1981,7 @@ chk_engine_sched(void *args)
19811981
D_GOTO(out, rc);
19821982
}
19831983

1984-
if (ins_phase > cbk->cb_phase) {
1984+
if (ins_phase != CHK_INVAL_PHASE && ins_phase > cbk->cb_phase) {
19851985
D_INFO(DF_ENGINE" on rank %u moves from phase %u to phase %u\n",
19861986
DP_ENGINE(ins), myrank, cbk->cb_phase, ins_phase);
19871987

src/chk/chk_internal.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2022-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -1173,6 +1173,14 @@ chk_pools_find_slowest(struct chk_instance *ins, int *done)
11731173
phase = cpr->cpr_bk.cb_phase;
11741174
}
11751175

1176+
/* All pools have been done, some check engines are still running, leader needs to wait. */
1177+
if (ins->ci_orphan_done && *done > 0 && !d_list_empty(&ins->ci_rank_list)) {
1178+
D_ASSERT(ins->ci_is_leader);
1179+
1180+
phase = CHK_INVAL_PHASE;
1181+
*done = 0;
1182+
}
1183+
11761184
return phase;
11771185
}
11781186

src/chk/chk_leader.c

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2022-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -226,9 +226,10 @@ static void
226226
chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_status,
227227
uint32_t pool_status, bool bcast)
228228
{
229-
struct chk_bookmark *cbk = &ins->ci_bk;
230-
struct chk_iv iv = { 0 };
231-
int rc = 0;
229+
struct chk_dead_rank *cdr;
230+
struct chk_bookmark *cbk = &ins->ci_bk;
231+
struct chk_iv iv = {0};
232+
int rc = 0;
232233

233234
ins->ci_sched_exiting = 1;
234235

@@ -237,8 +238,7 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu
237238
chk_pool_stop_all(ins, pool_status, NULL);
238239

239240
if ((bcast && ins_status == CHK__CHECK_INST_STATUS__CIS_FAILED) ||
240-
ins_status == CHK__CHECK_INST_STATUS__CIS_IMPLICATED ||
241-
unlikely(ins_status == CHK__CHECK_INST_STATUS__CIS_COMPLETED && !ins->ci_orphan_done)) {
241+
ins_status == CHK__CHECK_INST_STATUS__CIS_IMPLICATED || !ins->ci_orphan_done) {
242242
iv.ci_gen = cbk->cb_gen;
243243
iv.ci_phase = ins_phase != CHK_INVAL_PHASE ? ins_phase : cbk->cb_phase;
244244
iv.ci_ins_status = ins_status;
@@ -264,6 +264,10 @@ chk_leader_exit(struct chk_instance *ins, uint32_t ins_phase, uint32_t ins_statu
264264
DP_LEADER(ins), ins_status, DP_RC(rc));
265265
}
266266

267+
while ((cdr = d_list_pop_entry(&ins->ci_dead_ranks, struct chk_dead_rank, cdr_link)) !=
268+
NULL)
269+
D_FREE(cdr);
270+
267271
ins->ci_sched_exiting = 0;
268272
}
269273

@@ -305,31 +309,24 @@ chk_leader_post_repair(struct chk_instance *ins, struct chk_pool_rec *cpr,
305309
DP_UUID(cpr->cpr_uuid), rc);
306310
}
307311

308-
/*
309-
* If the operation failed and 'failout' is set, then do nothing here.
310-
* chk_leader_exit will handle all the IV and bookmark related things.
311-
*/
312-
if (*result == 0 || !(ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT)) {
313-
if (notify) {
314-
iv.ci_gen = cbk->cb_gen;
315-
uuid_copy(iv.ci_uuid, cpr->cpr_uuid);
316-
iv.ci_ins_status = ins->ci_bk.cb_ins_status;
317-
iv.ci_phase = cbk->cb_phase;
318-
iv.ci_pool_status = cbk->cb_pool_status;
319-
320-
/* Synchronously notify the engines that check on the pool got failure. */
321-
rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE,
322-
CRT_IV_SYNC_EAGER, true);
323-
D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO,
324-
DF_LEADER" notify the engines that the check for pool "
325-
DF_UUIDF" is done with status %u: rc = %d\n",
326-
DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), iv.ci_pool_status, rc);
327-
if (rc == 0)
328-
cpr->cpr_notified_exit = 1;
329-
}
312+
if (notify) {
313+
uuid_copy(iv.ci_uuid, cpr->cpr_uuid);
314+
iv.ci_gen = cbk->cb_gen;
315+
iv.ci_ins_status = ins->ci_bk.cb_ins_status;
316+
iv.ci_phase = cbk->cb_phase;
317+
iv.ci_pool_status = cbk->cb_pool_status;
318+
319+
rc = chk_iv_update(ins->ci_iv_ns, &iv, CRT_IV_SHORTCUT_NONE, CRT_IV_SYNC_EAGER,
320+
true);
321+
DL_CDEBUG(rc != 0, DLOG_WARN, DLOG_INFO, rc,
322+
DF_LEADER " notify engines that check pool " DF_UUIDF " done, status %u",
323+
DP_LEADER(ins), DP_UUID(cpr->cpr_uuid), iv.ci_pool_status);
324+
if (rc == 0)
325+
cpr->cpr_notified_exit = 1;
326+
}
330327

328+
if (!(ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_FAILOUT))
331329
*result = 0;
332-
}
333330

334331
if (update) {
335332
rc = chk_bk_update_leader(&ins->ci_bk);
@@ -2284,7 +2281,8 @@ chk_leader_sched(void *args)
22842281

22852282
ins_phase = chk_pools_find_slowest(ins, &done);
22862283

2287-
if (ins_phase >= CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS && !ins->ci_orphan_done &&
2284+
if (ins_phase != CHK_INVAL_PHASE &&
2285+
ins_phase >= CHK__CHECK_SCAN_PHASE__CSP_POOL_MBS && !ins->ci_orphan_done &&
22882286
!DAOS_FAIL_CHECK(DAOS_CHK_SYNC_ORPHAN_PROCESS)) {
22892287
iv.ci_gen = cbk->cb_gen;
22902288
iv.ci_phase = ins_phase;
@@ -2316,7 +2314,7 @@ chk_leader_sched(void *args)
23162314
D_GOTO(out, rc);
23172315
}
23182316

2319-
if (cbk->cb_phase == CHK_INVAL_PHASE || cbk->cb_phase < ins_phase) {
2317+
if (ins_phase != CHK_INVAL_PHASE && ins_phase > cbk->cb_phase) {
23202318
D_INFO(DF_LEADER" moves from phase %u to phase %u\n",
23212319
DP_LEADER(ins), cbk->cb_phase, ins_phase);
23222320

src/tests/suite/daos_cr.c

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2023-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -3427,12 +3427,6 @@ cr_fail_sync_orphan(void **state)
34273427
rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 0, NULL, NULL, NULL);
34283428
assert_rc_equal(rc, 0);
34293429

3430-
/* Check leader may be completed earlier than check engines in this case, double check. */
3431-
cr_ins_wait(0, NULL, &dci);
3432-
3433-
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
3434-
assert_rc_equal(rc, 0);
3435-
34363430
cr_debug_set_params(arg, 0);
34373431

34383432
rc = cr_mode_switch(false);

0 commit comments

Comments
 (0)