Skip to content

Commit 337577e

Browse files
committed
DAOS-18487 rebuild: disallow reint/extend while the pool has DOWN targets
If the pool is not in delay_rebuild mode, disallow reint/extend while it has DOWN targets; the user should retry after the rebuild is done. Features: rebuild Signed-off-by: Xuezhao Liu <xuezhao.liu@hpe.com>
1 parent 5a7d21c commit 337577e

File tree

1 file changed

+79
-1
lines changed

1 file changed

+79
-1
lines changed

src/pool/srv_pool.c

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* (C) Copyright 2016-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
* (C) Copyright 2025 Google LLC
55
*
66
* SPDX-License-Identifier: BSD-2-Clause-Patent
@@ -7920,6 +7920,68 @@ pool_discard(crt_context_t ctx, struct pool_svc *svc, struct pool_target_addr_li
79207920
return rc;
79217921
}
79227922

7923+
static bool
7924+
ds_pool_reint_extend_allowed(struct pool_svc *svc)
7925+
{
7926+
struct ds_pool *pool = svc->ps_pool;
7927+
struct pool_target *targets = NULL;
7928+
uint32_t down_tgts_cnt = 0;
7929+
uint64_t sys_self_heal = 0;
7930+
int rc;
7931+
bool ret = true;
7932+
7933+
ABT_rwlock_rdlock(pool->sp_lock);
7934+
rc = pool_map_find_down_tgts(pool->sp_map, &targets, &down_tgts_cnt);
7935+
ABT_rwlock_unlock(pool->sp_lock);
7936+
if (rc) {
7937+
DL_ERROR(rc, DF_UUID " pool_map_find_down_tgts failed", DP_UUID(pool->sp_uuid));
7938+
ret = false;
7939+
goto out;
7940+
}
7941+
if (targets == NULL || down_tgts_cnt == 0) {
7942+
D_DEBUG(DB_REBUILD, DF_UUID " no DOWN tgt, REINT/EXTEND allowed",
7943+
DP_UUID(pool->sp_uuid));
7944+
goto out;
7945+
}
7946+
7947+
if (pool->sp_self_heal & DAOS_SELF_HEAL_DELAY_REBUILD) {
7948+
D_DEBUG(DB_REBUILD,
7949+
DF_UUID " with DOWN tgt in delay_rebuild node, "
7950+
"REINT/EXTEND allowed",
7951+
DP_UUID(pool->sp_uuid));
7952+
goto out;
7953+
}
7954+
7955+
rc = ds_mgmt_get_self_heal_policy(pool_svc_abort_gshp, svc, &sys_self_heal);
7956+
if (rc != 0) {
7957+
DL_ERROR(rc, DF_UUID ": failed to get self-heal policy", DP_UUID(svc->ps_uuid));
7958+
/*
7959+
* Another PS replica might be able reach the MS. If I'm
7960+
* not the PS leader of the specified term, this
7961+
* rdb_resign call does nothing.
7962+
*/
7963+
rdb_resign(svc->ps_rsvc.s_db, svc->ps_rsvc.s_term);
7964+
ret = false;
7965+
goto out;
7966+
}
7967+
7968+
if (!(sys_self_heal & DS_MGMT_SELF_HEAL_POOL_REBUILD)) {
7969+
D_DEBUG(DB_REBUILD,
7970+
DF_UUID " with DOWN tgt, pool_rebuild disabled in "
7971+
"sys_self_heal, REINT/EXTEND allowed",
7972+
DP_UUID(pool->sp_uuid));
7973+
goto out;
7974+
}
7975+
7976+
ret = false;
7977+
D_INFO(DF_UUID " with %d DOWN tgts, REINT/EXTEND disallowed until rebuild done.",
7978+
DP_UUID(pool->sp_uuid), down_tgts_cnt);
7979+
7980+
out:
7981+
D_FREE(targets);
7982+
return ret;
7983+
}
7984+
79237985
void
79247986
ds_pool_extend_handler(crt_rpc_t *rpc)
79257987
{
@@ -7954,6 +8016,13 @@ ds_pool_extend_handler(crt_rpc_t *rpc)
79548016
if (rc != 0)
79558017
goto out;
79568018

8019+
if (!ds_pool_reint_extend_allowed(svc)) {
8020+
rc = -DER_BUSY;
8021+
DL_ERROR(rc, DF_UUID "pool extend not allowed, wait rebuild done and try later.",
8022+
DP_UUID(pool_uuid));
8023+
goto failed;
8024+
}
8025+
79578026
rc = pool_discard(rpc->cr_ctx, svc, &tgt_addr_list, false);
79588027
if (rc) {
79598028
DL_ERROR(rc, DF_UUID ": pool_discard failed.", DP_UUID(in->pei_op.pi_uuid));
@@ -8116,6 +8185,15 @@ pool_update_handler(crt_rpc_t *rpc, int handler_version)
81168185
goto out;
81178186

81188187
if (opc_get(rpc->cr_opc) == POOL_REINT) {
8188+
if (!ds_pool_reint_extend_allowed(svc)) {
8189+
rc = -DER_BUSY;
8190+
DL_ERROR(rc,
8191+
DF_UUID "pool reintegration not allowed, "
8192+
"wait rebuild done and try later.",
8193+
DP_UUID(in->pti_op.pi_uuid));
8194+
goto out_svc;
8195+
}
8196+
81198197
if (svc->ps_pool->sp_reint_mode == DAOS_REINT_MODE_DATA_SYNC) {
81208198
rc = pool_discard(rpc->cr_ctx, svc, &list, true);
81218199
} else if (svc->ps_pool->sp_reint_mode == DAOS_REINT_MODE_INCREMENTAL) {

0 commit comments

Comments
 (0)