Skip to content

Commit 8fabe6c

Browse files
committed
DAOS-18361 chk: handle CHK engine side inconsistency in parallel
On the CHK engine side, most inconsistencies can be handled in parallel. For each of them, create a dedicated ULT to handle the inconsistency and report (including interaction) to the CHK leader independently. So even if some ULT is blocked for some reason, such as waiting for interaction, it will not prevent the other inconsistencies from being handled in parallel. Test-tag: recovery Signed-off-by: Fan Yong <fan.yong@hpe.com>
1 parent e741d4e commit 8fabe6c

File tree

5 files changed

+582
-182
lines changed

5 files changed

+582
-182
lines changed

src/chk/chk_common.c

Lines changed: 29 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@ chk_pool_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov,
7676
D_INIT_LIST_HEAD(&cpr->cpr_shutdown_link);
7777
D_INIT_LIST_HEAD(&cpr->cpr_shard_list);
7878
D_INIT_LIST_HEAD(&cpr->cpr_pending_list);
79+
D_INIT_LIST_HEAD(&cpr->cpr_ult_list);
7980
cpr->cpr_refs = 1;
8081
uuid_copy(cpr->cpr_uuid, cpb->cpb_uuid);
8182
cpr->cpr_thread = ABT_THREAD_NULL;
@@ -930,6 +931,27 @@ chk_pool_shard_cleanup(struct chk_instance *ins)
930931
}
931932
}
932933

934+
int
935+
chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr)
936+
{
937+
d_iov_t kiov;
938+
d_iov_t riov;
939+
int rc;
940+
941+
d_iov_set(&riov, NULL, 0);
942+
d_iov_set(&kiov, &seq, sizeof(seq));
943+
944+
ABT_rwlock_rdlock(ins->ci_abt_lock);
945+
rc = dbtree_lookup(ins->ci_pending_hdl, &kiov, &riov);
946+
ABT_rwlock_unlock(ins->ci_abt_lock);
947+
if (rc == 0)
948+
*cpr = (struct chk_pending_rec *)riov.iov_buf;
949+
else
950+
*cpr = NULL;
951+
952+
return rc;
953+
}
954+
933955
int
934956
chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid,
935957
uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options,
@@ -985,12 +1007,14 @@ chk_pending_del(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec *
9851007
d_iov_set(&kiov, &seq, sizeof(seq));
9861008

9871009
ABT_rwlock_wrlock(ins->ci_abt_lock);
988-
rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, &riov);
1010+
rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, cpr == NULL ? NULL : &riov);
9891011
ABT_rwlock_unlock(ins->ci_abt_lock);
990-
if (rc == 0)
991-
*cpr = (struct chk_pending_rec *)riov.iov_buf;
992-
else
993-
*cpr = NULL;
1012+
if (cpr != NULL) {
1013+
if (rc == 0)
1014+
*cpr = (struct chk_pending_rec *)riov.iov_buf;
1015+
else
1016+
*cpr = NULL;
1017+
}
9941018

9951019
D_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG,
9961020
"Del pending record with gen "DF_X64", seq "DF_X64": "DF_RC"\n",

0 commit comments

Comments
 (0)