Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 4 additions & 13 deletions src/chk/chk_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1073,8 +1073,7 @@ chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_p
}

int
chk_prop_prepare(d_rank_t leader, uint32_t flags, int phase,
uint32_t policy_nr, struct chk_policy *policies,
chk_prop_prepare(d_rank_t leader, uint32_t flags, uint32_t policy_nr, struct chk_policy *policies,
d_rank_list_t *ranks, struct chk_property *prop)
{
int rc = 0;
Expand All @@ -1086,11 +1085,8 @@ chk_prop_prepare(d_rank_t leader, uint32_t flags, int phase,
prop->cp_flags &= ~CHK__CHECK_FLAG__CF_FAILOUT;
if (flags & CHK__CHECK_FLAG__CF_NO_AUTO)
prop->cp_flags &= ~CHK__CHECK_FLAG__CF_AUTO;
prop->cp_flags |= flags & ~(CHK__CHECK_FLAG__CF_RESET |
CHK__CHECK_FLAG__CF_ORPHAN_POOL |
CHK__CHECK_FLAG__CF_NO_FAILOUT |
CHK__CHECK_FLAG__CF_NO_AUTO);
prop->cp_phase = phase;
prop->cp_flags |= flags & ~(CHK__CHECK_FLAG__CF_RESET | CHK__CHECK_FLAG__CF_ORPHAN_POOL |
CHK__CHECK_FLAG__CF_NO_FAILOUT | CHK__CHECK_FLAG__CF_NO_AUTO);
if (ranks != NULL)
prop->cp_rank_nr = ranks->rl_nr;

Expand Down Expand Up @@ -1240,12 +1236,7 @@ chk_ins_cleanup(struct chk_instance *ins)
chk_stop_sched(ins);
ins->ci_inited = 0;

chk_iv_ns_cleanup(&ins->ci_iv_ns);

if (ins->ci_iv_group != NULL) {
crt_group_secondary_destroy(ins->ci_iv_group);
ins->ci_iv_group = NULL;
}
chk_iv_ns_destroy(ins);
}

int
Expand Down
191 changes: 81 additions & 110 deletions src/chk/chk_engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -2045,9 +2045,8 @@ chk_engine_sched(void *args)

static int
chk_engine_start_prep(struct chk_instance *ins, uint32_t rank_nr, d_rank_t *ranks,
uint32_t policy_nr, struct chk_policy *policies, int pool_nr,
uuid_t pools[], uint64_t gen, int phase, uint32_t api_flags,
d_rank_t leader, uint32_t flags)
uint32_t policy_nr, struct chk_policy *policies, int pool_nr, uuid_t pools[],
uint64_t gen, uint32_t api_flags, d_rank_t leader, uint32_t flags)
{
struct chk_traverse_pools_args ctpa = { 0 };
struct chk_bookmark *cbk = &ins->ci_bk;
Expand Down Expand Up @@ -2134,8 +2133,7 @@ chk_engine_start_prep(struct chk_instance *ins, uint32_t rank_nr, d_rank_t *rank

init:
if (!chk_is_on_leader(gen, leader, true)) {
rc = chk_prop_prepare(leader, api_flags, phase, policy_nr, policies, rank_list,
prop);
rc = chk_prop_prepare(leader, api_flags, policy_nr, policies, rank_list, prop);
if (rc != 0)
goto out;

Expand Down Expand Up @@ -2263,16 +2261,15 @@ chk_engine_pool_filter(uuid_t uuid, void *arg, int *phase)
int
chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t policy_nr,
struct chk_policy *policies, int pool_nr, uuid_t pools[], uint32_t api_flags,
int phase, d_rank_t leader, uint32_t flags, uuid_t iv_uuid,
uint32_t ns_ver, d_rank_t leader, uint32_t flags, uuid_t iv_uuid,
struct ds_pool_clues *clues)
{
struct chk_instance *ins = chk_engine;
struct chk_bookmark *cbk = &ins->ci_bk;
struct umem_attr uma = { 0 };
char uuid_str[DAOS_UUID_STR_SIZE];
d_rank_t myrank = dss_self_rank();
int rc;
int rc1;
struct chk_instance *ins = chk_engine;
struct chk_bookmark *cbk = &ins->ci_bk;
struct umem_attr uma = {0};
d_rank_t myrank = dss_self_rank();
int rc;
int rc1;

rc = chk_ins_can_start(ins);
if (rc != 0)
Expand All @@ -2294,12 +2291,7 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic
if (ins->ci_sched != ABT_THREAD_NULL)
ABT_thread_free(&ins->ci_sched);

chk_iv_ns_cleanup(&ins->ci_iv_ns);

if (ins->ci_iv_group != NULL) {
crt_group_secondary_destroy(ins->ci_iv_group);
ins->ci_iv_group = NULL;
}
chk_iv_ns_destroy(ins);

uma.uma_id = UMEM_CLASS_VMEM;

Expand All @@ -2313,27 +2305,20 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic
if (rc != 0)
goto out_tree;

rc = chk_engine_start_prep(ins, rank_nr, ranks, policy_nr, policies,
pool_nr, pools, gen, phase, api_flags, leader, flags);
rc = chk_engine_start_prep(ins, rank_nr, ranks, policy_nr, policies, pool_nr, pools, gen,
api_flags, leader, flags);
if (rc != 0)
goto out_tree;

if (chk_is_on_leader(gen, leader, true)) {
ins->ci_iv_ns = chk_leader_get_iv_ns();
if (unlikely(ins->ci_iv_ns == NULL))
goto out_tree;
D_ASSERT(ins->ci_iv_ns != NULL);

ins->ci_ns_ver = ns_ver;
} else {
uuid_unparse_lower(iv_uuid, uuid_str);
rc = crt_group_secondary_create(uuid_str, NULL, ins->ci_ranks, &ins->ci_iv_group);
rc = chk_iv_ns_create(ins, iv_uuid, leader, ns_ver);
if (rc != 0)
goto out_tree;

rc = ds_iv_ns_create(dss_get_module_info()->dmi_ctx, iv_uuid, ins->ci_iv_group,
&ins->ci_iv_id, &ins->ci_iv_ns);
if (rc != 0)
goto out_group;

ds_iv_ns_update(ins->ci_iv_ns, leader, ins->ci_iv_ns->iv_master_term + 1);
}

uuid_copy(cbk->cb_iv_uuid, iv_uuid);
Expand Down Expand Up @@ -2367,30 +2352,26 @@ chk_engine_start(uint64_t gen, uint32_t rank_nr, d_rank_t *ranks, uint32_t polic
D_WARN(DF_ENGINE" failed to update engine bookmark: "DF_RC"\n",
DP_ENGINE(ins), DP_RC(rc1));
}
chk_iv_ns_cleanup(&ins->ci_iv_ns);
out_group:
if (ins->ci_iv_group != NULL) {
crt_group_secondary_destroy(ins->ci_iv_group);
ins->ci_iv_group = NULL;
}
chk_iv_ns_destroy(ins);
out_tree:
chk_destroy_pending_tree(ins);
chk_destroy_pool_tree(ins);
out_done:
ins->ci_starting = 0;
out_log:
if (rc >= 0) {
D_INFO(DF_ENGINE " %s on rank %u with api_flags %x, phase %d, leader %u, "
"flags %x, iv "DF_UUIDF": rc %d\n",
D_INFO(DF_ENGINE " %s on rank %u with api_flags %x, ns_ver %d, leader %u, "
"flags %x, iv " DF_UUIDF ": rc %d\n",
DP_ENGINE(ins), chk_is_ins_reset(ins, api_flags) ? "start" : "resume",
myrank, api_flags, phase, leader, flags, DP_UUID(iv_uuid), rc);
myrank, api_flags, ns_ver, leader, flags, DP_UUID(iv_uuid), rc);

chk_ranks_dump(ins->ci_ranks->rl_nr, ins->ci_ranks->rl_ranks);
chk_pools_dump(&ins->ci_pool_list, pool_nr, pools);
} else {
D_ERROR(DF_ENGINE" failed to start on rank %u with %d pools, api_flags %x, "
"phase %d, leader %u, flags %x, gen "DF_X64", iv "DF_UUIDF": "DF_RC"\n",
DP_ENGINE(ins), myrank, pool_nr, api_flags, phase, leader, flags, gen,
D_ERROR(DF_ENGINE " failed to start on rank %u with %d pools, api_flags %x, "
"ns_ver %d, leader %u, flags %x, gen " DF_X64 ", iv " DF_UUIDF
": " DF_RC "\n",
DP_ENGINE(ins), myrank, pool_nr, api_flags, ns_ver, leader, flags, gen,
DP_UUID(iv_uuid), DP_RC(rc));
}

Expand All @@ -2416,7 +2397,7 @@ chk_engine_stop(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *flags)
if (cbk->cb_magic != CHK_BK_MAGIC_ENGINE)
D_GOTO(log, rc = -DER_NOTAPPLICABLE);

if (ins->ci_starting)
if (ins->ci_starting || ins->ci_rejoining)
D_GOTO(log, rc = -DER_BUSY);

if (ins->ci_stopping || ins->ci_sched_exiting)
Expand Down Expand Up @@ -2647,34 +2628,44 @@ chk_engine_query(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *ins_status
int
chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version)
{
struct chk_instance *ins = chk_engine;
struct chk_property *prop = &ins->ci_prop;
struct chk_bookmark *cbk = &ins->ci_bk;
d_rank_list_t *rank_list = NULL;
int rc = 0;
struct chk_instance *ins = chk_engine;
struct chk_property *prop = &ins->ci_prop;
struct chk_bookmark *cbk = &ins->ci_bk;
int rc = 0;

CHK_IS_READY(ins);

if (cbk->cb_gen != gen)
D_GOTO(out, rc = -DER_NOTAPPLICABLE);

rc = chk_prop_fetch(prop, &rank_list);
if (rc != 0)
goto out;
/* For the check engine on the leader, reload the rank list that has been refreshed by the leader. */
if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true)) {
d_rank_list_free(ins->ci_ranks);
ins->ci_ranks = NULL;
}

D_ASSERT(rank_list != NULL);
if (ins->ci_ranks == NULL) {
rc = chk_prop_fetch(prop, &ins->ci_ranks);
if (rc != 0)
goto out;

/* For check engine on the leader, related rank has already been marked as "dead". */
if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true))
goto group;
/* For check engine on the leader, it's done. */
if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true)) {
ins->ci_ns_ver = version;
goto out;
}
}

if (unlikely(ins->ci_ranks == NULL))
D_GOTO(out, rc = -DER_NOTAPPLICABLE);

if (!chk_remove_rank_from_list(rank_list, rank))
if (!chk_remove_rank_from_list(ins->ci_ranks, rank))
D_GOTO(out, rc = -DER_NOTAPPLICABLE);

prop->cp_rank_nr--;
rc = chk_prop_update(prop, rank_list);
if (rc != 0)
goto out;
rc = chk_prop_update(prop, ins->ci_ranks);
if (rc == 0)
rc = chk_iv_ns_update(ins, version);

/*
* NOTE: If the rank is dead before the DAOS check starts, then the subsequent check start will
Expand All @@ -2695,19 +2686,7 @@ chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version)
* sometime later as the DAOS check proceeds.
*/

group:
if (ins->ci_iv_group != NULL)
rc = crt_group_secondary_modify(ins->ci_iv_group, rank_list, rank_list,
CRT_GROUP_MOD_OP_REPLACE, version);

out:
if (rc == 0) {
d_rank_list_free(ins->ci_ranks);
ins->ci_ranks = rank_list;
rank_list = NULL;
}

d_rank_list_free(rank_list);
if (rc != -DER_NOTAPPLICABLE)
D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO,
DF_ENGINE" on rank %u mark rank %u as dead with gen "
Expand Down Expand Up @@ -3383,19 +3362,19 @@ chk_engine_notify(struct chk_iv *iv)
void
chk_engine_rejoin(void *args)
{
struct chk_instance *ins = chk_engine;
struct chk_property *prop = &ins->ci_prop;
struct chk_bookmark *cbk = &ins->ci_bk;
uuid_t *pools = NULL;
struct chk_iv iv = { 0 };
struct umem_attr uma = { 0 };
char uuid_str[DAOS_UUID_STR_SIZE];
d_rank_t myrank = dss_self_rank();
uint32_t pool_nr = 0;
uint32_t flags = 0;
int rc = 0;
int rc1;
bool need_join = false;
struct chk_instance *ins = chk_engine;
struct chk_property *prop = &ins->ci_prop;
struct chk_bookmark *cbk = &ins->ci_bk;
d_rank_list_t *ranks = NULL;
uuid_t *pools = NULL;
struct chk_iv iv = {0};
struct umem_attr uma = {0};
d_rank_t myrank = dss_self_rank();
uint32_t pool_nr = 0;
uint32_t flags = 0;
int rc = 0;
int rc1;
bool need_join = false;

if (cbk->cb_magic != CHK_BK_MAGIC_ENGINE)
goto out_log;
Expand All @@ -3404,7 +3383,7 @@ chk_engine_rejoin(void *args)
cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_PAUSED)
goto out_log;

/* We do NOT support leader (and its associated engine ) to rejoin former check instance. */
/* We do NOT support leader (and its associated engine) to rejoin former check instance. */
if (chk_is_on_leader(cbk->cb_gen, prop->cp_leader, true))
goto out_log;

Expand Down Expand Up @@ -3439,22 +3418,10 @@ chk_engine_rejoin(void *args)
if (rc != 0)
goto out_tree;

uuid_unparse_lower(cbk->cb_iv_uuid, uuid_str);
rc = crt_group_secondary_create(uuid_str, NULL, ins->ci_ranks, &ins->ci_iv_group);
if (rc != 0)
goto out_tree;

rc = ds_iv_ns_create(dss_get_module_info()->dmi_ctx, cbk->cb_iv_uuid, ins->ci_iv_group,
&ins->ci_iv_id, &ins->ci_iv_ns);
if (rc != 0)
goto out_group;

ds_iv_ns_update(ins->ci_iv_ns, prop->cp_leader, ins->ci_iv_ns->iv_master_term + 1);

again:
/* Ask leader whether this engine can rejoin or not. */
rc = chk_rejoin_remote(prop->cp_leader, cbk->cb_gen, myrank, cbk->cb_iv_uuid, &flags,
&pool_nr, &pools);
&ins->ci_ns_ver, &pool_nr, &pools, &ranks);
if (rc != 0) {
if ((rc == -DER_OOG || rc == -DER_GRPVER) && !ins->ci_pause) {
D_INFO(DF_ENGINE" Someone is not ready %d, let's rejoin after 1 sec\n",
Expand All @@ -3464,14 +3431,22 @@ chk_engine_rejoin(void *args)
goto again;
}

goto out_iv;
goto out_tree;
}

if (pool_nr == 0) {
if (ranks == NULL || pool_nr == 0) {
need_join = false;
D_GOTO(out_iv, rc = 1);
D_GOTO(out_tree, rc = 1);
}

d_rank_list_free(ins->ci_ranks);
ins->ci_ranks = ranks;
ranks = NULL;

rc = chk_iv_ns_create(ins, cbk->cb_iv_uuid, prop->cp_leader, ins->ci_ns_ver);
if (rc != 0)
goto out_tree;

rc = chk_pools_load_list(ins, cbk->cb_gen, 0, pool_nr, pools, NULL);
if (rc != 0)
goto out_notify;
Expand Down Expand Up @@ -3515,17 +3490,13 @@ chk_engine_rejoin(void *args)
D_CDEBUG(rc1 != 0, DLOG_ERR, DLOG_INFO,
DF_ENGINE" on rank %u notify leader for its exit, status %u: rc1 = %d\n",
DP_ENGINE(ins), myrank, cbk->cb_ins_status, rc1);
out_iv:
chk_iv_ns_cleanup(&ins->ci_iv_ns);
out_group:
if (ins->ci_iv_group != NULL) {
crt_group_secondary_destroy(ins->ci_iv_group);
ins->ci_iv_group = NULL;
}
chk_iv_ns_destroy(ins);
out_tree:
chk_destroy_pending_tree(ins);
chk_destroy_pool_tree(ins);
out_log:
d_rank_list_free(ranks);
D_FREE(pools);
if (need_join)
D_CDEBUG(rc < 0, DLOG_ERR, DLOG_INFO,
DF_ENGINE" rejoin on rank %u with iv "DF_UUIDF": "DF_RC"\n",
Expand Down
Loading
Loading