Skip to content

Commit bbbc63b

Browse files
committed
DAOS-18238 chk: handle CRT_EVS_GRPMOD event from CaRT PG - b26
To guarantee that the rank death event will not be omitted, the related CR logic needs to handle the event from both SWIM and CaRT PG, although this will trigger a lot of useless event callbacks. Test-tag: recovery Signed-off-by: Fan Yong <fan.yong@hpe.com>
1 parent a8854ec commit bbbc63b

File tree

2 files changed

+45
-4
lines changed

2 files changed

+45
-4
lines changed

src/chk/chk_iv.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,10 +288,16 @@ chk_iv_update(struct chk_instance *ins, struct chk_iv *iv, uint32_t shortcut, ui
288288
/* Wait for chk_dead_rank_ult to sync the IV namespace. */
289289
while (ver == ins->ci_ns_ver && ins->ci_skip_oog == 0 && ins->ci_pause == 0) {
290290
dss_sleep(500);
291-
if (++wait_cnt % 40 == 0)
291+
if (++wait_cnt % 40 == 0) {
292292
D_WARN("CHK iv " DF_X64 "/" DF_X64 " is blocked because of DER_OOG "
293293
"for %d seconds.\n",
294294
iv->ci_gen, iv->ci_seq, wait_cnt / 2);
295+
/*
296+
* Let's retry IV in case of related dead rank recovered back before
297+
* being handled by chk_dead_rank_ult, although it is rare.
298+
*/
299+
break;
300+
}
295301
}
296302

297303
if (ins->ci_pause || ins->ci_skip_oog)

src/chk/chk_leader.c

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3258,10 +3258,17 @@ chk_leader_query(int pool_nr, uuid_t pools[], chk_query_head_cb_t head_cb,
32583258

32593259
while (ver == ins->ci_ns_ver && ins->ci_skip_oog == 0 && ins->ci_pause == 0) {
32603260
dss_sleep(500);
3261-
if (++wait_cnt % 40 == 0)
3261+
if (++wait_cnt % 40 == 0) {
32623262
D_WARN("Leader (" DF_X64 ") query is blocked because of %d for "
32633263
"about %d seconds.\n",
32643264
gen, rc, wait_cnt / 2);
3265+
/*
3266+
* Let's retry query in case of related dead rank recovered back
3267+
* before being handled by chk_dead_rank_ult, although it is rare.
3268+
*/
3269+
break;
3270+
}
3271+
32653272
if (rc != -DER_OOG)
32663273
break;
32673274
}
@@ -3771,7 +3778,7 @@ chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src
37713778

37723779
/* Ignore the event that is not applicable to current rank. */
37733780

3774-
if (src != CRT_EVS_SWIM)
3781+
if (src != CRT_EVS_SWIM && src != CRT_EVS_GRPMOD)
37753782
D_GOTO(out, rc = -DER_NOTAPPLICABLE);
37763783

37773784
if (type != CRT_EVT_DEAD && type != CRT_EVT_ALIVE)
@@ -3783,14 +3790,37 @@ chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src
37833790
D_GOTO(out, rc = -DER_NOMEM);
37843791

37853792
cdr->cdr_rank = rank;
3793+
} else if (d_list_empty(&ins->ci_dead_ranks)) {
3794+
D_GOTO(out, rc = -DER_NOTAPPLICABLE);
37863795
}
37873796

37883797
ABT_mutex_lock(ins->ci_abt_mutex);
37893798
if (cdr != NULL) {
3799+
struct chk_dead_rank *tmp;
3800+
37903801
/*
3791-
* The event may be triggered on non-system SX. Let's notify the leader scheduler
3802+
* The event may be triggered on non-system XS (SWIM). Let's ask chk_dead_rank_ult
37923803
* to handle that on system XS.
3804+
*
3805+
* The callback for one rank dead event may be triggered twice from multiple sources:
3806+
* SWIM and PG membership changes. Let's add it only once into ins->ci_dead_ranks.
3807+
*
3808+
* Generally, ins->ci_dead_ranks is very short. Then it is very fast to go through
3809+
* the whole list.
37933810
*/
3811+
d_list_for_each_entry(tmp, &ins->ci_dead_ranks, cdr_link) {
3812+
if (tmp->cdr_rank == rank) {
3813+
/* Repeated one, ignore it. */
3814+
D_FREE(cdr);
3815+
D_GOTO(unlock, rc = -DER_NOTAPPLICABLE);
3816+
}
3817+
3818+
if (tmp->cdr_rank > rank) {
3819+
d_list_add(&cdr->cdr_link, &tmp->cdr_link);
3820+
D_GOTO(unlock, rc = 0);
3821+
}
3822+
}
3823+
37943824
d_list_add_tail(&cdr->cdr_link, &ins->ci_dead_ranks);
37953825
} else {
37963826
/* Remove former non-handled dead rank from the list. */
@@ -3800,8 +3830,13 @@ chk_rank_event_cb(d_rank_t rank, uint64_t incarnation, enum crt_event_source src
38003830
D_FREE(cdr);
38013831
break;
38023832
}
3833+
3834+
if (cdr->cdr_rank > rank)
3835+
D_GOTO(unlock, rc = -DER_NOTAPPLICABLE);
38033836
}
38043837
}
3838+
3839+
unlock:
38053840
ABT_mutex_unlock(ins->ci_abt_mutex);
38063841

38073842
out:

0 commit comments

Comments
 (0)