Skip to content

Commit 57701fa

Browse files
authored
DAOS-17111 cart: Fix csm_alive_count (#15945)
In SWIM, csm_alive_count may underflow because some code paths that change cst->cst_state.sms_status do not update the count. Moreover, not counting SUSPECT members seems to be a mistake. Consider a membership of three, {x, y, z}. If x enters a state where it can't receive any SWIM messages, and it picks y in the next period, then it will suspect y, causing csm_alive_count to drop from 3 to 2, which prevents x from declaring an "outage" (the check in crt_swim_progress_cb requires the count to be greater than 2). (In the subsequent period, x will suspect z, causing csm_alive_count to drop quickly from 2 to 1.) Since x keeps pinging SUSPECT members, it seems reasonable to count them in and expect them to send messages to x until they become DEAD. This patch fixes the underflow, and counts SUSPECT members in addition to ALIVE members in csm_alive_count (renamed to csm_alive_or_suspect_count). Signed-off-by: Li Wei <liwei@hpe.com>
1 parent 66d3573 commit 57701fa

File tree

2 files changed

+46
-19
lines changed

2 files changed

+46
-19
lines changed

src/cart/crt_swim.c

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* (C) Copyright 2019-2024 Intel Corporation.
3+
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
34
*
45
* SPDX-License-Identifier: BSD-2-Clause-Patent
56
*/
@@ -179,6 +180,12 @@ crt_swim_membs_iterate(struct crt_swim_membs *csm, d_hash_traverse_cb_t cb, void
179180
return d_hash_table_traverse(csm->csm_table, cb, arg);
180181
}
181182

183+
static inline bool
184+
crt_swim_status_alive_or_suspect(enum swim_member_status status)
185+
{
186+
return status == SWIM_MEMBER_ALIVE || status == SWIM_MEMBER_SUSPECT;
187+
}
188+
182189
/* Move cst into the csm. */
183190
static int
184191
crt_swim_membs_add(struct crt_swim_membs *csm, struct crt_swim_target *cst)
@@ -214,6 +221,9 @@ crt_swim_membs_add(struct crt_swim_membs *csm, struct crt_swim_target *cst)
214221
if (csm->csm_target == CRT_SWIM_TARGET_INVALID)
215222
csm->csm_target = 0;
216223

224+
if (crt_swim_status_alive_or_suspect(cst->cst_state.sms_status))
225+
csm->csm_alive_or_suspect_count++;
226+
217227
return 0;
218228
}
219229

@@ -256,6 +266,9 @@ crt_swim_membs_del(struct crt_swim_membs *csm, d_rank_t rank)
256266
deleted = d_hash_rec_delete_at(csm->csm_table, &cst->cst_link);
257267
D_ASSERT(deleted);
258268

269+
if (crt_swim_status_alive_or_suspect(cst->cst_state.sms_status))
270+
csm->csm_alive_or_suspect_count--;
271+
259272
return cst;
260273
}
261274

@@ -952,12 +965,12 @@ static int crt_swim_set_member_state(struct swim_context *ctx,
952965
crt_swim_csm_lock(csm);
953966
cst = crt_swim_membs_find(csm, id);
954967
if (cst != NULL && state->sms_incarnation >= cst->cst_state.sms_incarnation) {
955-
if (cst->cst_state.sms_status != SWIM_MEMBER_ALIVE &&
956-
state->sms_status == SWIM_MEMBER_ALIVE)
957-
csm->csm_alive_count++;
958-
else if (cst->cst_state.sms_status == SWIM_MEMBER_ALIVE &&
959-
state->sms_status != SWIM_MEMBER_ALIVE)
960-
csm->csm_alive_count--;
968+
if (!crt_swim_status_alive_or_suspect(cst->cst_state.sms_status) &&
969+
crt_swim_status_alive_or_suspect(state->sms_status))
970+
csm->csm_alive_or_suspect_count++;
971+
else if (crt_swim_status_alive_or_suspect(cst->cst_state.sms_status) &&
972+
!crt_swim_status_alive_or_suspect(state->sms_status))
973+
csm->csm_alive_or_suspect_count--;
961974
state_prev = cst->cst_state;
962975
cst->cst_state = *state;
963976
rc = 0;
@@ -1057,7 +1070,7 @@ static int64_t crt_swim_progress_cb(crt_context_t crt_ctx, int64_t timeout_us, v
10571070
* The max_delay should be less suspicion timeout to guarantee
10581071
* the already suspected members will not be expired.
10591072
*/
1060-
if (csm->csm_alive_count > 2) {
1073+
if (csm->csm_alive_or_suspect_count > 2) {
10611074
uint64_t hlc1 = csm->csm_last_unpack_hlc;
10621075
uint64_t hlc2 = d_hlc_get();
10631076
uint64_t delay = d_hlc2msec(hlc2 - hlc1);
@@ -1150,7 +1163,7 @@ int crt_swim_init(int crt_ctx_idx)
11501163

11511164
csm->csm_crt_ctx_idx = crt_ctx_idx;
11521165
csm->csm_last_unpack_hlc = hlc;
1153-
csm->csm_alive_count = 0;
1166+
csm->csm_alive_or_suspect_count = 0;
11541167
csm->csm_nglitches = 0;
11551168
csm->csm_nmessages = 0;
11561169
/*
@@ -1350,31 +1363,42 @@ void crt_swim_disable_all(void)
13501363
old_ctx_idx, NULL);
13511364
}
13521365

1366+
struct crt_swim_suspend_arg {
1367+
struct crt_swim_membs *csm;
1368+
swim_id_t self_id;
1369+
};
1370+
13531371
static int
1354-
crt_swim_suspend_cb(d_list_t *link, void *arg)
1372+
crt_swim_suspend_cb(d_list_t *link, void *varg)
13551373
{
1356-
struct crt_swim_target *cst = crt_swim_target_obj(link);
1357-
swim_id_t *self_id = arg;
1374+
struct crt_swim_target *cst = crt_swim_target_obj(link);
1375+
struct crt_swim_suspend_arg *arg = varg;
13581376

1359-
if (cst->cst_id != *self_id)
1377+
if (cst->cst_id != arg->self_id) {
1378+
if (crt_swim_status_alive_or_suspect(cst->cst_state.sms_status))
1379+
arg->csm->csm_alive_or_suspect_count--;
13601380
cst->cst_state.sms_status = SWIM_MEMBER_INACTIVE;
1381+
}
13611382
return 0;
13621383
}
13631384

13641385
void crt_swim_suspend_all(void)
13651386
{
1366-
struct crt_grp_priv *grp_priv = crt_gdata.cg_grp->gg_primary_grp;
1367-
struct crt_swim_membs *csm = &grp_priv->gp_membs_swim;
1368-
swim_id_t self_id;
1369-
int rc;
1387+
struct crt_grp_priv *grp_priv = crt_gdata.cg_grp->gg_primary_grp;
1388+
struct crt_swim_membs *csm = &grp_priv->gp_membs_swim;
1389+
struct crt_swim_suspend_arg arg;
1390+
int rc;
13701391

13711392
if (!crt_gdata.cg_swim_inited)
13721393
return;
13731394

13741395
csm->csm_ctx->sc_glitch = 1;
1375-
self_id = swim_self_get(csm->csm_ctx);
1396+
1397+
arg.csm = csm;
1398+
arg.self_id = swim_self_get(csm->csm_ctx);
1399+
13761400
crt_swim_csm_lock(csm);
1377-
rc = crt_swim_membs_iterate(csm, crt_swim_suspend_cb, &self_id);
1401+
rc = crt_swim_membs_iterate(csm, crt_swim_suspend_cb, &arg);
13781402
D_ASSERTF(rc == 0, "suspend SWIM members: "DF_RC"\n", DP_RC(rc));
13791403
crt_swim_csm_unlock(csm);
13801404
}
@@ -1625,6 +1649,8 @@ crt_swim_rank_check(struct crt_grp_priv *grp_priv, d_rank_t rank, uint64_t incar
16251649
if (cst->cst_state.sms_incarnation < incarnation) {
16261650
state_prev = cst->cst_state;
16271651
cst->cst_state.sms_incarnation = incarnation;
1652+
if (!crt_swim_status_alive_or_suspect(cst->cst_state.sms_status))
1653+
csm->csm_alive_or_suspect_count++;
16281654
cst->cst_state.sms_status = SWIM_MEMBER_ALIVE;
16291655
state = cst->cst_state;
16301656
updated = true;

src/cart/crt_swim.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* (C) Copyright 2019-2022 Intel Corporation.
3+
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
34
*
45
* SPDX-License-Identifier: BSD-2-Clause-Patent
56
*/
@@ -47,7 +48,7 @@ struct crt_swim_membs {
4748
struct swim_context *csm_ctx;
4849
uint64_t csm_incarnation;
4950
uint64_t csm_last_unpack_hlc;
50-
uint64_t csm_alive_count;
51+
uint64_t csm_alive_or_suspect_count;
5152
int csm_crt_ctx_idx;
5253
int csm_nglitches;
5354
int csm_nmessages;

0 commit comments

Comments
 (0)