Skip to content

Commit c1bb861

Browse files
committed
DAOS-18328 test: Fix cat recovery failure for UCX.
Skip-build-leap15-icc: true
Skip-build-el8-gcc: true
Skip-func-test-vm: true
Skip-unit-tests: true
Skip-nlt: true
Test-tag: pr daily_regression
Test-provider: ucx+ud_x
Skip-func-hw-test-medium-ucx-provider: false
Skip-func-hw-test-medium-verbs-provider: true
Signed-off-by: Joseph Moore <[email protected]>
1 parent 4d70c3b commit c1bb861

File tree

1 file changed: 44 additions and 51 deletions.

src/tests/suite/daos_cr.c

Lines changed: 44 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2023-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -314,13 +314,14 @@ cr_rank_exclude(test_arg_t *arg, struct test_pool *pool, int *rank, bool wait)
314314
/* The *rank is stopped, that may cause set_params to timeout, do not wait. */
315315
cr_debug_set_params_nowait(arg, 0);
316316

317-
print_message("CR: excluding the rank %d ...\n", *rank);
318-
rc = dmg_system_exclude_rank(dmg_config_file, *rank);
319317
if (rc == 0 && wait) {
320-
print_message("CR: sleep more than 30 seconds for the rank death event\n");
321-
sleep(60);
318+
print_message("CR: sleep 30 seconds for the rank death event\n");
319+
sleep(30);
322320
}
323321

322+
print_message("CR: excluding the rank %d ...\n", *rank);
323+
rc = dmg_system_exclude_rank(dmg_config_file, *rank);
324+
324325
return rc;
325326
}
326327

@@ -443,14 +444,12 @@ cr_cleanup(test_arg_t *arg, struct test_pool *pools, uint32_t nr)
443444
}
444445

445446
static void
446-
cr_ins_wait(uint32_t pool_nr, uuid_t uuids[], struct daos_check_info *dci, int pre_wait)
447+
cr_ins_wait(uint32_t pool_nr, uuid_t uuids[], struct daos_check_info *dci)
447448
{
448449
int rc;
449450
int i;
450451

451452
print_message("CR: waiting check instance ...\n");
452-
if (pre_wait)
453-
sleep(pre_wait);
454453

455454
for (i = 0; i < CR_WAIT_MAX; i++) {
456455
cr_dci_fini(dci);
@@ -1071,7 +1070,7 @@ test_verify_cont(test_arg_t *arg, struct test_pool *pool, struct test_cont *cont
10711070
rc = cr_check_start(TCSF_RESET, 1, &pool->pool_uuid, "CONT_NONEXIST_ON_PS:CIA_IGNORE");
10721071
assert_rc_equal(rc, 0);
10731072

1074-
cr_ins_wait(1, &pool->pool_uuid, &dci, 0);
1073+
cr_ins_wait(1, &pool->pool_uuid, &dci);
10751074

10761075
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
10771076
assert_rc_equal(rc, 0);
@@ -1137,7 +1136,7 @@ cr_start_specified(void **state)
11371136
rc = cr_check_start(TCSF_RESET, 2, uuids, NULL);
11381137
assert_rc_equal(rc, 0);
11391138

1140-
cr_ins_wait(1, &uuids[0], &dcis[0], 0);
1139+
cr_ins_wait(1, &uuids[0], &dcis[0]);
11411140

11421141
for (i = 1; i < 3; i++) {
11431142
rc = cr_check_query(1, &uuids[i], &dcis[i]);
@@ -1247,7 +1246,7 @@ cr_leader_interaction(void **state)
12471246
}
12481247
assert_rc_equal(rc, 0);
12491248

1250-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
1249+
cr_ins_wait(1, &pool.pool_uuid, &dci);
12511250

12521251
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
12531252
assert_rc_equal(rc, 0);
@@ -1338,7 +1337,7 @@ cr_engine_interaction(void **state)
13381337
}
13391338
assert_rc_equal(rc, 0);
13401339

1341-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
1340+
cr_ins_wait(1, &pool.pool_uuid, &dci);
13421341

13431342
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
13441343
assert_rc_equal(rc, 0);
@@ -1437,7 +1436,7 @@ cr_repair_forall_leader(void **state)
14371436
assert_rc_equal(rc, 0);
14381437

14391438
for (i = 0; i < 2; i++) {
1440-
cr_ins_wait(1, &pools[i].pool_uuid, &dci, 0);
1439+
cr_ins_wait(1, &pools[i].pool_uuid, &dci);
14411440

14421441
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
14431442
assert_rc_equal(rc, 0);
@@ -1554,7 +1553,7 @@ cr_repair_forall_engine(void **state)
15541553
assert_rc_equal(rc, 0);
15551554

15561555
for (i = 0; i < 2; i++) {
1557-
cr_ins_wait(1, &pools[i].pool_uuid, &dci, 0);
1556+
cr_ins_wait(1, &pools[i].pool_uuid, &dci);
15581557

15591558
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
15601559
assert_rc_equal(rc, 0);
@@ -1640,7 +1639,7 @@ cr_stop_leader_interaction(void **state)
16401639
rc = cr_check_stop(0, NULL);
16411640
assert_rc_equal(rc, 0);
16421641

1643-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
1642+
cr_ins_wait(1, &pool.pool_uuid, &dci);
16441643

16451644
rc = cr_ins_verify(&dci, TCIS_STOPPED);
16461645
assert_rc_equal(rc, 0);
@@ -1720,7 +1719,7 @@ cr_stop_engine_interaction(void **state)
17201719
rc = cr_check_stop(0, NULL);
17211720
assert_rc_equal(rc, 0);
17221721

1723-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
1722+
cr_ins_wait(1, &pool.pool_uuid, &dci);
17241723

17251724
rc = cr_ins_verify(&dci, TCIS_STOPPED);
17261725
assert_rc_equal(rc, 0);
@@ -1840,7 +1839,7 @@ cr_stop_specified(void **state)
18401839
}
18411840
assert_rc_equal(rc, 0);
18421841

1843-
cr_ins_wait(1, &uuids[2], &dcis[2], 0);
1842+
cr_ins_wait(1, &uuids[2], &dcis[2]);
18441843

18451844
rc = cr_ins_verify(&dcis[2], TCIS_COMPLETED);
18461845
assert_rc_equal(rc, 0);
@@ -1920,7 +1919,7 @@ cr_auto_reset(void **state)
19201919
rc = cr_check_start(TCSF_RESET, 0, NULL, "POOL_NONEXIST_ON_MS:CIA_IGNORE");
19211920
assert_rc_equal(rc, 0);
19221921

1923-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
1922+
cr_ins_wait(1, &pool.pool_uuid, &dci);
19241923

19251924
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
19261925
assert_rc_equal(rc, 0);
@@ -1953,7 +1952,7 @@ cr_auto_reset(void **state)
19531952
}
19541953
assert_rc_equal(rc, 0);
19551954

1956-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
1955+
cr_ins_wait(1, &pool.pool_uuid, &dci);
19571956

19581957
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
19591958
assert_rc_equal(rc, 0);
@@ -1964,7 +1963,7 @@ cr_auto_reset(void **state)
19641963
rc = cr_check_start(TCSF_NONE, 0, NULL, "POOL_NONEXIST_ON_MS:CIA_DEFAULT");
19651964
assert_rc_equal(rc, 0);
19661965

1967-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
1966+
cr_ins_wait(1, &pool.pool_uuid, &dci);
19681967

19691968
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
19701969
assert_rc_equal(rc, 0);
@@ -2170,7 +2169,7 @@ cr_leader_resume(void **state)
21702169
rc = cr_check_start(TCSF_NONE, 0, NULL, NULL);
21712170
assert_rc_equal(rc, 0);
21722171

2173-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
2172+
cr_ins_wait(1, &pool.pool_uuid, &dci);
21742173

21752174
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
21762175
assert_rc_equal(rc, 0);
@@ -2204,7 +2203,7 @@ cr_leader_resume(void **state)
22042203
rc = cr_check_start(TCSF_RESET, 0, NULL, NULL);
22052204
assert_rc_equal(rc, 0);
22062205

2207-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
2206+
cr_ins_wait(1, &pool.pool_uuid, &dci);
22082207

22092208
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
22102209
assert_rc_equal(rc, 0);
@@ -2295,7 +2294,7 @@ cr_engine_resume(void **state)
22952294
rc = cr_check_start(TCSF_NONE, 0, NULL, NULL);
22962295
assert_rc_equal(rc, 0);
22972296

2298-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
2297+
cr_ins_wait(1, &pool.pool_uuid, &dci);
22992298

23002299
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
23012300
assert_rc_equal(rc, 0);
@@ -2506,7 +2505,7 @@ cr_failout(void **state)
25062505
rc = cr_check_start(TCSF_FAILOUT | TCSF_RESET, 0, NULL, "POOL_BAD_LABEL:CIA_TRUST_PS");
25072506
assert_rc_equal(rc, 0);
25082507

2509-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
2508+
cr_ins_wait(1, &pool.pool_uuid, &dci);
25102509

25112510
rc = cr_ins_verify(&dci, TCIS_FAILED);
25122511
assert_rc_equal(rc, 0);
@@ -2517,7 +2516,7 @@ cr_failout(void **state)
25172516
rc = cr_check_start(TCSF_RESET | TCSF_NO_FAILOUT, 0, NULL, "POOL_BAD_LABEL:CIA_TRUST_PS");
25182517
assert_rc_equal(rc, 0);
25192518

2520-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
2519+
cr_ins_wait(1, &pool.pool_uuid, &dci);
25212520

25222521
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
25232522
assert_rc_equal(rc, 0);
@@ -2588,7 +2587,7 @@ cr_auto_repair(void **state)
25882587
rc = cr_check_start(TCSF_AUTO | TCSF_RESET, 0, NULL, "CONT_BAD_LABEL:CIA_TRUST_TARGET");
25892588
assert_rc_equal(rc, 0);
25902589

2591-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
2590+
cr_ins_wait(1, &pool.pool_uuid, &dci);
25922591

25932592
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
25942593
assert_rc_equal(rc, 0);
@@ -2660,7 +2659,7 @@ cr_orphan_pool(void **state)
26602659
rc = cr_check_start(TCSF_RESET, 1, &pools[0].pool_uuid, NULL);
26612660
assert_rc_equal(rc, 0);
26622661

2663-
cr_ins_wait(1, &pools[0].pool_uuid, &dci, 0);
2662+
cr_ins_wait(1, &pools[0].pool_uuid, &dci);
26642663

26652664
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
26662665
assert_rc_equal(rc, 0);
@@ -2676,7 +2675,7 @@ cr_orphan_pool(void **state)
26762675
rc = cr_check_start(TCSF_ORPHAN, 1, &pools[0].pool_uuid, NULL);
26772676
assert_rc_equal(rc, 0);
26782677

2679-
cr_ins_wait(1, &pools[1].pool_uuid, &dci, 0);
2678+
cr_ins_wait(1, &pools[1].pool_uuid, &dci);
26802679

26812680
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
26822681
assert_rc_equal(rc, 0);
@@ -2764,7 +2763,7 @@ cr_fail_ps_sync(void **state, bool leader)
27642763
rc = cr_check_stop(0, NULL);
27652764
assert_rc_equal(rc, 0);
27662765

2767-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
2766+
cr_ins_wait(1, &pool.pool_uuid, &dci);
27682767

27692768
rc = cr_ins_verify(&dci, TCIS_STOPPED);
27702769
assert_rc_equal(rc, 0);
@@ -2774,7 +2773,7 @@ cr_fail_ps_sync(void **state, bool leader)
27742773
rc = cr_check_start(TCSF_NONE, 0, NULL, NULL);
27752774
assert_rc_equal(rc, 0);
27762775

2777-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
2776+
cr_ins_wait(1, &pool.pool_uuid, &dci);
27782777

27792778
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
27802779
assert_rc_equal(rc, 0);
@@ -2916,7 +2915,7 @@ cr_engine_death(void **state)
29162915
}
29172916
assert_rc_equal(rc, 0);
29182917

2919-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
2918+
cr_ins_wait(1, &pool.pool_uuid, &dci);
29202919

29212920
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
29222921
assert_rc_equal(rc, 0);
@@ -3032,7 +3031,7 @@ cr_engine_rejoin_succ(void **state)
30323031
}
30333032
assert_rc_equal(rc, 0);
30343033

3035-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
3034+
cr_ins_wait(1, &pool.pool_uuid, &dci);
30363035

30373036
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
30383037
assert_rc_equal(rc, 0);
@@ -3043,7 +3042,7 @@ cr_engine_rejoin_succ(void **state)
30433042
rc = cr_check_start(TCSF_RESET, 0, NULL, NULL);
30443043
assert_rc_equal(rc, 0);
30453044

3046-
cr_ins_wait(0, NULL, &dci, 0);
3045+
cr_ins_wait(0, NULL, &dci);
30473046

30483047
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
30493048
assert_rc_equal(rc, 0);
@@ -3137,7 +3136,7 @@ cr_engine_rejoin_fail(void **state)
31373136
}
31383137
assert_rc_equal(rc, 0);
31393138

3140-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
3139+
cr_ins_wait(1, &pool.pool_uuid, &dci);
31413140

31423141
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
31433142
assert_rc_equal(rc, 0);
@@ -3156,7 +3155,7 @@ cr_engine_rejoin_fail(void **state)
31563155
"POOL_LESS_SVC_WITHOUT_QUORUM:CIA_DISCARD,POOL_NONEXIST_ON_MS:CIA_DISCARD");
31573156
assert_rc_equal(rc, 0);
31583157

3159-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
3158+
cr_ins_wait(1, &pool.pool_uuid, &dci);
31603159

31613160
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
31623161
assert_rc_equal(rc, 0);
@@ -3282,7 +3281,7 @@ cr_multiple_pools(void **state)
32823281
}
32833282
assert_rc_equal(rc, 0);
32843283

3285-
cr_ins_wait(1, &uuids[1], &dci, 0);
3284+
cr_ins_wait(1, &uuids[1], &dci);
32863285

32873286
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
32883287
assert_rc_equal(rc, 0);
@@ -3306,7 +3305,7 @@ cr_multiple_pools(void **state)
33063305
rc = cr_check_stop(0, NULL);
33073306
assert_rc_equal(rc, 0);
33083307

3309-
cr_ins_wait(1, &uuids[1], &dci, 0);
3308+
cr_ins_wait(1, &uuids[1], &dci);
33103309

33113310
rc = cr_ins_verify(&dci, TCIS_STOPPED);
33123311
assert_rc_equal(rc, 0);
@@ -3346,7 +3345,7 @@ cr_multiple_pools(void **state)
33463345
goto again;
33473346
}
33483347

3349-
cr_ins_wait(0, NULL, &dci, 0);
3348+
cr_ins_wait(0, NULL, &dci);
33503349

33513350
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
33523351
assert_rc_equal(rc, 0);
@@ -3380,7 +3379,7 @@ cr_multiple_pools(void **state)
33803379
}
33813380
assert_rc_equal(rc, 0);
33823381

3383-
cr_ins_wait(0, NULL, &dci, 0);
3382+
cr_ins_wait(0, NULL, &dci);
33843383

33853384
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
33863385
assert_rc_equal(rc, 0);
@@ -3430,20 +3429,14 @@ cr_fail_sync_orphan(void **state)
34303429
rc = cr_check_start(TCSF_RESET, 0, NULL, NULL);
34313430
assert_rc_equal(rc, 0);
34323431

3433-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
3432+
cr_ins_wait(1, &pool.pool_uuid, &dci);
34343433

34353434
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
34363435
assert_rc_equal(rc, 0);
34373436

34383437
rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_CHECKED, 0, NULL, NULL, NULL);
34393438
assert_rc_equal(rc, 0);
34403439

3441-
/* Check leader may be completed earlier than check engines in this case, double check. */
3442-
cr_ins_wait(0, NULL, &dci, 0);
3443-
3444-
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
3445-
assert_rc_equal(rc, 0);
3446-
34473440
cr_debug_set_params(arg, 0);
34483441

34493442
rc = cr_mode_switch(false);
@@ -3533,7 +3526,7 @@ cr_set_policy_after(void **state)
35333526
assert_rc_equal(rc, 0);
35343527

35353528
for (i = 0; i < 2; i++) {
3536-
cr_ins_wait(1, &pools[i].pool_uuid, &dci, 10);
3529+
cr_ins_wait(1, &pools[i].pool_uuid, &dci);
35373530

35383531
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
35393532
assert_rc_equal(rc, 0);
@@ -3607,7 +3600,7 @@ cr_handle_fail_pool1(void **state)
36073600
rc = cr_check_start(TCSF_RESET, 0, NULL, NULL);
36083601
assert_rc_equal(rc, 0);
36093602

3610-
cr_ins_wait(1, &pool.pool_uuid, &dci, 10);
3603+
cr_ins_wait(1, &pool.pool_uuid, &dci);
36113604

36123605
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
36133606
assert_rc_equal(rc, 0);
@@ -3690,7 +3683,7 @@ cr_handle_fail_pool2(void **state)
36903683
rc = cr_check_start(TCSF_RESET, 0, NULL, NULL);
36913684
assert_rc_equal(rc, 0);
36923685

3693-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
3686+
cr_ins_wait(1, &pool.pool_uuid, &dci);
36943687

36953688
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
36963689
assert_rc_equal(rc, 0);
@@ -3786,7 +3779,7 @@ cr_maintenance_mode(void **state)
37863779
rc = cr_check_start(TCSF_DRYRUN, 0, NULL, NULL);
37873780
assert_rc_equal(rc, 0);
37883781

3789-
cr_ins_wait(1, &pool.pool_uuid, &dci, 0);
3782+
cr_ins_wait(1, &pool.pool_uuid, &dci);
37903783

37913784
rc = cr_ins_verify(&dci, TCIS_COMPLETED);
37923785
assert_rc_equal(rc, 0);
@@ -3899,7 +3892,7 @@ static const struct CMUnitTest cr_tests[] = {
38993892
{ "CR20: check engine death during check",
39003893
cr_engine_death, async_disable, test_case_teardown},
39013894
{ "CR21: check engine rejoins check instance successfully",
3902-
cr_engine_rejoin_succ, async_disable, test_case_teardown},
3895+
cr_engine_rejoin_succ, async_disable, test_case_teardown},
39033896
{ "CR22: check engine fails to rejoin check instance",
39043897
cr_engine_rejoin_fail, async_disable, test_case_teardown},
39053898
{ "CR23: control multiple pools check start/stop sequence",

0 commit comments

Comments
 (0)