Skip to content

Commit 8bf7c2f

Browse files
committed
testsuite: fix race in flux top tests
Problem: On rare occasions some tests in t2801-top-cmd.t fail during large parallel runs of the testsuite. The suspicion is that the job-list module has not yet synced up and so does not yet know about the jobs that have been submitted in the tests. Solution: Add a function that waits until the jobs submitted in t2801-top-cmd.t are known to job-list and have reached the expected state, so that the tests that follow pass reliably.
1 parent b066e58 commit 8bf7c2f

File tree

1 file changed

+29
-4
lines changed

1 file changed

+29
-4
lines changed

t/t2801-top-cmd.t

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,26 @@ testssh="${SHARNESS_TEST_SRCDIR}/scripts/tssh"
1212

1313
export FLUX_URI_RESOLVE_LOCAL=t
1414

15+
# To ensure no raciness in tests below, ensure the job-list
16+
# module knows about submitted jobs in desired states
17+
JOB_WAIT_ITERS=100
18+
# Poll "flux jobs" until a job is reported in an expected state.
#
# Usage: job_list_wait_state ID STATE
#   ID     - job id to query
#   STATE  - string compared against the output of
#            "flux jobs -no {state} ID"
# Globals: JOB_WAIT_ITERS (read) - maximum number of 0.1s sleeps taken
#          between polls; falls back to 100 when unset or empty
# Returns: 0 as soon as the reported state equals STATE,
#          1 if it still differs once JOB_WAIT_ITERS retries are used
#          up, or if ID or STATE is missing
job_list_wait_state() {
	local id="$1"
	local expected="$2"
	local max_iters="${JOB_WAIT_ITERS:-100}"
	local i=0

	# Fail fast on a missing argument (e.g. an empty jobid file)
	# instead of polling until the retry budget runs out.
	if [ -z "${id}" ] || [ -z "${expected}" ]
	then
		echo "Usage: job_list_wait_state ID STATE" >&2
		return 1
	fi

	# The loop condition is the state comparison alone, so a match
	# seen on the very last poll is reported as success rather than
	# being mistaken for a timeout because the counter hit the limit.
	while [ "$(flux jobs -no "{state}" "${id}")" != "${expected}" ]
	do
		if [ "$i" -ge "${max_iters}" ]
		then
			return 1
		fi
		sleep 0.1
		i=$((i + 1))
	done
	return 0
}
34+
1535
test_expect_success 'flux-top -h prints custom usage' '
1636
flux top -h 2>usage &&
1737
grep "Usage:.*TARGET" usage
@@ -48,7 +68,8 @@ test_expect_success 'flux-top summary shows no jobs initially' '
4868
grep "0 failed" nojobs.out
4969
'
5070
test_expect_success 'run a test job to completion' '
51-
flux submit --wait -n1 flux start /bin/true >jobid
71+
flux submit --wait -n1 flux start /bin/true >jobid &&
72+
job_list_wait_state $(cat jobid) INACTIVE
5273
'
5374
test_expect_success 'flux-top summary shows one completed job' '
5475
nnodes=$(flux resource list --format="{nnodes}") &&
@@ -123,7 +144,8 @@ test_expect_success 'flux-top JOBID works' '
123144
test_expect_success 'submit non-batch job and wait for it to start' '
124145
flux submit -n1 \
125146
bash -c "touch job3-has-started && sleep 300" >jobid3 &&
126-
$waitfile job3-has-started
147+
$waitfile job3-has-started &&
148+
job_list_wait_state $(cat jobid3) RUN
127149
'
128150
test_expect_success 'flux-top shows 2 jobs running' '
129151
nnodes=$(flux resource list --format="{nnodes}") &&
@@ -242,8 +264,11 @@ test_expect_success 'configure queues and resource split amongst queues' '
242264
flux module load sched-simple
243265
'
244266
test_expect_success 'submit a bunch of jobs' '
245-
flux submit --cc=0-1 --queue=batch bash -c "sleep 300" &&
246-
flux submit --queue=debug sleep 300
267+
flux submit --cc=0-1 --queue=batch bash -c "sleep 300" > batch.ids &&
268+
flux submit --queue=debug sleep 300 > debug.ids &&
269+
job_list_wait_state $(head -n1 batch.ids) RUN &&
270+
job_list_wait_state $(tail -n1 batch.ids) RUN &&
271+
job_list_wait_state $(cat debug.ids) RUN
247272
'
248273
test_expect_success 'flux-top displays job queues' '
249274
$runpty -f asciicast -o queue.log flux top --test-exit &&

0 commit comments

Comments
 (0)