Skip to content

Commit cc57e0d

Browse files
committed
testsuite: add canceled job epilog double-booking test
Add test to t1024-alloc-check.t that ensures a second job can't be started on the same resources when a sched.cancel message is sent to qmanager during epilog.
1 parent 30a4ed9 commit cc57e0d

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed

t/t1024-alloc-check.t

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,33 @@ test_expect_success 'some jobs received timeout exception' '
6060
# Passes only when grep finds no "job.exception type=alloc-check" line in
# the file joberr2 (test_must_fail inverts the grep result).
test_expect_success 'no jobs received alloc-check exception' '
6161
test_must_fail grep "job.exception type=alloc-check" joberr2
6262
'
63+
# Reset state before the next scenario: cancel every job, run
# "flux queue idle", then try to undrain rank 0.  The undrain step is
# best-effort: its exit status is deliberately ignored.
test_expect_success 'clean up' '
    flux cancel --all &&
    flux queue idle &&
    { flux resource undrain 0 || true; }
'
68+
69+
# Send a raw "sched.cancel" RPC for one job through the Flux Python bindings.
#
# Arguments: $1 - job ID, in any form flux.job.JobID.id_parse accepts
# Outputs:   an error message on stderr when no job ID is given
# Returns:   1 when the job ID is missing/empty, otherwise the exit status
#            of "flux python"
#
# The RPC is issued without waiting on the returned future, so no reply is
# read here.
send_sched_cancel() {
    local JOB_ID=$1
    if [ -z "$JOB_ID" ]; then
        echo "send_sched_cancel: missing job ID" >&2
        return 1
    fi
    # Hand the job ID to Python as sys.argv[1] rather than splicing it into
    # the program text, so quotes or other special characters in the value
    # cannot alter the Python source.
    flux python -c "import sys, flux; from flux.job.JobID import id_parse; flux.Flux().rpc('sched.cancel', {'id': id_parse(sys.argv[1])})" "$JOB_ID"
}
74+
75+
# ensure sched.cancel doesn't free resources when an epilog is pending
76+
# Double-booking check.  Job 1 runs a nonexistent command; once its
# epilog-start event is seen, a sched.cancel is sent for it and job 2 is
# submitted with --exclusive on one node.  The test passes only when the
# epilog-finish timestamp of job 1 is strictly earlier than the alloc
# timestamp of job 2.
#
# Job 2 is explicitly waited on for its alloc event (with a timeout) before
# its eventlog is read, so the timestamp file cannot be empty because of a
# race.  Events are selected by exact name with jq rather than by substring,
# and both timestamp files must be non-empty before they are compared.
test_expect_success 'submit a job that cannot run, cancel it during epilog, submit another ' '
    (flux submit -N 1 --flags=waitable --wait-event epilog-start -c 4 /command/that/does/not/exist > ji1 || true ) &&
    send_sched_cancel "$(cat ji1)" &&
    flux submit -N 1 --exclusive hostname > ji2 &&
    (flux job wait-event "$(cat ji1)" epilog-finish || true) &&
    flux job wait-event -t 60 "$(cat ji2)" alloc &&
    flux job info "$(cat ji1)" eventlog \
        | jq "select(.name == \"epilog-finish\") | .timestamp" > time1 &&
    flux job info "$(cat ji2)" eventlog \
        | jq "select(.name == \"alloc\") | .timestamp" > time2 &&
    test -s time1 &&
    test -s time2 &&
    awk -v t1="$(cat time1)" -v t2="$(cat time2)" "BEGIN {exit (t1 < t2) ? 0 : 1}"
'
85+
6386
# Runs the cleanup_active_jobs helper, which is defined outside this view
# (presumably by the shared test library -- confirm against the file header).
test_expect_success 'clean up' '
6487
cleanup_active_jobs
6588
'
89+
6690
test_expect_success 'remove fluxion modules' '
6791
remove_qmanager &&
6892
remove_resource

0 commit comments

Comments
 (0)