Skip to content

Commit 4cd5b39

Browse files
Merge pull request #455 from wking/context-canceled-locked-task-graph
Bug 1873900: pkg/payload/task_graph: Avoid deadlocking on cancel with workCh queue
2 parents 02467f9 + 632e763 commit 4cd5b39

File tree

2 files changed

+32
-3
lines changed

2 files changed

+32
-3
lines changed

pkg/payload/task_graph.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -507,9 +507,18 @@ func RunGraph(ctx context.Context, graph *TaskGraph, maxParallelism int, fn func
507507
case <-ctx.Done():
508508
}
509509
case inflight > 0: // no work available to push; collect results
510-
result := <-resultCh
511-
results[result.index] = &result
512-
inflight--
510+
select {
511+
case result := <-resultCh:
512+
results[result.index] = &result
513+
inflight--
514+
case <-ctx.Done():
515+
select {
516+
case runTask := <-workCh: // workers canceled, so remove any work from the queue ourselves
517+
inflight--
518+
submitted[runTask.index] = false
519+
default:
520+
}
521+
}
513522
default: // no work to push and nothing in flight. We're done
514523
done = true
515524
}

pkg/payload/task_graph_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,26 @@ func TestRunGraph(t *testing.T) {
831831
}
832832
},
833833
},
834+
{
835+
name: "mid-task cancellation with work in queue does not deadlock",
836+
nodes: []*TaskNode{
837+
{Tasks: tasks("a1", "a2", "a3")},
838+
{Tasks: tasks("b")},
839+
},
840+
sleep: time.Millisecond,
841+
parallel: 1,
842+
errorOn: func(t *testing.T, name string, ctx context.Context, cancelFn func()) error {
843+
if err := ctx.Err(); err != nil {
844+
return err
845+
}
846+
if name == "a2" {
847+
cancelFn()
848+
}
849+
return nil
850+
},
851+
want: []string{"a1", "a2"},
852+
wantErrs: []string{"context canceled"},
853+
},
834854
{
835855
name: "task errors in parallel nodes both reported",
836856
nodes: []*TaskNode{

0 commit comments

Comments
 (0)