-
Notifications
You must be signed in to change notification settings - Fork 51
Description
Right now if a sub-test inside RunTest() panics, test run deadlocks, because done channel never gets closed, which causes cleanup from getBuildOutputStream() to block forever.
To reproduce the issue, apply the following patch:
diff --git test/linux_target_test.go test/linux_target_test.go
index 4bcb84d..998fbb6 100644
--- test/linux_target_test.go
+++ test/linux_target_test.go
@@ -1252,6 +1252,10 @@ WantedBy=multi-user.target
}
testEnv.RunTest(ctx, t, func(ctx context.Context, client gwclient.Client) {
+ t.Run("foo", func(t *testing.T) {
+ panic("foo")
+ })
+
req := newSolveRequest(withBuildTarget(testConfig.Target.Container), withSpec(ctx, t, spec))
solveT(ctx, t, client, req)
})Then run:
go test --timeout=10s ./test -run=TestRockylinux9/test_systemd_unit_single -vMinimal, independent reproducer:
package test
import (
"bufio"
"context"
"io"
"os"
"path/filepath"
"testing"
"github.com/moby/buildkit/client"
gwclient "github.com/moby/buildkit/frontend/gateway/client"
"github.com/moby/buildkit/util/progress/progressui"
)
func getBuildOutputStream(ctx context.Context, t *testing.T, done <-chan struct{}) io.Writer {
t.Helper()
f, err := os.OpenFile(filepath.Join(t.TempDir(), "build.log"), os.O_RDWR|os.O_CREATE, 0o600)
if err != nil {
t.Fatalf("Error opening temp file: %v", err)
}
t.Cleanup(func() {
defer f.Close()
select { // TODO: Blocks here.
case <-ctx.Done():
return
case <-done:
}
_, err = f.Seek(0, io.SeekStart)
if err != nil {
t.Log(err)
return
}
scanner := bufio.NewScanner(f)
for scanner.Scan() {
t.Log(scanner.Text())
}
if err := scanner.Err(); err != nil {
t.Log(err)
}
})
return f
}
func displaySolveStatus(ctx context.Context, t *testing.T) chan *client.SolveStatus {
ch := make(chan *client.SolveStatus)
done := make(chan struct{})
output := getBuildOutputStream(ctx, t, done)
display, err := progressui.NewDisplay(output, progressui.AutoMode, progressui.WithPhase(t.Name()))
if err != nil {
t.Fatal(err)
}
go func() {
defer close(done)
_, err := display.UpdateFrom(ctx, ch) // TODO: Blocks here.
if err != nil {
t.Log(err)
}
}()
return ch
}
func build(ch chan *client.SolveStatus, f func(ctx context.Context, gwc gwclient.Client)) {
f(nil, nil)
close(ch)
}
func fakeRunTest(ctx context.Context, t *testing.T, f func(ctx context.Context, gwc gwclient.Client)) {
ch := displaySolveStatus(ctx, t)
build(ch, f)
}
func TestReproducer(t *testing.T) {
ctx := context.Background()
t.Run("foo", func(t *testing.T) {
t.Parallel()
fakeRunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) {
t.Run("panicking-subtest", func(t *testing.T) {
panic("subtest error")
})
})
})
}IIUC, this is what happens:
- A sub-test created with
t.Runpanics. It runs in a separate goroutine as in https://cs.opensource.google/go/go/+/refs/tags/go1.25.5:src/testing/testing.go;l=1997. - On panic, Go runtime starts calling
deferfunction in reverse order to the calls, so first its test cleanup called: https://cs.opensource.google/go/go/+/refs/tags/go1.25.5:src/testing/testing.go;l=1926. - If I understand correctly and from what I have observed, all cleanup functions get called in revert order from being registered, here: https://cs.opensource.google/go/go/+/refs/tags/go1.25.5:src/testing/testing.go;l=1560 if there is no sub-tests or tests running in parallel.
Example:
func TestPanicPropagation(t *testing.T) {
t.Cleanup(func() {
fmt.Println("Grandparent cleanup called")
})
t.Run("parent", func(t *testing.T) {
t.Cleanup(func() {
fmt.Println("Parent cleanup called")
})
t.Run("panicking-test", func(t *testing.T) {
t.Cleanup(func() {
fmt.Println("Panic child cleanup called")
})
panic("panic")
})
t.Run("regular-test", func(t *testing.T) {
t.Cleanup(func() {
fmt.Println("Regular child cleanup called")
})
})
})
t.Run("parent-regular", func(t *testing.T) {
t.Cleanup(func() {
fmt.Println("Parent regular child cleanup called")
})
})
}Gives:
=== RUN TestPanicPropagation
=== RUN TestPanicPropagation/parent
=== RUN TestPanicPropagation/parent/panicking-test
Panic child cleanup called
Parent cleanup called
Grandparent cleanup called
--- FAIL: TestPanicPropagation (0.00s)
--- FAIL: TestPanicPropagation/parent (0.00s)
--- FAIL: TestPanicPropagation/parent/panicking-test (0.00s)
panic: panic [recovered, repanicked]
goroutine 22 [running]:
testing.tRunner.func1.2({0xc52420, 0xeeebe0})
testing/testing.go:1872 +0x237
testing.tRunner.func1()
testing/testing.go:1875 +0x35b
panic({0xc52420?, 0xeeebe0?})
runtime/panic.go:783 +0x132
command-line-arguments.TestPanicPropagation.func2.2(0xc000105180?)
./foo_test.go:102 +0x2f
testing.tRunner(0xc000105180, 0xe133c0)
testing/testing.go:1934 +0xea
created by testing.(*T).Run in goroutine 21
testing/testing.go:1997 +0x465
FAIL command-line-arguments 0.007s
FAILThis means first cleanup to be called is the one in getBuildOutputStream(), blocking until either done is closed or ctx is cancelled.
- While the cleanup is running,
donewould normally be closed bydisplaySolveStatus()oncechis closed byc.Build. However, that never happens asc.Buildblocks callingt.Run(), which panicked and is now running the stuck cleanups. This of course creates a deadlock.
Now that I think about it more, it's not a huge deal overall, since it will just cause the test suite to hang until timeout when panic inside RunTest combined with t.Run occurs, which is probably pretty rare, even though I ran into in pretty quickly. The tests are still going to fail, so we don't risk any false negatives here, just some annoyance.
Still, that seems like a bug in a RunTest. In #895, we explored various solutions.