diff --git a/src/internal/goexperiment/exp_goleakfindergc_off.go b/src/internal/goexperiment/exp_goleakfindergc_off.go new file mode 100644 index 00000000000000..1a141fd5b7cfc7 --- /dev/null +++ b/src/internal/goexperiment/exp_goleakfindergc_off.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build !goexperiment.goroutineleakfindergc + +package goexperiment + +const GoroutineLeakFinderGC = false +const GoroutineLeakFinderGCInt = 0 diff --git a/src/internal/goexperiment/exp_goleakfindergc_on.go b/src/internal/goexperiment/exp_goleakfindergc_on.go new file mode 100644 index 00000000000000..8c816645927656 --- /dev/null +++ b/src/internal/goexperiment/exp_goleakfindergc_on.go @@ -0,0 +1,8 @@ +// Code generated by mkconsts.go. DO NOT EDIT. + +//go:build goexperiment.goroutineleakfindergc + +package goexperiment + +const GoroutineLeakFinderGC = true +const GoroutineLeakFinderGCInt = 1 diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go index dd7a4f446c1a57..4261ab9ca99679 100644 --- a/src/internal/goexperiment/flags.go +++ b/src/internal/goexperiment/flags.go @@ -120,4 +120,7 @@ type Flags struct { // RandomizedHeapBase enables heap base address randomization on 64-bit // platforms. RandomizedHeapBase64 bool + + // GoroutineLeakFinderGC enables the Deadlock GC implementation. 
+ GoroutineLeakFinderGC bool } diff --git a/src/net/http/pprof/pprof.go b/src/net/http/pprof/pprof.go index 635d3ad9d9f132..bfe6111fdf203d 100644 --- a/src/net/http/pprof/pprof.go +++ b/src/net/http/pprof/pprof.go @@ -351,25 +351,27 @@ func collectProfile(p *pprof.Profile) (*profile.Profile, error) { } var profileSupportsDelta = map[handler]bool{ - "allocs": true, - "block": true, - "goroutine": true, - "heap": true, - "mutex": true, - "threadcreate": true, + "allocs": true, + "block": true, + "goroutine": true, + "goroutineleak": true, + "heap": true, + "mutex": true, + "threadcreate": true, } var profileDescriptions = map[string]string{ - "allocs": "A sampling of all past memory allocations", - "block": "Stack traces that led to blocking on synchronization primitives", - "cmdline": "The command line invocation of the current program", - "goroutine": "Stack traces of all current goroutines. Use debug=2 as a query parameter to export in the same format as an unrecovered panic.", - "heap": "A sampling of memory allocations of live objects. You can specify the gc GET parameter to run GC before taking the heap sample.", - "mutex": "Stack traces of holders of contended mutexes", - "profile": "CPU profile. You can specify the duration in the seconds GET parameter. After you get the profile file, use the go tool pprof command to investigate the profile.", - "symbol": "Maps given program counters to function names. Counters can be specified in a GET raw query or POST body, multiple counters are separated by '+'.", - "threadcreate": "Stack traces that led to the creation of new OS threads", - "trace": "A trace of execution of the current program. You can specify the duration in the seconds GET parameter. 
After you get the trace file, use the go tool trace command to investigate the trace.", + "allocs": "A sampling of all past memory allocations", + "block": "Stack traces that led to blocking on synchronization primitives", + "cmdline": "The command line invocation of the current program", + "goroutine": "Stack traces of all current goroutines. Use debug=2 as a query parameter to export in the same format as an unrecovered panic.", + "goroutineleak": "Stack traces of all leaked goroutines. Use debug=2 as a query parameter to export in the same format as an unrecovered panic.", + "heap": "A sampling of memory allocations of live objects. You can specify the gc GET parameter to run GC before taking the heap sample.", + "mutex": "Stack traces of holders of contended mutexes", + "profile": "CPU profile. You can specify the duration in the seconds GET parameter. After you get the profile file, use the go tool pprof command to investigate the profile.", + "symbol": "Maps given program counters to function names. Counters can be specified in a GET raw query or POST body, multiple counters are separated by '+'.", + "threadcreate": "Stack traces that led to the creation of new OS threads", + "trace": "A trace of execution of the current program. You can specify the duration in the seconds GET parameter. After you get the trace file, use the go tool trace command to investigate the trace.", } type profileEntry struct { diff --git a/src/runtime/chan.go b/src/runtime/chan.go index bb554ebfdb1f3a..3fe5d635333a3f 100644 --- a/src/runtime/chan.go +++ b/src/runtime/chan.go @@ -263,11 +263,11 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { } // No stack splits between assigning elem and enqueuing mysg // on gp.waiting where copystack can find it. 
- mysg.elem = ep + mysg.elem.set(ep) mysg.waitlink = nil mysg.g = gp mysg.isSelect = false - mysg.c = c + mysg.c.set(c) gp.waiting = mysg gp.param = nil c.sendq.enqueue(mysg) @@ -298,7 +298,7 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { if mysg.releasetime > 0 { blockevent(mysg.releasetime-t0, 2) } - mysg.c = nil + mysg.c.set(nil) releaseSudog(mysg) if closed { if c.closed == 0 { @@ -336,9 +336,9 @@ func send(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { c.sendx = c.recvx // c.sendx = (c.sendx+1) % c.dataqsiz } } - if sg.elem != nil { + if sg.elem.get() != nil { sendDirect(c.elemtype, sg, ep) - sg.elem = nil + sg.elem.set(nil) } gp := sg.g unlockf() @@ -395,7 +395,7 @@ func sendDirect(t *_type, sg *sudog, src unsafe.Pointer) { // Once we read sg.elem out of sg, it will no longer // be updated if the destination's stack gets copied (shrunk). // So make sure that no preemption points can happen between read & use. - dst := sg.elem + dst := sg.elem.get() typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.Size_) // No need for cgo write barrier checks because dst is always // Go memory. @@ -406,7 +406,7 @@ func recvDirect(t *_type, sg *sudog, dst unsafe.Pointer) { // dst is on our stack or the heap, src is on another stack. // The channel is locked, so src will not move during this // operation. 
- src := sg.elem + src := sg.elem.get() typeBitsBulkBarrier(t, uintptr(dst), uintptr(src), t.Size_) memmove(dst, src, t.Size_) } @@ -441,9 +441,9 @@ func closechan(c *hchan) { if sg == nil { break } - if sg.elem != nil { - typedmemclr(c.elemtype, sg.elem) - sg.elem = nil + if sg.elem.get() != nil { + typedmemclr(c.elemtype, sg.elem.get()) + sg.elem.set(nil) } if sg.releasetime != 0 { sg.releasetime = cputicks() @@ -463,7 +463,7 @@ func closechan(c *hchan) { if sg == nil { break } - sg.elem = nil + sg.elem.set(nil) if sg.releasetime != 0 { sg.releasetime = cputicks() } @@ -642,13 +642,13 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) } // No stack splits between assigning elem and enqueuing mysg // on gp.waiting where copystack can find it. - mysg.elem = ep + mysg.elem.set(ep) mysg.waitlink = nil gp.waiting = mysg mysg.g = gp mysg.isSelect = false - mysg.c = c + mysg.c.set(c) gp.param = nil c.recvq.enqueue(mysg) if c.timer != nil { @@ -680,7 +680,7 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) } success := mysg.success gp.param = nil - mysg.c = nil + mysg.c.set(nil) releaseSudog(mysg) return true, success } @@ -727,14 +727,14 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { typedmemmove(c.elemtype, ep, qp) } // copy data from sender to queue - typedmemmove(c.elemtype, qp, sg.elem) + typedmemmove(c.elemtype, qp, sg.elem.get()) c.recvx++ if c.recvx == c.dataqsiz { c.recvx = 0 } c.sendx = c.recvx // c.sendx = (c.sendx+1) % c.dataqsiz } - sg.elem = nil + sg.elem.set(nil) gp := sg.g unlockf() gp.param = unsafe.Pointer(sg) diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go index 2db86e0562d6ae..156e072a12a6d0 100644 --- a/src/runtime/crash_test.go +++ b/src/runtime/crash_test.go @@ -186,6 +186,23 @@ func buildTestProg(t *testing.T, binary string, flags ...string) (string, error) t.Logf("running %v", cmd) cmd.Dir = "testdata/" + binary cmd = 
testenv.CleanCmdEnv(cmd) + + // Add the goroutineleakfindergc GOEXPERIMENT unconditionally since some tests depend on it. + // TODO(61405): Remove this once it's enabled by default. + // + // FIXME: Remove this once profiling is enabled and goroutineleakfindergc experiment is phased out. + edited := false + for i := range cmd.Env { + e := cmd.Env[i] + if _, vars, ok := strings.Cut(e, "GOEXPERIMENT="); ok { + cmd.Env[i] = "GOEXPERIMENT=" + vars + ",goroutineleakfindergc" + edited = true + } + } + if !edited { + cmd.Env = append(cmd.Env, "GOEXPERIMENT=goroutineleakfindergc") + } + out, err := cmd.CombinedOutput() if err != nil { target.err = fmt.Errorf("building %s %v: %v\n%s", binary, flags, err, out) diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 1f55717f0a1a60..466e6eb7ab31ae 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1221,7 +1221,7 @@ func (t *SemTable) Enqueue(addr *uint32) { s.releasetime = 0 s.acquiretime = 0 s.ticket = 0 - t.semTable.rootFor(addr).queue(addr, s, false) + t.semTable.rootFor(addr).queue(addr, s, false, false) } // Dequeue simulates dequeuing a waiter for a semaphore (or lock) at addr. diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 0a1e01cbcf9d7c..1e78970b4968ba 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -14,6 +14,7 @@ import ( "math/rand" "os" "reflect" + "regexp" "runtime" "runtime/debug" "slices" @@ -1095,3 +1096,570 @@ func TestDetectFinalizerAndCleanupLeaks(t *testing.T) { t.Fatalf("expected %d symbolized locations, got:\n%s", wantSymbolizedLocations, got) } } + +// This tests the goroutine leak garbage collector. +func TestGoroutineLeakGC(t *testing.T) { + // Goroutine leak test case. + // + // Test cases can be configured with test name, the name of the entry point function, + // a set of expected leaks identified by regular expressions, and the number of times + // the test should be repeated. 
+ // + // Repetitions are used to amortize flakiness in some tests. + type testCase struct { + name string + simple bool + expectedLeaks map[*regexp.Regexp]bool + + // flakyLeaks are goroutine leaks that are too flaky to be reliably detected. + // Still, they might pop up every once in a while. + // If these occur, do not fail the test due to unexpected leaks. + flakyLeaks map[*regexp.Regexp]struct{} + } + + // makeAnyTest is a short-hand for creating test cases. + // Each of the leaks in the list is identified by a regular expression. + // If a leak is flaky, it is added to the flakyLeaks map. + makeAnyTest := func( + name string, + flaky bool, + leaks ...string) testCase { + tc := testCase{ + name: name, + expectedLeaks: make(map[*regexp.Regexp]bool, len(leaks)), + flakyLeaks: make(map[*regexp.Regexp]struct{}, len(leaks)), + } + + for _, leak := range leaks { + if !flaky { + tc.expectedLeaks[regexp.MustCompile(leak)] = false + } else { + tc.flakyLeaks[regexp.MustCompile(leak)] = struct{}{} + } + } + + return tc + } + + // makeTest is a short-hand for creating non-flaky test cases. + makeTest := func(name string, leaks ...string) testCase { + tcase := makeAnyTest(name, false, leaks...) + tcase.simple = true + return tcase + } + + // makeFlakyTest is a short-hand for creating flaky test cases. + makeFlakyTest := func(name string, leaks ...string) testCase { + return makeAnyTest(name, true, leaks...) + } + + goroutineHeader := regexp.MustCompile(`goroutine \d+ \[`) + + // extractLeaks takes the output of a test and splits it into a + // list of strings denoting goroutine leaks. 
+ // + // If the input is: + // + // goroutine 1 [wait reason (leaked)]: + // main.leaked() + // ./testgoroutineleakgc/foo.go:37 +0x100 + // created by main.main() + // ./testgoroutineleakgc/main.go:10 +0x20 + // + // goroutine 2 [wait reason (leaked)]: + // main.leaked2() + // ./testgoroutineleakgc/foo.go:37 +0x100 + // created by main.main() + // ./testgoroutineleakgc/main.go:10 +0x20 + // + // The output is (as a list of strings): + // + // leaked() [wait reason] + // leaked2() [wait reason] + extractLeaks := func(output string) []string { + stacks := strings.Split(output, "\n\ngoroutine") + var leaks []string + for _, stack := range stacks { + lines := strings.Split(stack, "\n") + if len(lines) < 5 { + // Expecting at least the following lines (where n=len(lines)-1): + // + // [0] goroutine n [wait reason (leaked)] + // ... + // [n-3] bottom.leak.frame(...) + // [n-2] ./bottom/leak/frame/source.go:line + // [n-1] created by go.instruction() + // [n] ./go/instruction/source.go:line + continue + } + + if !strings.Contains(lines[0], "(leaked)") { + // Ignore non-leaked goroutines. + continue + } + + // Get the wait reason from the goroutine header. + header := lines[0] + waitReason := goroutineHeader.ReplaceAllString(header, "[") + waitReason = strings.ReplaceAll(waitReason, " (leaked)", "") + + // Get the function name from the stack trace (should be two lines above `created by`). + var funcName string + for i := len(lines) - 1; i >= 0; i-- { + if strings.Contains(lines[i], "created by") { + funcName = strings.TrimPrefix(lines[i-2], "main.") + break + } + } + if funcName == "" { + t.Fatalf("failed to extract function name from stack trace: %s", lines) + } + + leaks = append(leaks, funcName+" "+waitReason) + } + return leaks + } + + // Micro tests involve very simple leaks for each type of concurrency primitive operation. 
+ microTests := []testCase{ + makeTest("NilRecv", + `NilRecv\.func1\(.* \[chan receive \(nil chan\)\]`, + ), + makeTest("NilSend", + `NilSend\.func1\(.* \[chan send \(nil chan\)\]`, + ), + makeTest("SelectNoCases", + `SelectNoCases\.func1\(.* \[select \(no cases\)\]`, + ), + makeTest("ChanRecv", + `ChanRecv\.func1\(.* \[chan receive\]`, + ), + makeTest("ChanSend", + `ChanSend\.func1\(.* \[chan send\]`, + ), + makeTest("Select", + `Select\.func1\(.* \[select\]`, + ), + makeTest("WaitGroup", + `WaitGroup\.func1\(.* \[sync\.WaitGroup\.Wait\]`, + ), + makeTest("MutexStack", + `MutexStack\.func1\(.* \[sync\.Mutex\.Lock\]`, + ), + makeTest("MutexHeap", + `MutexHeap\.func1.1\(.* \[sync\.Mutex\.Lock\]`, + ), + makeTest("Cond", + `Cond\.func1\(.* \[sync\.Cond\.Wait\]`, + ), + makeTest("RWMutexRLock", + `RWMutexRLock\.func1\(.* \[sync\.RWMutex\.RLock\]`, + ), + makeTest("RWMutexLock", + `RWMutexLock\.func1\(.* \[sync\.(RW)?Mutex\.Lock\]`, + ), + makeTest("Mixed", + `Mixed\.func1\(.* \[sync\.WaitGroup\.Wait\]`, + `Mixed\.func1.1\(.* \[chan send\]`, + ), + makeTest("NoLeakGlobal"), + } + + // Stress tests are flaky and we do not strictly care about their output. + // They are only intended to stress the goroutine leak detector and profiling + // infrastructure in interesting ways. + stressTestCases := []testCase{ + makeFlakyTest("SpawnGC", + `spawnGC.func1\(.* \[chan receive\]`, + ), + } + + // Common goroutine leak patterns. 
+ // Extracted from "Unveiling and Vanquishing Goroutine Leaks in Enterprise Microservices: A Dynamic Analysis Approach" + // doi:10.1109/CGO57630.2024.10444835 + patternTestCases := []testCase{ + makeTest("NoCloseRange", + `noCloseRange\(.* \[chan send\]`, + `noCloseRange\.func1\(.* \[chan receive\]`, + ), + makeTest("MethodContractViolation", + `worker\.Start\.func1\(.* \[select\]`, + ), + makeTest("DoubleSend", + `DoubleSend\.func3\(.* \[chan send\]`, + ), + makeTest("EarlyReturn", + `earlyReturn\.func1\(.* \[chan send\]`, + ), + makeTest("NCastLeak", + `nCastLeak\.func1\(.* \[chan send\]`, + `NCastLeak\.func2\(.* \[chan receive\]`, + ), + makeTest("Timeout", + // (vsaioc): Timeout is *theoretically* flaky, but the + // pseudo-random choice for select case branches makes it + // practically impossible for it to fail. + `timeout\.func1\(.* \[chan send\]`, + ), + } + + // GoKer tests from "GoBench: A Benchmark Suite of Real-World Go Concurrency Bugs". + // White paper found at https://lujie.ac.cn/files/papers/GoBench.pdf + // doi:10.1109/CGO51591.2021.9370317. + // + // This list is curated for tests that are not excessively flaky. + // Some tests are also excluded because they are redundant. + // + // TODO(vsaioc): Some of these might be removable (their patterns may overlap). 
+ gokerTestCases := []testCase{ + makeTest("Cockroach584", + `Cockroach584\.func2\(.* \[sync\.Mutex\.Lock\]`, + ), + makeFlakyTest("Cockroach1055", + `Cockroach1055\.func2\(.* \[chan receive\]`, + `Cockroach1055\.func2\.2\(.* \[sync\.WaitGroup\.Wait\]`, + `Cockroach1055\.func2\.1\(.* \[chan receive\]`, + `Cockroach1055\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, + ), + makeFlakyTest("Cockroach1462", + `\(\*Stopper_cockroach1462\)\.RunWorker\.func1\(.* \[chan send\]`, + `Cockroach1462\.func2\(.* \[sync\.WaitGroup\.Wait\]`, + ), + makeFlakyTest("Cockroach2448", + `\(\*Store_cockroach2448\)\.processRaft\(.* \[select\]`, + `\(\*state_cockroach2448\)\.start\(.* \[select\]`, + ), + makeFlakyTest("Cockroach3710", + `\(\*Store_cockroach3710\)\.ForceRaftLogScanAndProcess\(.* \[sync\.RWMutex\.RLock\]`, + `\(\*Store_cockroach3710\)\.processRaft\.func1\(.* \[sync\.RWMutex\.Lock\]`, + ), + makeFlakyTest("Cockroach6181", + `testRangeCacheCoalescedRequests_cockroach6181\(.* \[sync\.WaitGroup\.Wait\]`, + `testRangeCacheCoalescedRequests_cockroach6181\.func1\.1\(.* \[sync\.(RW)?Mutex\.Lock\]`, + `testRangeCacheCoalescedRequests_cockroach6181\.func1\.1\(.* \[sync\.RWMutex\.RLock\]`, + ), + makeFlakyTest("Cockroach7504", + `Cockroach7504\.func2\.1.* \[sync\.Mutex\.Lock\]`, + `Cockroach7504\.func2\.2.* \[sync\.Mutex\.Lock\]`, + ), + makeFlakyTest("Cockroach9935", + `\(\*loggingT_cockroach9935\)\.outputLogEntry\(.* \[sync\.Mutex\.Lock\]`, + ), + makeFlakyTest("Cockroach10214", + `Cockroach10214\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, + `Cockroach10214\.func2\.2\(.* \[sync\.Mutex\.Lock\]`, + ), + makeTest("Cockroach10790", + `\(\*Replica_cockroach10790\)\.beginCmds\.func1\(.* \[chan receive\]`, + ), + makeTest("Cockroach13197", + `\(\*Tx_cockroach13197\)\.awaitDone\(.* \[chan receive\]`, + ), + makeTest("Cockroach13755", + `\(\*Rows_cockroach13755\)\.awaitDone\(.* \[chan receive\]`, + ), + makeFlakyTest("Cockroach16167", + `Cockroach16167\.func2\(.* \[sync\.RWMutex\.RLock\]`, + 
`\(\*Executor_cockroach16167\)\.Start\(.* \[sync\.RWMutex\.Lock\]`, + ), + makeFlakyTest("Cockroach18101", + `restore_cockroach18101\.func1\(.* \[chan send\]`, + ), + makeTest("Cockroach24808", + `Cockroach24808\.func2\(.* \[chan send\]`, + ), + makeTest("Cockroach25456", + `Cockroach25456\.func2\(.* \[chan receive\]`, + ), + makeTest("Cockroach35073", + `Cockroach35073\.func2.1\(.* \[chan send\]`, + `Cockroach35073\.func2\(.* \[chan send\]`, + ), + makeTest("Cockroach35931", + `Cockroach35931\.func2\(.* \[chan send\]`, + ), + makeTest("Etcd5509", + `Etcd5509\.func2\(.* \[sync\.RWMutex\.Lock\]`, + ), + makeTest("Etcd6708", + `Etcd6708\.func2\(.* \[sync\.RWMutex\.RLock\]`, + ), + makeFlakyTest("Etcd6857", + `\(\*node_etcd6857\)\.Status\(.* \[chan send\]`, + ), + makeFlakyTest("Etcd6873", + `\(\*watchBroadcasts_etcd6873\)\.stop\(.* \[chan receive\]`, + `newWatchBroadcasts_etcd6873\.func1\(.* \[sync\.Mutex\.Lock\]`, + ), + makeFlakyTest("Etcd7492", + `Etcd7492\.func2\(.* \[sync\.WaitGroup\.Wait\]`, + `Etcd7492\.func2\.1\(.* \[chan send\]`, + `\(\*simpleTokenTTLKeeper_etcd7492\)\.run\(.* \[sync\.Mutex\.Lock\]`, + ), + makeFlakyTest("Etcd7902", + `doRounds_etcd7902\.func1\(.* \[chan receive\]`, + `doRounds_etcd7902\.func1\(.* \[sync\.Mutex\.Lock\]`, + `runElectionFunc_etcd7902\(.* \[sync\.WaitGroup\.Wait\]`, + ), + makeTest("Etcd10492", + `Etcd10492\.func2\(.* \[sync\.Mutex\.Lock\]`, + ), + makeTest("Grpc660", + `\(\*benchmarkClient_grpc660\)\.doCloseLoopUnary\.func1\(.* \[chan send\]`, + ), + makeFlakyTest("Grpc795", + `\(\*Server_grpc795\)\.Serve\(.* \[sync\.Mutex\.Lock\]`, + `testServerGracefulStopIdempotent_grpc795\(.* \[sync\.Mutex\.Lock\]`, + ), + makeTest("Grpc862", + `DialContext_grpc862\.func2\(.* \[chan receive\]`), + makeTest("Grpc1275", + `testInflightStreamClosing_grpc1275\.func1\(.* \[chan receive\]`), + makeTest("Grpc1424", + `DialContext_grpc1424\.func1\(.* \[chan receive\]`), + makeFlakyTest("Grpc1460", + `\(\*http2Client_grpc1460\)\.keepalive\(.* 
\[chan receive\]`, + `\(\*http2Client_grpc1460\)\.NewStream\(.* \[sync\.Mutex\.Lock\]`, + ), + makeFlakyTest("Grpc3017", + // grpc/3017 involves a goroutine leak that also simultaneously engages many GC assists. + `Grpc3017\.func2\(.* \[chan receive\]`, + `Grpc3017\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, + `\(\*lbCacheClientConn_grpc3017\)\.RemoveSubConn\.func1\(.* \[sync\.Mutex\.Lock\]`, + ), + makeFlakyTest("Hugo3251", + `Hugo3251\.func2\(.* \[sync\.WaitGroup\.Wait\]`, + `Hugo3251\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, + `Hugo3251\.func2\.1\(.* \[sync\.RWMutex\.RLock\]`, + ), + makeFlakyTest("Istio16224", + `Istio16224\.func2\(.* \[sync\.Mutex\.Lock\]`, + `\(\*controller_istio16224\)\.Run\(.* \[chan send\]`, + `\(\*controller_istio16224\)\.Run\(.* \[chan receive\]`, + ), + makeFlakyTest("Istio17860", + `\(\*agent_istio17860\)\.runWait\(.* \[chan send\]`, + ), + makeFlakyTest("Istio18454", + `\(\*Worker_istio18454\)\.Start\.func1\(.* \[chan receive\]`, + `\(\*Worker_istio18454\)\.Start\.func1\(.* \[chan send\]`, + ), + // NOTE(vsaioc): + // Kubernetes/1321 is excluded due to a race condition in the original program + // that may, in extremely rare cases, lead to nil pointer dereference crashes. + // (Reproducible even with regular GC). Only kept here for posterity. 
+ // + // makeTest(testCase{name: "Kubernetes1321"}, + // `NewMux_kubernetes1321\.gowrap1\(.* \[chan send\]`, + // `testMuxWatcherClose_kubernetes1321\(.* \[sync\.Mutex\.Lock\]`), + makeTest("Kubernetes5316", + `finishRequest_kubernetes5316\.func1\(.* \[chan send\]`, + ), + makeFlakyTest("Kubernetes6632", + `\(\*idleAwareFramer_kubernetes6632\)\.monitor\(.* \[sync\.Mutex\.Lock\]`, + `\(\*idleAwareFramer_kubernetes6632\)\.WriteFrame\(.* \[chan send\]`, + ), + makeFlakyTest("Kubernetes10182", + `\(\*statusManager_kubernetes10182\)\.Start\.func1\(.* \[sync\.Mutex\.Lock\]`, + `\(\*statusManager_kubernetes10182\)\.SetPodStatus\(.* \[chan send\]`, + ), + makeFlakyTest("Kubernetes11298", + `After_kubernetes11298\.func1\(.* \[chan receive\]`, + `After_kubernetes11298\.func1\(.* \[sync\.Cond\.Wait\]`, + `Kubernetes11298\.func2\(.* \[chan receive\]`, + ), + makeFlakyTest("Kubernetes13135", + `Util_kubernetes13135\(.* \[sync\.Mutex\.Lock\]`, + `\(\*WatchCache_kubernetes13135\)\.Add\(.* \[sync\.Mutex\.Lock\]`, + ), + makeTest("Kubernetes25331", + `\(\*watchChan_kubernetes25331\)\.run\(.* \[chan send\]`, + ), + makeFlakyTest("Kubernetes26980", + `Kubernetes26980\.func2\(.* \[chan receive\]`, + `Kubernetes26980\.func2\.1\(.* \[sync\.Mutex\.Lock\]`, + `\(\*processorListener_kubernetes26980\)\.pop\(.* \[chan receive\]`, + ), + makeFlakyTest("Kubernetes30872", + `\(\*DelayingDeliverer_kubernetes30872\)\.StartWithHandler\.func1\(.* \[sync\.Mutex\.Lock\]`, + `\(\*Controller_kubernetes30872\)\.Run\(.* \[sync\.Mutex\.Lock\]`, + `\(\*NamespaceController_kubernetes30872\)\.Run\.func1\(.* \[sync\.Mutex\.Lock\]`, + ), + makeTest("Kubernetes38669", + `\(\*cacheWatcher_kubernetes38669\)\.process\(.* \[chan send\]`, + ), + makeFlakyTest("Kubernetes58107", + `\(\*ResourceQuotaController_kubernetes58107\)\.worker\(.* \[sync\.Cond\.Wait\]`, + `\(\*ResourceQuotaController_kubernetes58107\)\.worker\(.* \[sync\.RWMutex\.RLock\]`, + `\(\*ResourceQuotaController_kubernetes58107\)\.Sync\(.* 
\[sync\.RWMutex\.Lock\]`, + ), + makeFlakyTest("Kubernetes62464", + `\(\*manager_kubernetes62464\)\.reconcileState\(.* \[sync\.RWMutex\.RLock\]`, + `\(\*staticPolicy_kubernetes62464\)\.RemoveContainer\(.* \[sync\.(RW)?Mutex\.Lock\]`, + ), + makeFlakyTest("Kubernetes70277", + `Kubernetes70277\.func2\(.* \[chan receive\]`, + ), + makeFlakyTest("Moby4951", + `\(\*DeviceSet_moby4951\)\.DeleteDevice\(.* \[sync\.Mutex\.Lock\]`, + ), + makeTest("Moby7559", + `\(\*UDPProxy_moby7559\)\.Run\(.* \[sync\.Mutex\.Lock\]`, + ), + makeTest("Moby17176", + `testDevmapperLockReleasedDeviceDeletion_moby17176\.func1\(.* \[sync\.Mutex\.Lock\]`, + ), + makeFlakyTest("Moby21233", + `\(\*Transfer_moby21233\)\.Watch\.func1\(.* \[chan send\]`, + `\(\*Transfer_moby21233\)\.Watch\.func1\(.* \[select\]`, + `testTransfer_moby21233\(.* \[chan receive\]`, + ), + makeTest("Moby25348", + `\(\*Manager_moby25348\)\.init\(.* \[sync\.WaitGroup\.Wait\]`, + ), + makeFlakyTest("Moby27782", + `\(\*JSONFileLogger_moby27782\)\.readLogs\(.* \[sync\.Cond\.Wait\]`, + `\(\*Watcher_moby27782\)\.readEvents\(.* \[select\]`, + ), + makeFlakyTest("Moby28462", + `monitor_moby28462\(.* \[sync\.Mutex\.Lock\]`, + `\(\*Daemon_moby28462\)\.StateChanged\(.* \[chan send\]`, + ), + makeTest("Moby29733", + `Moby29733\.func2\(.* \[chan receive\]`, + `testActive_moby29733\.func1\(.* \[sync\.Cond\.Wait\]`, + ), + makeTest("Moby30408", + `Moby30408\.func2\(.* \[chan receive\]`, + `testActive_moby30408\.func1\(.* \[sync\.Cond\.Wait\]`, + ), + makeFlakyTest("Moby33781", + `monitor_moby33781\.func1\(.* \[chan send\]`, + ), + makeFlakyTest("Moby36114", + `\(\*serviceVM_moby36114\)\.hotAddVHDsAtStart\(.* \[sync\.Mutex\.Lock\]`, + ), + makeFlakyTest("Serving2137", + `\(\*Breaker_serving2137\)\.concurrentRequest\.func1\(.* \[chan send\]`, + `\(\*Breaker_serving2137\)\.concurrentRequest\.func1\(.* \[sync\.Mutex\.Lock\]`, + `Serving2137\.func2\(.* \[chan receive\]`, + ), + makeTest("Syncthing4829", + `Syncthing4829\.func2\(.* 
\[sync\.RWMutex\.RLock\]`, + ), + makeTest("Syncthing5795", + `\(\*rawConnection_syncthing5795\)\.Start\.func1.* \[chan receive\]`, + `Syncthing5795\.func2.* \[chan receive\]`, + ), + } + + // Combine all test cases into a single list. + testCases := append(microTests, stressTestCases...) + testCases = append(testCases, patternTestCases...) + testCases = append(testCases, gokerTestCases...) + + // Test cases must not panic or cause fatal exceptions. + failStates := regexp.MustCompile(`fatal|panic`) + + // Build the test program once. + exe, err := buildTestProg(t, "testgoroutineleakgc") + if err != nil { + t.Fatal(fmt.Sprintf("building testgoroutineleakgc failed: %v", err)) + } + + for _, tcase := range testCases { + t.Run(tcase.name, func(t *testing.T) { + t.Parallel() + + cmdEnv := []string{ + "GODEBUG=asyncpreemptoff=1", + "GOEXPERIMENT=greenteagc", + } + + if tcase.simple { + // If the test is simple, set GOMAXPROCS=1 in order to better + // control the behavior of the scheduler. + cmdEnv = append(cmdEnv, "GOMAXPROCS=1") + } + + // Run program and get output trace. + output := runBuiltTestProg(t, exe, tcase.name, cmdEnv...) + if len(output) == 0 { + t.Fatalf("Test produced no output. Is the goroutine leak profile collected?") + } + + // Zero tolerance policy for fatal exceptions or panics. + if failStates.MatchString(output) { + t.Errorf("unexpected fatal exception or panic!\noutput:\n%s\n\n", output) + return + } + + // Extract all the goroutine leaks + foundLeaks := extractLeaks(output) + + // If the test case was not expected to produce leaks, but some were reported, + // stop the test immediately. Zero tolerance policy for false positives. + if len(tcase.expectedLeaks)+len(tcase.flakyLeaks) == 0 && len(foundLeaks) > 0 { + t.Errorf("output:\n%s\n\ngoroutines leaks detected in case with no leaks", output) + } + + unexpectedLeaks := make([]string, 0, len(foundLeaks)) + + // Parse every leak and check if it is expected (maybe as a flaky leak). 
+ LEAKS: + for _, leak := range foundLeaks { + // Check if the leak is expected. + // If it is, check whether it has been encountered before. + var foundNew bool + var leakPattern *regexp.Regexp + + for expectedLeak, ok := range tcase.expectedLeaks { + if expectedLeak.MatchString(leak) { + if !ok { + foundNew = true + } + + leakPattern = expectedLeak + break + } + } + + if foundNew { + // Only bother writing if we found a new leak. + tcase.expectedLeaks[leakPattern] = true + } + + if leakPattern == nil { + // We are dealing with a leak not marked as expected. + // Check if it is a flaky leak. + for flakyLeak := range tcase.flakyLeaks { + if flakyLeak.MatchString(leak) { + // The leak is flaky. Carry on to the next line. + continue LEAKS + } + } + + unexpectedLeaks = append(unexpectedLeaks, leak) + } + } + + missingLeakStrs := make([]string, 0, len(tcase.expectedLeaks)) + for expectedLeak, found := range tcase.expectedLeaks { + if !found { + missingLeakStrs = append(missingLeakStrs, expectedLeak.String()) + } + } + + var errors []error + if len(unexpectedLeaks) > 0 { + errors = append(errors, fmt.Errorf("unexpected goroutine leaks:\n%s\n", strings.Join(unexpectedLeaks, "\n"))) + } + if len(missingLeakStrs) > 0 { + errors = append(errors, fmt.Errorf("missing expected leaks:\n%s\n", strings.Join(missingLeakStrs, ", "))) + } + if len(errors) > 0 { + t.Fatalf("Failed with the following errors:\n%s\n\noutput:\n%s", errors, output) + } + }) + } +} diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 9872e5297fb4b7..fa6a9362bfcc55 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -1247,6 +1247,28 @@ func markBitsForSpan(base uintptr) (mbits markBits) { return mbits } +// isMarkedOrNotInHeap returns true if a pointer is in the heap and marked, +// or if the pointer is not in the heap. Used by goroutine leak detection +// to determine if concurrency resources are reachable in memory. 
+func isMarkedOrNotInHeap(p unsafe.Pointer) bool { + obj, span, objIndex := findObject(uintptr(p), 0, 0) + if obj != 0 { + mbits := span.markBitsForIndex(objIndex) + return mbits.isMarked() + } + + // If we fall through to get here, the object is not in the heap. + // In this case, it is either a pointer to a stack object or a global resource. + // Treat it as reachable in memory by default, to be safe. + // + // TODO(vsaioc): we could be more precise by checking against the stacks + // of runnable goroutines. I don't think this is necessary, based on what we've seen, but + // let's keep the option open in case the runtime evolves. + // This will (naively) lead to quadratic blow-up for goroutine leak detection, + // but if it is only run on demand, maybe the extra cost is not a show-stopper. + return true +} + // advance advances the markBits to the next object in the span. func (m *markBits) advance() { if m.mask == 1<<7 { diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index f2df1a00e0c683..72dde1704675fb 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -364,8 +364,8 @@ type workType struct { // (and thus 8-byte alignment even on 32-bit architectures). bytesMarked uint64 - markrootNext uint32 // next markroot job - markrootJobs uint32 // number of markroot jobs + markrootNext atomic.Uint32 // next markroot job + markrootJobs atomic.Uint32 // number of markroot jobs nproc uint32 tstart int64 @@ -373,17 +373,36 @@ type workType struct { // Number of roots of various root types. Set by gcPrepareMarkRoots. // - // nStackRoots == len(stackRoots), but we have nStackRoots for - // consistency. - nDataRoots, nBSSRoots, nSpanRoots, nStackRoots int + // During normal GC cycle, nStackRoots == nMaybeRunnableStackRoots == len(stackRoots); + // during goroutine leak detection, nMaybeRunnableStackRoots is the number of stackRoots + // scheduled for marking. + // In both variants, nStackRoots == len(stackRoots). 
+ nDataRoots, nBSSRoots, nSpanRoots, nStackRoots, nMaybeRunnableStackRoots int + + // The following fields monitor the GC phase of the current cycle during + // goroutine leak detection. + goroutineLeakFinder struct { + // The GC has been instructed to perform goroutine leak detection during the next GC cycle; + // it is set by DetectGoroutineLeaks() and unset during gcStart(). + pending atomic.Bool + // The GC is running in goroutine leak detection mode; it is set during gcStart() + // and unset during gcMarkTermination(). Is protected by STW. + enabled bool + // The GC has performed goroutine leak detection during the current GC cycle; it is set + // during gcMarkDone(), right after goroutine leak detection has concluded, and unset during + // gcMarkTermination(). Is protected by STW. + done bool + } // Base indexes of each root type. Set by gcPrepareMarkRoots. baseData, baseBSS, baseSpans, baseStacks, baseEnd uint32 - // stackRoots is a snapshot of all of the Gs that existed - // before the beginning of concurrent marking. The backing - // store of this must not be modified because it might be - // shared with allgs. + // stackRoots is a snapshot of all of the Gs that existed before the + // beginning of concurrent marking. During goroutine leak detection, stackRoots + // is partitioned into two sets; to the left of nMaybeRunnableStackRoots are stackRoots + // of running / runnable goroutines and to the right of nMaybeRunnableStackRoots are + // stackRoots of unmarked / not runnable goroutines + // The stackRoots array is re-partitioned after each marking phase iteration. stackRoots []*g // Each type of GC state transition is protected by a lock. @@ -550,6 +569,25 @@ func GC() { releasem(mp) } +//go:linkname runtime_goroutineLeakGC runtime/pprof.runtime_goroutineLeakGC +func runtime_goroutineLeakGC() { + goroutineLeakGC() +} + +// goroutineLeakGC runs a GC cycle that performs goroutine leak detection. 
+func goroutineLeakGC() { + // Set the pending flag to true, instructing the next GC cycle to + // perform goroutine leak detection. + work.goroutineLeakFinder.pending.Store(true) + + // Spin GC cycles until the pending flag is unset. + // This ensures that goroutineLeakGC waits for a GC cycle that + // actually performs goroutine leak detection. + for work.goroutineLeakFinder.pending.Load() { + GC() + } +} + // gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has // already completed this mark phase, it returns immediately. func gcWaitOnMark(n uint32) { @@ -772,6 +810,12 @@ func gcStart(trigger gcTrigger) { schedEnableUser(false) } + if work.goroutineLeakFinder.pending.Load() { + work.goroutineLeakFinder.enabled = true + work.goroutineLeakFinder.pending.Store(false) + gcUntrackSyncObjects() + } + // Enter concurrent mark phase and enable // write barriers. // @@ -980,8 +1024,20 @@ top: } } }) - if restart { - gcDebugMarkDone.restartedDueTo27993 = true + + // Check whether we need to resume the marking phase because of issue #27993 + // or because of goroutine leak detection. + if restart || (work.goroutineLeakFinder.enabled && !work.goroutineLeakFinder.done) { + if restart { + // Restart because of issue #27993. + gcDebugMarkDone.restartedDueTo27993 = true + } else { + // Marking has reached a fixed-point. Attempt to detect goroutine leaks. + // + // If the returned value is true, then detection was performed during this cycle. + // Otherwise, more runnable goroutines were discovered, requiring additional mark work. + work.goroutineLeakFinder.done = findGoleaks() + } getg().m.preemptoff = "" systemstack(func() { @@ -1032,6 +1088,141 @@ top: gcMarkTermination(stw) } +// isMaybeRunnable checks whether a goroutine may still be semantically runnable. +// For goroutines which are semantically runnable, this will eventually return true +// as the GC marking phase progresses. 
It returns false for leaked goroutines, or for +// goroutines which are not yet computed as possibly runnable by the GC. +func (gp *g) isMaybeRunnable() bool { + // Check whether the goroutine is actually in a waiting state first. + if readgstatus(gp) != _Gwaiting { + // If the goroutine is not waiting, then clearly it is maybe runnable. + return true + } + + switch gp.waitreason { + case waitReasonSelectNoCases, + waitReasonChanSendNilChan, + waitReasonChanReceiveNilChan: + // Select with no cases or communicating on nil channels + // make goroutines unrunnable by definition. + return false + case waitReasonChanReceive, + waitReasonSelect, + waitReasonChanSend: + // Cycle through all *sudog to check whether + // the goroutine is waiting on a marked channel. + for sg := gp.waiting; sg != nil; sg = sg.waitlink { + if isMarkedOrNotInHeap(unsafe.Pointer(sg.c.get())) { + return true + } + } + return false + case waitReasonSyncCondWait, + waitReasonSyncWaitGroupWait, + waitReasonSyncMutexLock, + waitReasonSyncRWMutexLock, + waitReasonSyncRWMutexRLock: + // If waiting on mutexes, wait groups, or condition variables, + // check if the synchronization primitive attached to the sudog is marked. + if gp.waiting != nil { + return isMarkedOrNotInHeap(gp.waiting.elem.get()) + } + } + return true +} + +// findMaybeRunnableGoroutines checks to see if more blocked but maybe-runnable goroutines exist. +// If so, it adds them to the root set and increments work.markrootJobs accordingly. +// Returns true if we need to run another phase of markroots; returns false otherwise. +func findMaybeRunnableGoroutines() (moreWork bool) { + oldRootJobs := work.markrootJobs.Load() + + // To begin with we have a set of unchecked stackRoots between + // vIndex and ivIndex. During the loop, anything < vIndex should be + // valid stackRoots and anything >= ivIndex should be invalid stackRoots. + // The loop terminates when the two indices meet.
+ var vIndex, ivIndex int = work.nMaybeRunnableStackRoots, work.nStackRoots + // Reorder goroutine list + for vIndex < ivIndex { + if work.stackRoots[vIndex].isMaybeRunnable() { + vIndex = vIndex + 1 + continue + } + for ivIndex = ivIndex - 1; ivIndex != vIndex; ivIndex = ivIndex - 1 { + if gp := work.stackRoots[ivIndex]; gp.isMaybeRunnable() { + work.stackRoots[ivIndex] = work.stackRoots[vIndex] + work.stackRoots[vIndex] = gp + vIndex = vIndex + 1 + break + } + } + } + + newRootJobs := work.baseStacks + uint32(vIndex) + if newRootJobs > oldRootJobs { + work.nMaybeRunnableStackRoots = vIndex + work.markrootJobs.Store(newRootJobs) + } + return newRootJobs > oldRootJobs +} + +// gcUntrackSyncObjects scans allgs and sets the elem and c fields of all sudogs to +// an untrackable pointer. This prevents the GC from marking these objects as live in memory +// by following these pointers when running deadlock detection. +func gcUntrackSyncObjects() { + assertWorldStopped() + + forEachGRace(func(gp *g) { + for sg := gp.waiting; sg != nil; sg = sg.waitlink { + sg.elem.setUntraceable() + sg.c.setUntraceable() + } + }) +} + +// gcRestoreSyncObjects restores the elem and c fields of all sudogs to their original values. +// Should be invoked after the goroutine leak detection phase. +func gcRestoreSyncObjects() { + assertWorldStopped() + + forEachGRace(func(gp *g) { + for sg := gp.waiting; sg != nil; sg = sg.waitlink { + sg.elem.setTraceable() + sg.c.setTraceable() + } + }) +} + +// findGoleaks scans the remaining stackRoots and marks any which are +// blocked over exclusively unreachable concurrency primitives as leaked (deadlocked). +// Returns true if the goroutine leak check was performed (or unnecessary). +// Returns false if the GC cycle has not yet computed all maybe-runnable goroutines.
+func findGoleaks() bool { + // Report goroutine leaks and mark them unreachable, and resume marking + // we still need to mark these unreachable *g structs as they + // get reused, but their stack won't get scanned + if work.nMaybeRunnableStackRoots == work.nStackRoots { + // nMaybeRunnableStackRoots == nStackRoots means that all goroutines are marked. + return true + } + + // Check whether any more maybe-runnable goroutines can be found by the GC. + if findMaybeRunnableGoroutines() { + // We found more work, so we need to resume the marking phase. + return false + } + + // For the remaining goroutines, mark them as unreachable and leaked. + for i := work.nMaybeRunnableStackRoots; i < work.nStackRoots; i++ { + gp := work.stackRoots[i] + casgstatus(gp, _Gwaiting, _Gleaked) + } + // Put the remaining roots as ready for marking and drain them. + work.markrootJobs.Add(int32(work.nStackRoots - work.nMaybeRunnableStackRoots)) + work.nMaybeRunnableStackRoots = work.nStackRoots + return true +} + // World must be stopped and mark assists and background workers must be // disabled. func gcMarkTermination(stw worldStop) { @@ -1184,7 +1375,18 @@ func gcMarkTermination(stw worldStop) { throw("non-concurrent sweep failed to drain all sweep queues") } + if work.goroutineLeakFinder.enabled { + // Restore the elem and c fields of all sudogs to their original values. + gcRestoreSyncObjects() + } + + var goroutineLeakDetectionDone bool systemstack(func() { + // Pull the GC out of goroutine leak detection mode. + work.goroutineLeakFinder.enabled = false + goroutineLeakDetectionDone = work.goroutineLeakFinder.done + work.goroutineLeakFinder.done = false + // The memstats updated above must be updated with the world // stopped to ensure consistency of some values, such as // sched.idleTime and sched.totaltime. 
memstats also include @@ -1258,7 +1460,11 @@ func gcMarkTermination(stw worldStop) { printlock() print("gc ", memstats.numgc, " @", string(itoaDiv(sbuf[:], uint64(work.tSweepTerm-runtimeInitTime)/1e6, 3)), "s ", - util, "%: ") + util, "%") + if goroutineLeakDetectionDone { + print(" (checking for goroutine leaks)") + } + print(": ") prev := work.tSweepTerm for i, ns := range []int64{work.tMark, work.tMarkTerm, work.tEnd} { if i != 0 { @@ -1612,7 +1818,7 @@ func gcMarkWorkAvailable(p *p) bool { if !work.full.empty() || !work.spanq.empty() { return true // global work available } - if work.markrootNext < work.markrootJobs { + if work.markrootNext.Load() < work.markrootJobs.Load() { return true // root scan work available } return false @@ -1628,8 +1834,8 @@ func gcMark(startTime int64) { work.tstart = startTime // Check that there's no marking work remaining. - if work.full != 0 || work.markrootNext < work.markrootJobs || !work.spanq.empty() { - print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, " spanq.n=", work.spanq.size(), "\n") + if work.full != 0 || work.markrootNext.Load() < work.markrootJobs.Load() { + print("runtime: full=", hex(work.full), " next=", work.markrootNext.Load(), " jobs=", work.markrootJobs.Load(), " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") panic("non-empty mark queue after concurrent mark") } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 8b306045c5da21..419f6cacde93fd 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -53,6 +53,55 @@ const ( pagesPerSpanRoot = 512 ) +// internalBlocked returns true if the goroutine is blocked due to an +// internal (non-leaking) waitReason, e.g. waiting for the netpoller or garbage collector. 
+// Such goroutines are never leak detection candidates according to the GC. +// +//go:nosplit +func (gp *g) internalBlocked() bool { + reason := gp.waitreason + return reason < waitReasonChanReceiveNilChan || waitReasonSyncWaitGroupWait < reason +} + +// allGsSnapshotSortedForGC takes a snapshot of allgs and returns a sorted +// array of Gs. The array is sorted by the G's status, with running Gs +// first, followed by blocked Gs. The returned index indicates the cutoff +// between runnable and blocked Gs. +// +// The world must be stopped or allglock must be held. +func allGsSnapshotSortedForGC() ([]*g, int) { + assertWorldStoppedOrLockHeld(&allglock) + + // Reset the status of leaked goroutines in order to improve + // the precision of goroutine leak detection. + for _, gp := range allgs { + gp.atomicstatus.CompareAndSwap(_Gleaked, _Gwaiting) + } + + allgsSorted := make([]*g, len(allgs)) + + // Indices cutting off runnable and blocked Gs. + var currIndex, blockedIndex = 0, len(allgsSorted) - 1 + for _, gp := range allgs { + // not sure if we need atomic load because we are stopping the world, + // but do it just to be safe for now + if status := readgstatus(gp); status != _Gwaiting || gp.internalBlocked() { + allgsSorted[currIndex] = gp + currIndex++ + } else { + allgsSorted[blockedIndex] = gp + blockedIndex-- + } + } + + // Because the world is stopped or allglock is held, allgadd + // cannot happen concurrently with this. allgs grows + // monotonically and existing entries never change, so we can + // simply return a copy of the slice header. For added safety, + // we trim everything past len because that can still change. + return allgsSorted, blockedIndex + 1 +} + // gcPrepareMarkRoots queues root scanning jobs (stacks, globals, and // some miscellany) and initializes scanning-related state. 
// @@ -102,11 +151,20 @@ func gcPrepareMarkRoots() { // ignore them because they begin life without any roots, so // there's nothing to scan, and any roots they create during // the concurrent phase will be caught by the write barrier. - work.stackRoots = allGsSnapshot() + if work.goroutineLeakFinder.enabled { + // goroutine leak finder GC --- only prepare runnable + // goroutines for marking. + work.stackRoots, work.nMaybeRunnableStackRoots = allGsSnapshotSortedForGC() + } else { + // regular GC --- scan every goroutine + work.stackRoots = allGsSnapshot() + work.nMaybeRunnableStackRoots = len(work.stackRoots) + } + work.nStackRoots = len(work.stackRoots) - work.markrootNext = 0 - work.markrootJobs = uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nStackRoots) + work.markrootNext.Store(0) + work.markrootJobs.Store(uint32(fixedRootCount + work.nDataRoots + work.nBSSRoots + work.nSpanRoots + work.nMaybeRunnableStackRoots)) // Calculate base indexes of each root type work.baseData = uint32(fixedRootCount) @@ -119,8 +177,8 @@ func gcPrepareMarkRoots() { // gcMarkRootCheck checks that all roots have been scanned. It is // purely for debugging. 
func gcMarkRootCheck() { - if work.markrootNext < work.markrootJobs { - print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n") + if work.markrootNext.Load() < work.markrootJobs.Load() { + print(work.markrootNext.Load(), " of ", work.markrootJobs.Load(), " markroot jobs done\n") throw("left over markroot jobs") } @@ -868,7 +926,7 @@ func scanstack(gp *g, gcw *gcWork) int64 { case _Grunning: print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") throw("scanstack: goroutine not stopped") - case _Grunnable, _Gsyscall, _Gwaiting: + case _Grunnable, _Gsyscall, _Gwaiting, _Gleaked: // ok } @@ -1136,6 +1194,28 @@ func gcDrainMarkWorkerFractional(gcw *gcWork) { gcDrain(gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) } +// gcNextMarkRoot safely increments work.markrootNext and returns the +// index of the next root job. The returned boolean is true if the root job +// is valid, and false if there are no more root jobs to be claimed, +// i.e. work.markrootNext >= work.markrootJobs. +func gcNextMarkRoot() (uint32, bool) { + if !work.goroutineLeakFinder.enabled { + // If not running goroutine leak detection, behave as the GC previously did. + job := work.markrootNext.Add(1) - 1 + return job, job < work.markrootJobs.Load() + } + + // Otherwise, use a CAS loop to increment markrootNext. + for next, jobs := work.markrootNext.Load(), work.markrootJobs.Load(); next < jobs; next = work.markrootNext.Load() { + // There is still work available at the moment. + if work.markrootNext.CompareAndSwap(next, next+1) { + // We manage to snatch a root job. Return the root index. + return next, true + } + } + return 0, false +} + // gcDrain scans roots and objects in work buffers, blackening grey // objects until it is unable to get more work. 
It may return before // GC is done; it's the caller's responsibility to balance work from @@ -1194,13 +1274,12 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { } } - // Drain root marking jobs. - if work.markrootNext < work.markrootJobs { + if work.markrootNext.Load() < work.markrootJobs.Load() { // Stop if we're preemptible, if someone wants to STW, or if // someone is calling forEachP. for !(gp.preempt && (preemptible || sched.gcwaiting.Load() || pp.runSafePointFn != 0)) { - job := atomic.Xadd(&work.markrootNext, +1) - 1 - if job >= work.markrootJobs { + job, ok := gcNextMarkRoot() + if !ok { break } markroot(gcw, job, flushBgCredit) @@ -1346,9 +1425,9 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 { wbBufFlush() if b = gcw.tryGetObj(); b == 0 { // Try to do a root job. - if work.markrootNext < work.markrootJobs { - job := atomic.Xadd(&work.markrootNext, +1) - 1 - if job < work.markrootJobs { + if work.markrootNext.Load() < work.markrootJobs.Load() { + job, ok := gcNextMarkRoot() + if ok { workFlushed += markroot(gcw, job, false) continue } diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go index b2ff257f65eca2..1e0f563a2930bf 100644 --- a/src/runtime/mprof.go +++ b/src/runtime/mprof.go @@ -1259,6 +1259,20 @@ func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.P return goroutineProfileWithLabelsConcurrent(p, labels) } +//go:linkname pprof_goroutineLeakProfileWithLabels +func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { + return goroutineLeakProfileWithLabelsConcurrent(p, labels) +} + +// labels may be nil. If labels is non-nil, it must have the same length as p. 
+func goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { + if labels != nil && len(labels) != len(p) { + labels = nil + } + + return goroutineLeakProfileWithLabelsConcurrent(p, labels) +} + var goroutineProfile = struct { sema uint32 active bool @@ -1302,6 +1316,84 @@ func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileSt return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new)) } +func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { + if len(p) == 0 { + // An empty slice is obviously too small. Return a rough + // allocation estimate without bothering to STW. As long as + // this is close, then we'll only need to STW once (on the next + // call). + return int(gleakcount()), false + } + + // Use the same semaphore as goroutineProfileWithLabelsConcurrent, + // because ultimately we still use goroutine profiles. + semacquire(&goroutineProfile.sema) + + // Unlike in goroutineProfileWithLabelsConcurrent, we don't save the current + // goroutine stack, because it is obviously not a leaked goroutine. + + pcbuf := makeProfStack() // see saveg() for explanation + // Using gleakcount while the world is stopped should give us a consistent view + // of the number of leaked goroutines. + n = int(gleakcount()) + + if n > len(p) { + // There's not enough space in p to store the whole profile, so (per the + // contract of runtime.GoroutineProfile) we're not allowed to write to p + // at all and must return n, false. + semrelease(&goroutineProfile.sema) + return n, false + } + + // Prepare for all other goroutines to enter the profile. Every goroutine struct in the allgs list + // has its goroutineProfiled field cleared. Any goroutine created from this point on (while + // goroutineProfile.active is set) will start with its goroutineProfiled + // field set to goroutineProfileSatisfied. 
+ goroutineProfile.active = true + goroutineProfile.records = p + goroutineProfile.labels = labels + + // Visit each goroutine that is currently in the _Gleaked state. + forEachGRace(func(gp1 *g) { + if readgstatus(gp1) == _Gleaked { + tryRecordGoroutineProfile(gp1, pcbuf, Gosched) + } + }) + + endOffset := goroutineProfile.offset.Swap(0) + goroutineProfile.active = false + goroutineProfile.records = nil + goroutineProfile.labels = nil + + // Restore the invariant that every goroutine struct in allgs has its + // goroutineProfiled field cleared. + forEachGRace(func(gp1 *g) { + gp1.goroutineProfiled.Store(goroutineProfileAbsent) + }) + + if raceenabled { + raceacquire(unsafe.Pointer(&labelSync)) + } + + if n != int(endOffset) { + // It's a big surprise that the number of goroutines changed while we + // were collecting the profile. But probably better to return a + // truncated profile than to crash the whole process. + // + // For instance, needm moves a goroutine out of the _Gdead state and so + // might be able to change the goroutine count without interacting with + // the scheduler. For code like that, the race windows are small and the + // combination of features is uncommon, so it's hard to be (and remain) + // sure we've caught them all. + // + // FIXME(vsaioc): I kept this in because goroutineProfileWithLabelsConcurrent + // also uses it, but... is this dead code? + } + + semrelease(&goroutineProfile.sema) + return n, true +} + func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) { if len(p) == 0 { // An empty slice is obviously too small. Return a rough diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go index 55563009b3a2c1..28923181bb619f 100644 --- a/src/runtime/pprof/pprof.go +++ b/src/runtime/pprof/pprof.go @@ -105,12 +105,13 @@ import ( // // Each Profile has a unique name.
A few profiles are predefined: // -// goroutine - stack traces of all current goroutines -// heap - a sampling of memory allocations of live objects -// allocs - a sampling of all past memory allocations -// threadcreate - stack traces that led to the creation of new OS threads -// block - stack traces that led to blocking on synchronization primitives -// mutex - stack traces of holders of contended mutexes +// goroutine - stack traces of all current goroutines +// goroutineleak - stack traces of all leaked goroutines +// allocs - a sampling of all past memory allocations +// heap - a sampling of memory allocations of live objects +// threadcreate - stack traces that led to the creation of new OS threads +// block - stack traces that led to blocking on synchronization primitives +// mutex - stack traces of holders of contended mutexes // // These predefined profiles maintain themselves and panic on an explicit // [Profile.Add] or [Profile.Remove] method call. @@ -169,6 +170,7 @@ import ( // holds a lock for 1s while 5 other goroutines are waiting for the entire // second to acquire the lock, its unlock call stack will report 5s of // contention. + type Profile struct { name string mu sync.Mutex @@ -189,6 +191,12 @@ var goroutineProfile = &Profile{ write: writeGoroutine, } +var goroutineLeakProfile = &Profile{ + name: "goroutineleak", + count: countGoroutineLeak, + write: writeGoroutineLeak, +} + var threadcreateProfile = &Profile{ name: "threadcreate", count: countThreadCreate, @@ -224,12 +232,13 @@ func lockProfiles() { if profiles.m == nil { // Initial built-in profiles. 
profiles.m = map[string]*Profile{ - "goroutine": goroutineProfile, - "threadcreate": threadcreateProfile, - "heap": heapProfile, - "allocs": allocsProfile, - "block": blockProfile, - "mutex": mutexProfile, + "goroutine": goroutineProfile, + "goroutineleak": goroutineLeakProfile, + "threadcreate": threadcreateProfile, + "heap": heapProfile, + "allocs": allocsProfile, + "block": blockProfile, + "mutex": mutexProfile, } } } @@ -739,6 +748,11 @@ func countGoroutine() int { return runtime.NumGoroutine() } +// countGoroutineLeak returns the number of leaked goroutines. +func countGoroutineLeak() int { + return int(runtime_gleakcount()) +} + // writeGoroutine writes the current runtime GoroutineProfile to w. func writeGoroutine(w io.Writer, debug int) error { if debug >= 2 { @@ -747,6 +761,23 @@ func writeGoroutine(w io.Writer, debug int) error { return writeRuntimeProfile(w, debug, "goroutine", pprof_goroutineProfileWithLabels) } +// writeGoroutineLeak first invokes a GC cycle that performs goroutine leak detection. +// It then writes the goroutine profile, filtering for leaked goroutines. +func writeGoroutineLeak(w io.Writer, debug int) error { + // Run the GC with leak detection first so that leaked goroutines + // may transition to the leaked state. + runtime_goroutineLeakGC() + + // If the debug flag is set sufficiently high, just defer to writing goroutine stacks + // like in a regular goroutine profile. Include non-leaked goroutines, too. + if debug >= 2 { + return writeGoroutineStacks(w) + } + + // Otherwise, write the goroutine leak profile. + return writeRuntimeProfile(w, debug, "goroutineleak", pprof_goroutineLeakProfileWithLabels) +} + func writeGoroutineStacks(w io.Writer) error { // We don't know how big the buffer needs to be to collect // all the goroutines. Start with 1 MB and try a few times, doubling each time. 
@@ -969,6 +1000,9 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu //go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) +//go:linkname pprof_goroutineLeakProfileWithLabels runtime.pprof_goroutineLeakProfileWithLabels +func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) + //go:linkname pprof_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond func pprof_cyclesPerSecond() int64 diff --git a/src/runtime/pprof/runtime.go b/src/runtime/pprof/runtime.go index 8d37c7d3add146..ec09700d811efa 100644 --- a/src/runtime/pprof/runtime.go +++ b/src/runtime/pprof/runtime.go @@ -29,6 +29,12 @@ func runtime_setProfLabel(labels unsafe.Pointer) // runtime_getProfLabel is defined in runtime/proflabel.go. func runtime_getProfLabel() unsafe.Pointer +// runtime_gleakcount is defined in runtime/proc.go. +func runtime_gleakcount() int32 + +// runtime_goroutineLeakGC is defined in runtime/mgc.go. +func runtime_goroutineLeakGC() + // SetGoroutineLabels sets the current goroutine's labels to match ctx. // A new goroutine inherits the labels of the goroutine that created it. // This is a lower-level API than [Do], which should be used instead when possible. diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go index 22727df74eead2..5367f66213804b 100644 --- a/src/runtime/preempt.go +++ b/src/runtime/preempt.go @@ -160,7 +160,7 @@ func suspendG(gp *g) suspendGState { s = _Gwaiting fallthrough - case _Grunnable, _Gsyscall, _Gwaiting: + case _Grunnable, _Gsyscall, _Gwaiting, _Gleaked: // Claim goroutine by setting scan bit. // This may race with execution or readying of gp. // The scan bit keeps it from transition state. 
@@ -269,6 +269,7 @@ func resumeG(state suspendGState) { case _Grunnable | _Gscan, _Gwaiting | _Gscan, + _Gleaked | _Gscan, _Gsyscall | _Gscan: casfrom_Gscanstatus(gp, s, s&^_Gscan) } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 25d39d9ba389ad..eff1edae7c53fc 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -513,7 +513,7 @@ func acquireSudog() *sudog { s := pp.sudogcache[n-1] pp.sudogcache[n-1] = nil pp.sudogcache = pp.sudogcache[:n-1] - if s.elem != nil { + if s.elem.get() != nil { throw("acquireSudog: found s.elem != nil in cache") } releasem(mp) @@ -522,7 +522,7 @@ func acquireSudog() *sudog { //go:nosplit func releaseSudog(s *sudog) { - if s.elem != nil { + if s.elem.get() != nil { throw("runtime: sudog with non-nil elem") } if s.isSelect { @@ -537,7 +537,7 @@ func releaseSudog(s *sudog) { if s.waitlink != nil { throw("runtime: sudog with non-nil waitlink") } - if s.c != nil { + if s.c.get() != nil { throw("runtime: sudog with non-nil c") } gp := getg() @@ -1208,6 +1208,7 @@ func casfrom_Gscanstatus(gp *g, oldval, newval uint32) { _Gscanwaiting, _Gscanrunning, _Gscansyscall, + _Gscanleaked, _Gscanpreempted: if newval == oldval&^_Gscan { success = gp.atomicstatus.CompareAndSwap(oldval, newval) @@ -1228,6 +1229,7 @@ func castogscanstatus(gp *g, oldval, newval uint32) bool { case _Grunnable, _Grunning, _Gwaiting, + _Gleaked, _Gsyscall: if newval == oldval|_Gscan { r := gp.atomicstatus.CompareAndSwap(oldval, newval) @@ -5551,6 +5553,25 @@ func gcount() int32 { return n } +//go:linkname runtime_gleakcount runtime/pprof.runtime_gleakcount +func runtime_gleakcount() int32 { + return gleakcount() +} + +// gleakcount returns the number of leaked goroutines currently reported by +// the runtime. Protected by allglock. 
+func gleakcount() int32 { + n := int32(0) + lock(&allglock) + for _, g := range allgs { + if readgstatus(g) == _Gleaked { + n++ + } + } + unlock(&allglock) + return n +} + func mcount() int32 { return int32(sched.mnext - sched.nmfreed) } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index b5d2dcefaded99..7193d97c9e75f6 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -87,6 +87,9 @@ const ( // ready()ing this G. _Gpreempted // 9 + // _Gleaked represents a leaked goroutine caught by the GC. + _Gleaked // 10 + // _Gscan combined with one of the above states other than // _Grunning indicates that GC is scanning the stack. The // goroutine is not executing user code and the stack is owned @@ -104,6 +107,7 @@ const ( _Gscansyscall = _Gscan + _Gsyscall // 0x1003 _Gscanwaiting = _Gscan + _Gwaiting // 0x1004 _Gscanpreempted = _Gscan + _Gpreempted // 0x1009 + _Gscanleaked = _Gscan + _Gleaked // 0x100a ) const ( @@ -315,6 +319,78 @@ type gobuf struct { bp uintptr // for framepointer-enabled architectures } +// maybeTraceablePtr is a special pointer that is conditionally trackable +// by the GC. It consists of an address as a uintptr (vu) and a pointer +// to a data element (vp). +// +// maybeTraceablePtr values can be in one of three states: +// 1. Unset: vu == 0 && vp == nil +// 2. Untracked: vu != 0 && vp == nil +// 3. Tracked: vu != 0 && vp != nil +// +// Do not set fields manually. Use methods instead. +// Extend this type with additional methods if needed. +type maybeTraceablePtr struct { + vp unsafe.Pointer // For liveness only. + vu uintptr // Source of truth. +} + +// setUntraceable unsets the pointer but preserves the address. +// This is used to hide the pointer from the GC. +// +//go:nosplit +func (p *maybeTraceablePtr) setUntraceable() { + p.vp = nil +} + +// setTraceable resets the pointer to the stored address. +// This is used to make the pointer visible to the GC.
+// +//go:nosplit +func (p *maybeTraceablePtr) setTraceable() { + p.vp = unsafe.Pointer(p.vu) +} + +// set sets the pointer to the data element and updates the address. +// +//go:nosplit +func (p *maybeTraceablePtr) set(v unsafe.Pointer) { + p.vp = v + p.vu = uintptr(v) +} + +// get retrieves the pointer to the data element. +// +//go:nosplit +func (p *maybeTraceablePtr) get() unsafe.Pointer { + return unsafe.Pointer(p.vu) +} + +// uintptr returns the uintptr address of the pointer. +// +//go:nosplit +func (p *maybeTraceablePtr) uintptr() uintptr { + return p.vu +} + +// maybeTraceableChan extends conditionally trackable pointers (maybeTraceablePtr) +// to track hchan pointers. +// +// Do not set fields manually. Use methods instead. +type maybeTraceableChan struct { + maybeTraceablePtr +} + +//go:nosplit +func (p *maybeTraceableChan) set(c *hchan) { + p.maybeTraceablePtr.set(unsafe.Pointer(c)) +} + +//go:nosplit +func (p *maybeTraceableChan) get() *hchan { + return (*hchan)(p.maybeTraceablePtr.get()) +} + // sudog (pseudo-g) represents a g in a wait list, such as for sending/receiving // on a channel. // @@ -334,7 +410,8 @@ type sudog struct { next *sudog prev *sudog - elem unsafe.Pointer // data element (may point to stack) + + elem maybeTraceablePtr // data element (may point to stack) // The following fields are never accessed concurrently. // For channels, waitlink is only accessed by g. @@ -362,10 +439,10 @@ type sudog struct { // in the second entry in the list.) 
waiters uint16 - parent *sudog // semaRoot binary tree - waitlink *sudog // g.waiting list or semaRoot - waittail *sudog // semaRoot - c *hchan // channel + parent *sudog // semaRoot binary tree + waitlink *sudog // g.waiting list or semaRoot + waittail *sudog // semaRoot + c maybeTraceableChan // channel } type libcall struct { @@ -1064,24 +1141,24 @@ const ( waitReasonZero waitReason = iota // "" waitReasonGCAssistMarking // "GC assist marking" waitReasonIOWait // "IO wait" - waitReasonChanReceiveNilChan // "chan receive (nil chan)" - waitReasonChanSendNilChan // "chan send (nil chan)" waitReasonDumpingHeap // "dumping heap" waitReasonGarbageCollection // "garbage collection" waitReasonGarbageCollectionScan // "garbage collection scan" waitReasonPanicWait // "panicwait" - waitReasonSelect // "select" - waitReasonSelectNoCases // "select (no cases)" waitReasonGCAssistWait // "GC assist wait" waitReasonGCSweepWait // "GC sweep wait" waitReasonGCScavengeWait // "GC scavenge wait" - waitReasonChanReceive // "chan receive" - waitReasonChanSend // "chan send" waitReasonFinalizerWait // "finalizer wait" waitReasonForceGCIdle // "force gc (idle)" waitReasonUpdateGOMAXPROCSIdle // "GOMAXPROCS updater (idle)" waitReasonSemacquire // "semacquire" waitReasonSleep // "sleep" + waitReasonChanReceiveNilChan // "chan receive (nil chan)" + waitReasonChanSendNilChan // "chan send (nil chan)" + waitReasonSelect // "select" + waitReasonSelectNoCases // "select (no cases)" + waitReasonChanReceive // "chan receive" + waitReasonChanSend // "chan send" waitReasonSyncCondWait // "sync.Cond.Wait" waitReasonSyncMutexLock // "sync.Mutex.Lock" waitReasonSyncRWMutexRLock // "sync.RWMutex.RLock" @@ -1167,12 +1244,24 @@ func (w waitReason) String() string { return waitReasonStrings[w] } +// isMutexWait returns true if the goroutine is blocked because of +// sync.Mutex.Lock or sync.RWMutex.[R]Lock. 
+// +//go:nosplit func (w waitReason) isMutexWait() bool { return w == waitReasonSyncMutexLock || w == waitReasonSyncRWMutexRLock || w == waitReasonSyncRWMutexLock } +// isSyncWait returns true if the goroutine is blocked because of +// sync library primitive operations. +// +//go:nosplit +func (w waitReason) isSyncWait() bool { + return waitReasonSyncCondWait <= w && w <= waitReasonSyncWaitGroupWait +} + func (w waitReason) isWaitingForSuspendG() bool { return isWaitingForSuspendG[w] } diff --git a/src/runtime/select.go b/src/runtime/select.go index ae7754b17377dd..d94a08c2651082 100644 --- a/src/runtime/select.go +++ b/src/runtime/select.go @@ -83,7 +83,7 @@ func selparkcommit(gp *g, _ unsafe.Pointer) bool { // channels in lock order. var lastc *hchan for sg := gp.waiting; sg != nil; sg = sg.waitlink { - if sg.c != lastc && lastc != nil { + if sg.c.get() != lastc && lastc != nil { // As soon as we unlock the channel, fields in // any sudog with that channel may change, // including c and waitlink. Since multiple @@ -92,7 +92,7 @@ func selparkcommit(gp *g, _ unsafe.Pointer) bool { // of a channel. unlock(&lastc.lock) } - lastc = sg.c + lastc = sg.c.get() } if lastc != nil { unlock(&lastc.lock) @@ -320,12 +320,12 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, nsends, nrecvs int, blo sg.isSelect = true // No stack splits between assigning elem and enqueuing // sg on gp.waiting where copystack can find it. - sg.elem = cas.elem + sg.elem.set(cas.elem) sg.releasetime = 0 if t0 != 0 { sg.releasetime = -1 } - sg.c = c + sg.c.set(c) // Construct waiting list in lock order. *nextp = sg nextp = &sg.waitlink @@ -368,8 +368,8 @@ func selectgo(cas0 *scase, order0 *uint16, pc0 *uintptr, nsends, nrecvs int, blo // Clear all elem before unlinking from gp.waiting. 
for sg1 := gp.waiting; sg1 != nil; sg1 = sg1.waitlink { sg1.isSelect = false - sg1.elem = nil - sg1.c = nil + sg1.elem.set(nil) + sg1.c.set(nil) } gp.waiting = nil diff --git a/src/runtime/sema.go b/src/runtime/sema.go index 6af49b1b0c42d9..833829f70ed219 100644 --- a/src/runtime/sema.go +++ b/src/runtime/sema.go @@ -21,6 +21,7 @@ package runtime import ( "internal/cpu" + "internal/goexperiment" "internal/runtime/atomic" "unsafe" ) @@ -188,7 +189,7 @@ func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes i } // Any semrelease after the cansemacquire knows we're waiting // (we set nwait above), so go to sleep. - root.queue(addr, s, lifo) + root.queue(addr, s, lifo, reason.isSyncWait()) goparkunlock(&root.lock, reason, traceBlockSync, 4+skipframes) if s.ticket != 0 || cansemacquire(addr) { break @@ -301,9 +302,16 @@ func cansemacquire(addr *uint32) bool { } // queue adds s to the blocked goroutines in semaRoot. -func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { +func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool, syncSema bool) { s.g = getg() - s.elem = unsafe.Pointer(addr) + s.elem.set(unsafe.Pointer(addr)) + if goexperiment.GoroutineLeakFinderGC && syncSema { + s.g.waiting = s + // When dealing with sync semaphores, hide the elem field from the GC + // to prevent it from prematurely marking the semaphore when running + // goroutine leak detection. + s.elem.setUntraceable() + } s.next = nil s.prev = nil s.waiters = 0 @@ -311,7 +319,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { var last *sudog pt := &root.treap for t := *pt; t != nil; t = *pt { - if t.elem == unsafe.Pointer(addr) { + if uintptr(unsafe.Pointer(addr)) == t.elem.uintptr() { // Already have addr in list. if lifo { // Substitute s in t's place in treap. 
@@ -357,7 +365,7 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { return } last = t - if uintptr(unsafe.Pointer(addr)) < uintptr(t.elem) { + if uintptr(unsafe.Pointer(addr)) < t.elem.uintptr() { pt = &t.prev } else { pt = &t.next @@ -402,11 +410,13 @@ func (root *semaRoot) queue(addr *uint32, s *sudog, lifo bool) { func (root *semaRoot) dequeue(addr *uint32) (found *sudog, now, tailtime int64) { ps := &root.treap s := *ps + for ; s != nil; s = *ps { - if s.elem == unsafe.Pointer(addr) { + if uintptr(unsafe.Pointer(addr)) == s.elem.uintptr() { goto Found } - if uintptr(unsafe.Pointer(addr)) < uintptr(s.elem) { + + if uintptr(unsafe.Pointer(addr)) < s.elem.uintptr() { ps = &s.prev } else { ps = &s.next @@ -470,8 +480,12 @@ Found: } tailtime = s.acquiretime } + if goexperiment.GoroutineLeakFinderGC { + // Goroutine is no longer blocked. Clear the waiting pointer. + s.g.waiting = nil + } s.parent = nil - s.elem = nil + s.elem.set(nil) s.next = nil s.prev = nil s.ticket = 0 @@ -590,6 +604,14 @@ func notifyListWait(l *notifyList, t uint32) { // Enqueue itself. s := acquireSudog() s.g = getg() + if goexperiment.GoroutineLeakFinderGC { + // Storing this pointer (invisible to GC) so that we can trace + // the condvar address from the blocked goroutine when + // checking for goroutine leaks. + s.elem.set(unsafe.Pointer(l)) + s.elem.setUntraceable() + s.g.waiting = s + } s.ticket = t s.releasetime = 0 t0 := int64(0) @@ -607,6 +629,12 @@ func notifyListWait(l *notifyList, t uint32) { if t0 != 0 { blockevent(s.releasetime-t0, 2) } + if goexperiment.GoroutineLeakFinderGC { + // Goroutine is no longer blocked. Clear up its waiting pointer, + // and clean up the sudog before releasing it. 
+ s.g.waiting = nil + s.elem.set(nil) + } releaseSudog(s) } diff --git a/src/runtime/sizeof_test.go b/src/runtime/sizeof_test.go index de859866a5adb2..5888177f0ea7a1 100644 --- a/src/runtime/sizeof_test.go +++ b/src/runtime/sizeof_test.go @@ -22,7 +22,7 @@ func TestSizeof(t *testing.T) { _64bit uintptr // size on 64bit platforms }{ {runtime.G{}, 280 + xreg, 440 + xreg}, // g, but exported for testing - {runtime.Sudog{}, 56, 88}, // sudog, but exported for testing + {runtime.Sudog{}, 64, 104}, // sudog, but exported for testing } if xreg > runtime.PtrSize { diff --git a/src/runtime/stack.go b/src/runtime/stack.go index a338708d76fca8..8c78965d372223 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -821,7 +821,8 @@ func adjustsudogs(gp *g, adjinfo *adjustinfo) { // the data elements pointed to by a SudoG structure // might be in the stack. for s := gp.waiting; s != nil; s = s.waitlink { - adjustpointer(adjinfo, unsafe.Pointer(&s.elem)) + adjustpointer(adjinfo, unsafe.Pointer(&s.elem.vu)) + adjustpointer(adjinfo, unsafe.Pointer(&s.elem.vp)) } } @@ -834,7 +835,7 @@ func fillstack(stk stack, b byte) { func findsghi(gp *g, stk stack) uintptr { var sghi uintptr for sg := gp.waiting; sg != nil; sg = sg.waitlink { - p := uintptr(sg.elem) + uintptr(sg.c.elemsize) + p := sg.elem.uintptr() + uintptr(sg.c.get().elemsize) if stk.lo <= p && p < stk.hi && p > sghi { sghi = p } @@ -853,7 +854,7 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr { // Lock channels to prevent concurrent send/receive. var lastc *hchan for sg := gp.waiting; sg != nil; sg = sg.waitlink { - if sg.c != lastc { + if sg.c.get() != lastc { // There is a ranking cycle here between gscan bit and // hchan locks. Normally, we only allow acquiring hchan // locks and then getting a gscan bit. In this case, we @@ -863,9 +864,9 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr { // suspended. 
So, we get a special hchan lock rank here // that is lower than gscan, but doesn't allow acquiring // any other locks other than hchan. - lockWithRank(&sg.c.lock, lockRankHchanLeaf) + lockWithRank(&sg.c.get().lock, lockRankHchanLeaf) } - lastc = sg.c + lastc = sg.c.get() } // Adjust sudogs. @@ -885,10 +886,10 @@ func syncadjustsudogs(gp *g, used uintptr, adjinfo *adjustinfo) uintptr { // Unlock channels. lastc = nil for sg := gp.waiting; sg != nil; sg = sg.waitlink { - if sg.c != lastc { - unlock(&sg.c.lock) + if sg.c.get() != lastc { + unlock(&sg.c.get().lock) } - lastc = sg.c + lastc = sg.c.get() } return sgsize diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go b/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go new file mode 100644 index 00000000000000..313c98e226dc93 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach10214.go @@ -0,0 +1,136 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/10214 + * Buggy version: 7207111aa3a43df0552509365fdec741a53f873f + * fix commit-id: 27e863d90ab0660494778f1c35966cc5ddc38e32 + * Flaky: 3/100 + * Description: This deadlock is caused by different order when acquiring + * coalescedMu.Lock() and raftMu.Lock(). The fix is to refactor sendQueuedHeartbeats() + * so that cockroachdb can unlock coalescedMu before locking raftMu. 
+ */ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" + "unsafe" +) + +func init() { + register("Cockroach10214", Cockroach10214) +} + +type Store_cockroach10214 struct { + coalescedMu struct { + sync.Mutex + heartbeatResponses []int + } + mu struct { + replicas map[int]*Replica_cockroach10214 + } +} + +func (s *Store_cockroach10214) sendQueuedHeartbeats() { + s.coalescedMu.Lock() // LockA acquire + runtime.Gosched() + defer s.coalescedMu.Unlock() + for i := 0; i < len(s.coalescedMu.heartbeatResponses); i++ { + s.sendQueuedHeartbeatsToNode() // LockB + } + // LockA release +} + +func (s *Store_cockroach10214) sendQueuedHeartbeatsToNode() { + for i := 0; i < len(s.mu.replicas); i++ { + r := s.mu.replicas[i] + r.reportUnreachable() // LockB + } +} + +type Replica_cockroach10214 struct { + raftMu sync.Mutex + mu sync.Mutex + store *Store_cockroach10214 +} + +func (r *Replica_cockroach10214) reportUnreachable() { + r.raftMu.Lock() // LockB acquire + runtime.Gosched() + //+time.Sleep(time.Nanosecond) + defer r.raftMu.Unlock() + // LockB release +} + +func (r *Replica_cockroach10214) tick() { + r.raftMu.Lock() // LockB acquire + runtime.Gosched() + defer r.raftMu.Unlock() + r.tickRaftMuLocked() + // LockB release +} + +func (r *Replica_cockroach10214) tickRaftMuLocked() { + r.mu.Lock() + defer r.mu.Unlock() + if r.maybeQuiesceLocked() { + return + } +} +func (r *Replica_cockroach10214) maybeQuiesceLocked() bool { + for i := 0; i < 2; i++ { + if !r.maybeCoalesceHeartbeat() { + return true + } + } + return false +} +func (r *Replica_cockroach10214) maybeCoalesceHeartbeat() bool { + msgtype := uintptr(unsafe.Pointer(r)) % 3 + switch msgtype { + case 0, 1, 2: + r.store.coalescedMu.Lock() // LockA acquire + default: + return false + } + r.store.coalescedMu.Unlock() // LockA release + return true +} + +func Cockroach10214() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 
2) + }() + for i := 0; i < 1000; i++ { + go func() { + store := &Store_cockroach10214{} + responses := &store.coalescedMu.heartbeatResponses + *responses = append(*responses, 1, 2) + store.mu.replicas = make(map[int]*Replica_cockroach10214) + + rp1 := &Replica_cockroach10214{ + store: store, + } + rp2 := &Replica_cockroach10214{ + store: store, + } + store.mu.replicas[0] = rp1 + store.mu.replicas[1] = rp2 + + go func() { + // deadlocks: x > 0 + store.sendQueuedHeartbeats() + }() + + go func() { + // deadlocks: x > 0 + rp1.tick() + }() + + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go b/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go new file mode 100644 index 00000000000000..ac7bc93821caf3 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach1055.go @@ -0,0 +1,150 @@ +package main + +import ( + "os" + "runtime/pprof" + "sync" + "sync/atomic" + "time" +) + +func init() { + register("Cockroach1055", Cockroach1055) +} + +type Stopper_cockroach1055 struct { + stopper chan struct{} + stop sync.WaitGroup + mu sync.Mutex + draining int32 + drain sync.WaitGroup +} + +func (s *Stopper_cockroach1055) AddWorker() { + s.stop.Add(1) +} + +func (s *Stopper_cockroach1055) ShouldStop() <-chan struct{} { + if s == nil { + return nil + } + return s.stopper +} + +func (s *Stopper_cockroach1055) SetStopped() { + if s != nil { + s.stop.Done() + } +} + +func (s *Stopper_cockroach1055) Quiesce() { + s.mu.Lock() + defer s.mu.Unlock() + s.draining = 1 + s.drain.Wait() + s.draining = 0 +} + +func (s *Stopper_cockroach1055) Stop() { + s.mu.Lock() // L1 + defer s.mu.Unlock() + atomic.StoreInt32(&s.draining, 1) + s.drain.Wait() + close(s.stopper) + s.stop.Wait() +} + +func (s *Stopper_cockroach1055) StartTask() bool { + if atomic.LoadInt32(&s.draining) == 0 { + s.mu.Lock() + defer s.mu.Unlock() + s.drain.Add(1) + return true + } + return false +} + +func NewStopper_cockroach1055() *Stopper_cockroach1055 { + return 
&Stopper_cockroach1055{ + stopper: make(chan struct{}), + } +} + +func Cockroach1055() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(1 * time.Second) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i <= 1000; i++ { + go func() { // G1 + // deadlocks: x > 0 + var stoppers []*Stopper_cockroach1055 + for i := 0; i < 2; i++ { + stoppers = append(stoppers, NewStopper_cockroach1055()) + } + + for i := range stoppers { + s := stoppers[i] + s.AddWorker() + go func() { // G2 + // deadlocks: x > 0 + s.StartTask() + <-s.ShouldStop() + s.SetStopped() + }() + } + + done := make(chan struct{}) + go func() { // G3 + // deadlocks: x > 0 + for _, s := range stoppers { + s.Quiesce() + } + for _, s := range stoppers { + s.Stop() + } + close(done) + }() + + <-done + }() + } +} + +// Example deadlock trace: +// +// G1 G2.0 G2.1 G2.2 G3 +// --------------------------------------------------------------------------------------------------------------------- +// s[0].stop.Add(1) [1] +// go func() [G2.0] +// s[1].stop.Add(1) [1] . +// go func() [G2.1] . +// s[2].stop.Add(1) [1] . . +// go func() [G2.2] . . +// go func() [G3] . . . +// <-done . . . . +// . s[0].StartTask() . . . +// . s[0].draining == 0 . . . +// . . s[1].StartTask() . . +// . . s[1].draining == 0 . . +// . . . s[2].StartTask() . +// . . . s[2].draining == 0 . +// . . . . s[0].Quiesce() +// . . . . s[0].mu.Lock() [L1[0]] +// . s[0].mu.Lock() [L1[0]] . . . +// . s[0].drain.Add(1) [1] . . . +// . s[0].mu.Unlock() [L1[0]] . . . +// . <-s[0].ShouldStop() . . . +// . . . . s[0].draining = 1 +// . . . . s[0].drain.Wait() +// . . s[0].mu.Lock() [L1[1]] . . +// . . s[1].drain.Add(1) [1] . . +// . . s[1].mu.Unlock() [L1[1]] . . +// . . <-s[1].ShouldStop() . . +// . . . s[2].mu.Lock() [L1[2]] . +// . . . s[2].drain.Add() [1] . +// . . . s[2].mu.Unlock() [L1[2]] . +// . . . <-s[2].ShouldStop() . 
+// ----------------------------------------------------G1, G2.[0..2], G3 leak------------------------------------------------ diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go b/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go new file mode 100644 index 00000000000000..d31ffbc3195f95 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach10790.go @@ -0,0 +1,127 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/10790 + * Buggy version: 96b5452557ebe26bd9d85fe7905155009204d893 + * fix commit-id: f1a5c19125c65129b966fbdc0e6408e8df214aba + * Flaky: 28/100 + * Description: + * It is possible that a message from ctxDone will make the function beginCmds + * returns without draining the channel ch, so that goroutines created by anonymous + * function will leak. + */ + +package main + +import ( + "context" + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Cockroach10790", Cockroach10790) +} + +type Stopper_cockroach10790 struct { + quiescer chan struct{} + mu struct { + sync.Mutex + quiescing bool + } +} + +func (s *Stopper_cockroach10790) ShouldQuiesce() <-chan struct{} { + if s == nil { + return nil + } + return s.quiescer +} + +func (s *Stopper_cockroach10790) Quiesce() { + s.mu.Lock() + defer s.mu.Unlock() + if !s.mu.quiescing { + s.mu.quiescing = true + close(s.quiescer) + } +} + +func (s *Stopper_cockroach10790) Stop() { + s.Quiesce() +} + +type Replica_cockroach10790 struct { + chans []chan bool + stopper *Stopper_cockroach10790 +} + +func (r *Replica_cockroach10790) beginCmds(ctx context.Context) { + ctxDone := ctx.Done() + for _, ch := range r.chans { + select { + case <-ch: + case <-ctxDone: + go func() { + // deadlocks: x > 0 + for _, ch := range r.chans { + <-ch + } + }() + } + } +} + +func (r *Replica_cockroach10790) sendChans(ctx context.Context) { + for _, ch := range r.chans { + select { + case ch <- true: + case <-ctx.Done(): + return + } + } 
+} + +func NewReplica_cockroach10790() *Replica_cockroach10790 { + r := &Replica_cockroach10790{ + stopper: &Stopper_cockroach10790{ + quiescer: make(chan struct{}), + }, + } + r.chans = append(r.chans, make(chan bool)) + r.chans = append(r.chans, make(chan bool)) + return r +} + +/// +/// G1 G2 helper goroutine +/// r.sendChans() +/// r.beginCmds() +/// ch1 <- true +/// <- ch1 +/// ch2 <- true +/// ... ... ... +/// cancel() +/// <- ch1 +/// ------------------G1 leak-------------------------- +/// + +func Cockroach10790() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go func() { + r := NewReplica_cockroach10790() + ctx, cancel := context.WithCancel(context.Background()) + go r.sendChans(ctx) // helper goroutine + go r.beginCmds(ctx) // G1 + go cancel() // G2 + r.stopper.Stop() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go new file mode 100644 index 00000000000000..195c911218e85a --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13197.go @@ -0,0 +1,73 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/13197 + * Buggy version: fff27aedabafe20cef57f75905fe340cab48c2a4 + * fix commit-id: 9bf770cd8f6eaff5441b80d3aec1a5614e8747e1 + * Flaky: 100/100 + * Description: One goroutine executing (*Tx).awaitDone() blocks, + * waiting for a signal from context.Done().
+ */ +package main + +import ( + "context" + "os" + "runtime" + "runtime/pprof" +) + +func init() { + register("Cockroach13197", Cockroach13197) +} + +type DB_cockroach13197 struct{} + +func (db *DB_cockroach13197) begin(ctx context.Context) *Tx_cockroach13197 { + ctx, cancel := context.WithCancel(ctx) + tx := &Tx_cockroach13197{ + cancel: cancel, + ctx: ctx, + } + // deadlocks: 1 + go tx.awaitDone() // G2 + return tx +} + +type Tx_cockroach13197 struct { + cancel context.CancelFunc + ctx context.Context +} + +func (tx *Tx_cockroach13197) awaitDone() { + <-tx.ctx.Done() +} + +func (tx *Tx_cockroach13197) Rollback() { + tx.rollback() +} + +func (tx *Tx_cockroach13197) rollback() { + tx.close() +} + +func (tx *Tx_cockroach13197) close() { + tx.cancel() +} + +/// G1 G2 +/// begin() +/// awaitDone() +/// <-tx.ctx.Done() +/// return +/// -----------G2 leak------------- + +func Cockroach13197() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + db := &DB_cockroach13197{} + db.begin(context.Background()) // G1 +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go new file mode 100644 index 00000000000000..063446efe02782 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach13755.go @@ -0,0 +1,60 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/13755 + * Buggy version: 7acb881bbb8f23e87b69fce9568d9a3316b5259c + * fix commit-id: ef906076adc1d0e3721944829cfedfed51810088 + * Flaky: 100/100 + * Description: The buggy code does not close the db query result (rows), + * so that one goroutine running (*Rows).awaitDone is blocked forever. + * The blocking goroutine is waiting for cancel signal from context. 
+ */ + +package main + +import ( + "context" + "os" + "runtime" + "runtime/pprof" +) + +func init() { + register("Cockroach13755", Cockroach13755) +} + +type Rows_cockroach13755 struct { + cancel context.CancelFunc +} + +func (rs *Rows_cockroach13755) initContextClose(ctx context.Context) { + ctx, rs.cancel = context.WithCancel(ctx) + // deadlocks: 1 + go rs.awaitDone(ctx) +} + +func (rs *Rows_cockroach13755) awaitDone(ctx context.Context) { + <-ctx.Done() + rs.close(ctx.Err()) +} + +func (rs *Rows_cockroach13755) close(err error) { + rs.cancel() +} + +/// G1 G2 +/// initContextClose() +/// awaitDone() +/// <-tx.ctx.Done() +/// return +/// ---------------G2 leak----------------- + +func Cockroach13755() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + rs := &Rows_cockroach13755{} + rs.initContextClose(context.Background()) +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go b/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go new file mode 100644 index 00000000000000..30e2a8a11ee93f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach1462.go @@ -0,0 +1,187 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Cockroach1462", Cockroach1462) +} + +type Stopper_cockroach1462 struct { + stopper chan struct{} + stopped chan struct{} + stop sync.WaitGroup + mu sync.Mutex + drain *sync.Cond + draining bool + numTasks int +} + +func NewStopper_cockroach1462() *Stopper_cockroach1462 { + s := &Stopper_cockroach1462{ + stopper: make(chan struct{}), + stopped: make(chan struct{}), + } + s.drain = sync.NewCond(&s.mu) + return s +} + +func (s *Stopper_cockroach1462) RunWorker(f func()) { + s.AddWorker() + go func() { // G2, G3 + defer s.SetStopped() + // deadlocks: x > 0 + f() + }() +} + +func (s *Stopper_cockroach1462) AddWorker() { + s.stop.Add(1) +} +func (s *Stopper_cockroach1462) StartTask() bool { 
+ s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + if s.draining { + return false + } + s.numTasks++ + return true +} + +func (s *Stopper_cockroach1462) FinishTask() { + s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + s.numTasks-- + s.drain.Broadcast() +} +func (s *Stopper_cockroach1462) SetStopped() { + if s != nil { + s.stop.Done() + } +} +func (s *Stopper_cockroach1462) ShouldStop() <-chan struct{} { + if s == nil { + return nil + } + return s.stopper +} + +func (s *Stopper_cockroach1462) Quiesce() { + s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + s.draining = true + for s.numTasks > 0 { + // Unlock s.mu, wait for the signal, and lock s.mu. + s.drain.Wait() + } +} + +func (s *Stopper_cockroach1462) Stop() { + s.Quiesce() + close(s.stopper) + s.stop.Wait() + s.mu.Lock() + runtime.Gosched() + defer s.mu.Unlock() + close(s.stopped) +} + +type interceptMessage_cockroach1462 int + +type localInterceptableTransport_cockroach1462 struct { + mu sync.Mutex + Events chan interceptMessage_cockroach1462 + stopper *Stopper_cockroach1462 +} + +func (lt *localInterceptableTransport_cockroach1462) Close() {} + +type Transport_cockroach1462 interface { + Close() +} + +func NewLocalInterceptableTransport_cockroach1462(stopper *Stopper_cockroach1462) Transport_cockroach1462 { + lt := &localInterceptableTransport_cockroach1462{ + Events: make(chan interceptMessage_cockroach1462), + stopper: stopper, + } + lt.start() + return lt +} + +func (lt *localInterceptableTransport_cockroach1462) start() { + lt.stopper.RunWorker(func() { + for { + select { + case <-lt.stopper.ShouldStop(): + return + default: + lt.Events <- interceptMessage_cockroach1462(0) + } + } + }) +} + +func processEventsUntil_cockroach1462(ch <-chan interceptMessage_cockroach1462, stopper *Stopper_cockroach1462) { + for { + select { + case _, ok := <-ch: + runtime.Gosched() + if !ok { + return + } + case <-stopper.ShouldStop(): + return + } + } +} + +func Cockroach1462() { + prof := 
pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(2000 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i <= 1000; i++ { + go func() { // G1 + // deadlocks: x > 0 + stopper := NewStopper_cockroach1462() + transport := NewLocalInterceptableTransport_cockroach1462(stopper).(*localInterceptableTransport_cockroach1462) + stopper.RunWorker(func() { + processEventsUntil_cockroach1462(transport.Events, stopper) + }) + stopper.Stop() + }() + } +} + +// Example of a deadlocking trace +// G1 G2 G3 +// --------------------------------------------------------------------------------------------------------------------- +// NewLocalInterceptableTransport() +// lt.start() +// lt.stopper.RunWorker() +// s.AddWorker() +// s.stop.Add(1) [1] +// go func() [G2] +// stopper.RunWorker() . +// s.AddWorker() . +// s.stop.Add(1) [2] . +// go func() [G3] . +// s.Stop() . . +// s.Quiesce() . . +// . select [default] . +// . lt.Events <- interceptMessage(0) . +// close(s.stopper) . . +// . . select [<-stopper.ShouldStop()] +// . . <<>> +// s.stop.Wait() . +// -----------------------------------------------------G1,G2 leak------------------------------------------------------ diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go b/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go new file mode 100644 index 00000000000000..52e5ec692a4b78 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach16167.go @@ -0,0 +1,124 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/16167 + * Buggy version: 36fa784aa846b46c29e077634c4e362635f6e74a + * fix commit-id: d064942b067ab84628f79cbfda001fa3138d8d6e + * Flaky: 1/100 + * Description: + * This is another example for deadlock caused by recursively + * acquiring RWLock. There are two lock variables (systemConfigCond and systemConfigMu) + * involved in this bug, but they are actually the same lock, which can be found from + * the following code. 
+ * There are two goroutines involved in this deadlock. The first goroutine acquires + * systemConfigMu.Lock() first, then tries to acquire systemConfigMu.RLock(). The + * second goroutine tries to acquire systemConfigMu.Lock(). If the second goroutine + * interleaves in between the two lock operations of the first goroutine, a deadlock will happen. + */ + +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Cockroach16167", Cockroach16167) +} + +type PreparedStatements_cockroach16167 struct { + session *Session_cockroach16167 +} + +func (ps PreparedStatements_cockroach16167) New(e *Executor_cockroach16167) { + e.Prepare(ps.session) +} + +type Session_cockroach16167 struct { + PreparedStatements PreparedStatements_cockroach16167 +} + +func (s *Session_cockroach16167) resetForBatch(e *Executor_cockroach16167) { + e.getDatabaseCache() +} + +type Executor_cockroach16167 struct { + systemConfigCond *sync.Cond + systemConfigMu sync.RWMutex +} + +func (e *Executor_cockroach16167) Start() { + e.updateSystemConfig() +} + +func (e *Executor_cockroach16167) execParsed(session *Session_cockroach16167) { + e.systemConfigCond.L.Lock() // Same as e.systemConfigMu.RLock() + runtime.Gosched() + defer e.systemConfigCond.L.Unlock() + runTxnAttempt_cockroach16167(e, session) +} + +func (e *Executor_cockroach16167) execStmtsInCurrentTxn(session *Session_cockroach16167) { + e.execStmtInOpenTxn(session) +} + +func (e *Executor_cockroach16167) execStmtInOpenTxn(session *Session_cockroach16167) { + session.PreparedStatements.New(e) +} + +func (e *Executor_cockroach16167) Prepare(session *Session_cockroach16167) { + session.resetForBatch(e) +} + +func (e *Executor_cockroach16167) getDatabaseCache() { + e.systemConfigMu.RLock() + defer e.systemConfigMu.RUnlock() +} + +func (e *Executor_cockroach16167) updateSystemConfig() { + e.systemConfigMu.Lock() + runtime.Gosched() + defer e.systemConfigMu.Unlock() +} + +func 
runTxnAttempt_cockroach16167(e *Executor_cockroach16167, session *Session_cockroach16167) { + e.execStmtsInCurrentTxn(session) +} + +func NewExectorAndSession_cockroach16167() (*Executor_cockroach16167, *Session_cockroach16167) { + session := &Session_cockroach16167{} + session.PreparedStatements = PreparedStatements_cockroach16167{session} + e := &Executor_cockroach16167{} + return e, session +} + +/// G1 G2 +/// e.Start() +/// e.updateSystemConfig() +/// e.execParsed() +/// e.systemConfigCond.L.Lock() +/// e.systemConfigMu.Lock() +/// e.systemConfigMu.RLock() +/// ----------------------G1,G2 deadlock-------------------- + +func Cockroach16167() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + e, s := NewExectorAndSession_cockroach16167() + e.systemConfigCond = sync.NewCond(e.systemConfigMu.RLocker()) + // deadlocks: x > 0 + go e.Start() // G1 + e.execParsed(s) // G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go b/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go new file mode 100644 index 00000000000000..716d6c2b12524f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach18101.go @@ -0,0 +1,75 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/18101 + * Buggy version: f7a8e2f57b6bcf00b9abaf3da00598e4acd3a57f + * fix commit-id: 822bd176cc725c6b50905ea615023200b395e14f + * Flaky: 100/100 + * Description: + * The context.Done() signal only stops the goroutine that pulls data + * from a channel, but does not stop the goroutines which send data + * to the channel. This causes all goroutines trying to send data + * through the channel to block.
+ */ + +package main + +import ( + "context" + "os" + "runtime/pprof" + "time" +) + +func init() { + register("Cockroach18101", Cockroach18101) +} + +const chanSize_cockroach18101 = 6 + +func restore_cockroach18101(ctx context.Context) bool { + readyForImportCh := make(chan bool, chanSize_cockroach18101) + go func() { // G2 + defer close(readyForImportCh) + // deadlocks: x > 0 + splitAndScatter_cockroach18101(ctx, readyForImportCh) + }() + for readyForImportSpan := range readyForImportCh { + select { + case <-ctx.Done(): + return readyForImportSpan + } + } + return true +} + +func splitAndScatter_cockroach18101(ctx context.Context, readyForImportCh chan bool) { + for i := 0; i < chanSize_cockroach18101+2; i++ { + readyForImportCh <- (false || i != 0) + } +} + +/// +/// G1 G2 helper goroutine +/// restore() +/// splitAndScatter() +/// <-readyForImportCh +/// readyForImportCh<- +/// ... ... +/// cancel() +/// return +/// readyForImportCh<- +/// -----------------------G2 leak------------------------- +/// + +func Cockroach18101() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 100; i++ { + ctx, cancel := context.WithCancel(context.Background()) + go restore_cockroach18101(ctx) // G1 + go cancel() // helper goroutine + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go b/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go new file mode 100644 index 00000000000000..aec73f5dfac5d0 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach2448.go @@ -0,0 +1,139 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "time" +) + +func init() { + register("Cockroach2448", Cockroach2448) +} + +type Stopper_cockroach2448 struct { + Done chan bool +} + +func (s *Stopper_cockroach2448) ShouldStop() <-chan bool { + return s.Done +} + +type EventMembershipChangeCommitted_cockroach2448 struct { + Callback func() +} +type 
MultiRaft_cockroach2448 struct { + stopper *Stopper_cockroach2448 + Events chan interface{} + callbackChan chan func() +} + +// sendEvent can be invoked many times +func (m *MultiRaft_cockroach2448) sendEvent(event interface{}) { + select { + case m.Events <- event: // Waiting for events consumption + case <-m.stopper.ShouldStop(): + } +} + +type state_cockroach2448 struct { + *MultiRaft_cockroach2448 +} + +func (s *state_cockroach2448) start() { + for { + select { + case <-s.stopper.ShouldStop(): + return + case cb := <-s.callbackChan: + cb() + default: + s.handleWriteResponse() + time.Sleep(time.Millisecond) + } + } +} + +func (s *state_cockroach2448) handleWriteResponse() { + s.sendEvent(&EventMembershipChangeCommitted_cockroach2448{ + Callback: func() { + select { + case s.callbackChan <- func() { // Waiting for callbackChan consumption + time.Sleep(time.Nanosecond) + }: + case <-s.stopper.ShouldStop(): + } + }, + }) +} + +type Store_cockroach2448 struct { + multiraft *MultiRaft_cockroach2448 +} + +func (s *Store_cockroach2448) processRaft() { + for { + select { + case e := <-s.multiraft.Events: + switch e := e.(type) { + case *EventMembershipChangeCommitted_cockroach2448: + callback := e.Callback + runtime.Gosched() + if callback != nil { + callback() // Waiting for callbackChan consumption + } + } + case <-s.multiraft.stopper.ShouldStop(): + return + } + } +} + +func NewStoreAndState_cockroach2448() (*Store_cockroach2448, *state_cockroach2448) { + stopper := &Stopper_cockroach2448{ + Done: make(chan bool), + } + mltrft := &MultiRaft_cockroach2448{ + stopper: stopper, + Events: make(chan interface{}), + callbackChan: make(chan func()), + } + st := &state_cockroach2448{mltrft} + s := &Store_cockroach2448{mltrft} + return s, st +} + +func Cockroach2448() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(time.Second) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 1000; i++ { + go func() { + s, st := NewStoreAndState_cockroach2448() + 
// deadlocks: x > 0 + go s.processRaft() // G1 + // deadlocks: x > 0 + go st.start() // G2 + }() + } +} + +// Example of deadlock trace: +// +// G1 G2 +// -------------------------------------------------------------------------------------------------- +// s.processRaft() st.start() +// select . +// . select [default] +// . s.handleWriteResponse() +// . s.sendEvent() +// . select +// <-s.multiraft.Events <----> m.Events <- event +// . select [default] +// . s.handleWriteResponse() +// . s.sendEvent() +// . select [m.Events<-, <-s.stopper.ShouldStop()] +// callback() +// select [m.callbackChan<-,<-s.stopper.ShouldStop()] . diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go new file mode 100644 index 00000000000000..9621a91e52b5bb --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach24808.go @@ -0,0 +1,83 @@ +package main + +import ( + "context" + "os" + "runtime" + "runtime/pprof" + "sync" +) + +func init() { + register("Cockroach24808", Cockroach24808) +} + +type Compactor_cockroach24808 struct { + ch chan struct{} +} + +type Stopper_cockroach24808 struct { + stop sync.WaitGroup + stopper chan struct{} +} + +func (s *Stopper_cockroach24808) RunWorker(ctx context.Context, f func(context.Context)) { + s.stop.Add(1) + go func() { + defer s.stop.Done() + f(ctx) + }() +} + +func (s *Stopper_cockroach24808) ShouldStop() <-chan struct{} { + if s == nil { + return nil + } + return s.stopper +} + +func (s *Stopper_cockroach24808) Stop() { + close(s.stopper) +} + +func NewStopper_cockroach24808() *Stopper_cockroach24808 { + s := &Stopper_cockroach24808{ + stopper: make(chan struct{}), + } + return s +} + +func NewCompactor_cockroach24808() *Compactor_cockroach24808 { + return &Compactor_cockroach24808{ch: make(chan struct{}, 1)} +} + +func (c *Compactor_cockroach24808) Start(ctx context.Context, stopper *Stopper_cockroach24808) { + c.ch <- struct{}{} + stopper.RunWorker(ctx, 
func(ctx context.Context) { + for { + select { + case <-stopper.ShouldStop(): + return + case <-c.ch: + } + } + }) +} + +func Cockroach24808() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + go func() { + // deadlocks: 1 + stopper := NewStopper_cockroach24808() + defer stopper.Stop() + + compactor := NewCompactor_cockroach24808() + compactor.ch <- struct{}{} + + compactor.Start(context.Background(), stopper) + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go new file mode 100644 index 00000000000000..dfda17c1b9172e --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach25456.go @@ -0,0 +1,93 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" +) + +func init() { + register("Cockroach25456", Cockroach25456) +} + +type Stopper_cockroach25456 struct { + quiescer chan struct{} +} + +func (s *Stopper_cockroach25456) ShouldQuiesce() <-chan struct{} { + if s == nil { + return nil + } + return s.quiescer +} + +func NewStopper_cockroach25456() *Stopper_cockroach25456 { + return &Stopper_cockroach25456{quiescer: make(chan struct{})} +} + +type Store_cockroach25456 struct { + stopper *Stopper_cockroach25456 + consistencyQueue *consistencyQueue_cockroach25456 +} + +func (s *Store_cockroach25456) Stopper() *Stopper_cockroach25456 { + return s.stopper +} +func (s *Store_cockroach25456) Start(stopper *Stopper_cockroach25456) { + s.stopper = stopper +} + +func NewStore_cockroach25456() *Store_cockroach25456 { + return &Store_cockroach25456{ + consistencyQueue: newConsistencyQueue_cockroach25456(), + } +} + +type Replica_cockroach25456 struct { + store *Store_cockroach25456 +} + +func NewReplica_cockroach25456(store *Store_cockroach25456) *Replica_cockroach25456 { + return &Replica_cockroach25456{store: store} +} + +type consistencyQueue_cockroach25456 struct{} + +func (q 
*consistencyQueue_cockroach25456) process(repl *Replica_cockroach25456) { + <-repl.store.Stopper().ShouldQuiesce() +} + +func newConsistencyQueue_cockroach25456() *consistencyQueue_cockroach25456 { + return &consistencyQueue_cockroach25456{} +} + +type testContext_cockroach25456 struct { + store *Store_cockroach25456 + repl *Replica_cockroach25456 +} + +func (tc *testContext_cockroach25456) StartWithStoreConfig(stopper *Stopper_cockroach25456) { + if tc.store == nil { + tc.store = NewStore_cockroach25456() + } + tc.store.Start(stopper) + tc.repl = NewReplica_cockroach25456(tc.store) +} + +func Cockroach25456() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + go func() { + // deadlocks: 1 + stopper := NewStopper_cockroach25456() + tc := testContext_cockroach25456{} + tc.StartWithStoreConfig(stopper) + + for i := 0; i < 2; i++ { + tc.store.consistencyQueue.process(tc.repl) + } + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go new file mode 100644 index 00000000000000..0ffc6609f9aa30 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35073.go @@ -0,0 +1,119 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "sync/atomic" +) + +func init() { + register("Cockroach35073", Cockroach35073) +} + +type ConsumerStatus_cockroach35073 uint32 + +const ( + NeedMoreRows_cockroach35073 ConsumerStatus_cockroach35073 = iota + DrainRequested_cockroach35073 + ConsumerClosed_cockroach35073 +) + +const rowChannelBufSize_cockroach35073 = 16 +const outboxBufRows_cockroach35073 = 16 + +type rowSourceBase_cockroach35073 struct { + consumerStatus ConsumerStatus_cockroach35073 +} + +func (rb *rowSourceBase_cockroach35073) consumerClosed() { + atomic.StoreUint32((*uint32)(&rb.consumerStatus), uint32(ConsumerClosed_cockroach35073)) +} + +type RowChannelMsg_cockroach35073 int + +type 
RowChannel_cockroach35073 struct { + rowSourceBase_cockroach35073 + dataChan chan RowChannelMsg_cockroach35073 +} + +func (rc *RowChannel_cockroach35073) ConsumerClosed() { + rc.consumerClosed() + select { + case <-rc.dataChan: + default: + } +} + +func (rc *RowChannel_cockroach35073) Push() ConsumerStatus_cockroach35073 { + consumerStatus := ConsumerStatus_cockroach35073( + atomic.LoadUint32((*uint32)(&rc.consumerStatus))) + switch consumerStatus { + case NeedMoreRows_cockroach35073: + rc.dataChan <- RowChannelMsg_cockroach35073(0) + case DrainRequested_cockroach35073: + case ConsumerClosed_cockroach35073: + } + return consumerStatus +} + +func (rc *RowChannel_cockroach35073) InitWithNumSenders() { + rc.initWithBufSizeAndNumSenders(rowChannelBufSize_cockroach35073) +} + +func (rc *RowChannel_cockroach35073) initWithBufSizeAndNumSenders(chanBufSize int) { + rc.dataChan = make(chan RowChannelMsg_cockroach35073, chanBufSize) +} + +type outbox_cockroach35073 struct { + RowChannel_cockroach35073 +} + +func (m *outbox_cockroach35073) init() { + m.RowChannel_cockroach35073.InitWithNumSenders() +} + +func (m *outbox_cockroach35073) start(wg *sync.WaitGroup) { + if wg != nil { + wg.Add(1) + } + go m.run(wg) +} + +func (m *outbox_cockroach35073) run(wg *sync.WaitGroup) { + if wg != nil { + wg.Done() + } +} + +func Cockroach35073() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + go func() { + // deadlocks: 1 + outbox := &outbox_cockroach35073{} + outbox.init() + + var wg sync.WaitGroup + for i := 0; i < outboxBufRows_cockroach35073; i++ { + outbox.Push() + } + + var blockedPusherWg sync.WaitGroup + blockedPusherWg.Add(1) + go func() { + // deadlocks: 1 + outbox.Push() + blockedPusherWg.Done() + }() + + outbox.start(&wg) + + wg.Wait() + outbox.RowChannel_cockroach35073.Push() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go 
b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go new file mode 100644 index 00000000000000..edbcdc3720d373 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach35931.go @@ -0,0 +1,129 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" +) + +func init() { + register("Cockroach35931", Cockroach35931) +} + +type RowReceiver_cockroach35931 interface { + Push() +} + +type inboundStreamInfo_cockroach35931 struct { + receiver RowReceiver_cockroach35931 +} + +type RowChannel_cockroach35931 struct { + dataChan chan struct{} +} + +func (rc *RowChannel_cockroach35931) Push() { + // The buffer size can be either 0 or 1 when this function is entered. + // We need context sensitivity or a path-condition on the buffer size + // to find this bug. + rc.dataChan <- struct{}{} +} + +func (rc *RowChannel_cockroach35931) initWithBufSizeAndNumSenders(chanBufSize int) { + rc.dataChan = make(chan struct{}, chanBufSize) +} + +type flowEntry_cockroach35931 struct { + flow *Flow_cockroach35931 + inboundStreams map[int]*inboundStreamInfo_cockroach35931 +} + +type flowRegistry_cockroach35931 struct { + sync.Mutex + flows map[int]*flowEntry_cockroach35931 +} + +func (fr *flowRegistry_cockroach35931) getEntryLocked(id int) *flowEntry_cockroach35931 { + entry, ok := fr.flows[id] + if !ok { + entry = &flowEntry_cockroach35931{} + fr.flows[id] = entry + } + return entry +} + +func (fr *flowRegistry_cockroach35931) cancelPendingStreamsLocked(id int) []RowReceiver_cockroach35931 { + entry := fr.flows[id] + pendingReceivers := make([]RowReceiver_cockroach35931, 0) + for _, is := range entry.inboundStreams { + pendingReceivers = append(pendingReceivers, is.receiver) + } + return pendingReceivers +} + +type Flow_cockroach35931 struct { + id int + flowRegistry *flowRegistry_cockroach35931 + inboundStreams map[int]*inboundStreamInfo_cockroach35931 +} + +func (f *Flow_cockroach35931) cancel() { + f.flowRegistry.Lock() + timedOutReceivers := 
f.flowRegistry.cancelPendingStreamsLocked(f.id) + f.flowRegistry.Unlock() + + for _, receiver := range timedOutReceivers { + receiver.Push() + } +} + +func (fr *flowRegistry_cockroach35931) RegisterFlow(f *Flow_cockroach35931, inboundStreams map[int]*inboundStreamInfo_cockroach35931) { + entry := fr.getEntryLocked(f.id) + entry.flow = f + entry.inboundStreams = inboundStreams +} + +func makeFlowRegistry_cockroach35931() *flowRegistry_cockroach35931 { + return &flowRegistry_cockroach35931{ + flows: make(map[int]*flowEntry_cockroach35931), + } +} + +func Cockroach35931() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + go func() { + // deadlocks: 1 + fr := makeFlowRegistry_cockroach35931() + + left := &RowChannel_cockroach35931{} + left.initWithBufSizeAndNumSenders(1) + right := &RowChannel_cockroach35931{} + right.initWithBufSizeAndNumSenders(1) + + inboundStreams := map[int]*inboundStreamInfo_cockroach35931{ + 0: { + receiver: left, + }, + 1: { + receiver: right, + }, + } + + left.Push() + + flow := &Flow_cockroach35931{ + id: 0, + flowRegistry: fr, + inboundStreams: inboundStreams, + } + + fr.RegisterFlow(flow, inboundStreams) + + flow.cancel() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go b/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go new file mode 100644 index 00000000000000..4c5c215b0ff854 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach3710.go @@ -0,0 +1,135 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/3710 + * Buggy version: 4afdd4860fd7c3bd9e92489f84a95e5cc7d11a0d + * fix commit-id: cb65190f9caaf464723e7d072b1f1b69a044ef7b + * Flaky: 2/100 + * Description: This deadlock is caused by acquiring an RLock twice in a call chain.
+ * ForceRaftLogScanAndProcess(acquire s.mu.RLock()) ->MaybeAdd()->shouldQueue()-> + * getTruncatableIndexes()->RaftStatus(acquire s.mu.Rlock()) + */ + +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" + "unsafe" +) + +func init() { + register("Cockroach3710", Cockroach3710) +} + +type Store_cockroach3710 struct { + raftLogQueue *baseQueue + replicas map[int]*Replica_cockroach3710 + + mu struct { + sync.RWMutex + } +} + +func (s *Store_cockroach3710) ForceRaftLogScanAndProcess() { + s.mu.RLock() + runtime.Gosched() + for _, r := range s.replicas { + s.raftLogQueue.MaybeAdd(r) + } + s.mu.RUnlock() +} + +func (s *Store_cockroach3710) RaftStatus() { + s.mu.RLock() + defer s.mu.RUnlock() +} + +func (s *Store_cockroach3710) processRaft() { + go func() { + // deadlocks: x > 0 + for { + var replicas []*Replica_cockroach3710 + s.mu.Lock() + for _, r := range s.replicas { + replicas = append(replicas, r) + } + s.mu.Unlock() + break + } + }() +} + +type Replica_cockroach3710 struct { + store *Store_cockroach3710 +} + +type baseQueue struct { + sync.Mutex + impl *raftLogQueue +} + +func (bq *baseQueue) MaybeAdd(repl *Replica_cockroach3710) { + bq.Lock() + defer bq.Unlock() + bq.impl.shouldQueue(repl) +} + +type raftLogQueue struct{} + +func (*raftLogQueue) shouldQueue(r *Replica_cockroach3710) { + getTruncatableIndexes(r) +} + +func getTruncatableIndexes(r *Replica_cockroach3710) { + r.store.RaftStatus() +} + +func NewStore_cockroach3710() *Store_cockroach3710 { + rlq := &raftLogQueue{} + bq := &baseQueue{impl: rlq} + store := &Store_cockroach3710{ + raftLogQueue: bq, + replicas: make(map[int]*Replica_cockroach3710), + } + r1 := &Replica_cockroach3710{store} + r2 := &Replica_cockroach3710{store} + + makeKey := func(r *Replica_cockroach3710) int { + return int((uintptr(unsafe.Pointer(r)) >> 1) % 7) + } + store.replicas[makeKey(r1)] = r1 + store.replicas[makeKey(r2)] = r2 + + return store +} + +/// G1 G2 +/// store.ForceRaftLogScanAndProcess() 
+/// s.mu.RLock() +/// s.raftLogQueue.MaybeAdd() +/// bq.impl.shouldQueue() +/// getTruncatableIndexes() +/// r.store.RaftStatus() +/// store.processRaft() +/// s.mu.Lock() +/// s.mu.RLock() +/// ----------------------G1,G2 deadlock--------------------- + +func Cockroach3710() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 10000; i++ { + go func() { + store := NewStore_cockroach3710() + // deadlocks: x > 0 + go store.ForceRaftLogScanAndProcess() // G1 + go store.processRaft() // G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach584.go b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go new file mode 100644 index 00000000000000..cfb9902592791d --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach584.go @@ -0,0 +1,56 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" +) + +func init() { + register("Cockroach584", Cockroach584) +} + +type gossip_cockroach584 struct { + mu sync.Mutex + closed bool +} + +func (g *gossip_cockroach584) bootstrap() { + for { + g.mu.Lock() + if g.closed { + /// Missing g.mu.Unlock + break + } + g.mu.Unlock() + } +} + +func (g *gossip_cockroach584) manage() { + for { + g.mu.Lock() + if g.closed { + /// Missing g.mu.Unlock + break + } + g.mu.Unlock() + } +} + +func Cockroach584() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + g := &gossip_cockroach584{ + closed: true, + } + go func() { + // deadlocks: 1 + g.bootstrap() + g.manage() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go b/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go new file mode 100644 index 00000000000000..9a6b9354f5e144 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach6181.go @@ -0,0 +1,103 @@ +/* + * Project: cockroach + * Issue or PR : 
https://github.com/cockroachdb/cockroach/pull/6181 + * Buggy version: c0a232b5521565904b851699853bdbd0c670cf1e + * fix commit-id: d5814e4886a776bf7789b3c51b31f5206480d184 + * Flaky: 57/100 + */ +package main + +import ( + "io" + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Cockroach6181", Cockroach6181) +} + +type testDescriptorDB_cockroach6181 struct { + cache *rangeDescriptorCache_cockroach6181 +} + +func initTestDescriptorDB_cockroach6181() *testDescriptorDB_cockroach6181 { + return &testDescriptorDB_cockroach6181{&rangeDescriptorCache_cockroach6181{}} +} + +type rangeDescriptorCache_cockroach6181 struct { + rangeCacheMu sync.RWMutex +} + +func (rdc *rangeDescriptorCache_cockroach6181) LookupRangeDescriptor() { + rdc.rangeCacheMu.RLock() + runtime.Gosched() + io.Discard.Write([]byte(rdc.String())) + rdc.rangeCacheMu.RUnlock() + rdc.rangeCacheMu.Lock() + rdc.rangeCacheMu.Unlock() +} + +func (rdc *rangeDescriptorCache_cockroach6181) String() string { + rdc.rangeCacheMu.RLock() + defer rdc.rangeCacheMu.RUnlock() + return rdc.stringLocked() +} + +func (rdc *rangeDescriptorCache_cockroach6181) stringLocked() string { + return "something here" +} + +func doLookupWithToken_cockroach6181(rc *rangeDescriptorCache_cockroach6181) { + rc.LookupRangeDescriptor() +} + +func testRangeCacheCoalescedRequests_cockroach6181() { + // deadlocks: x > 0 + db := initTestDescriptorDB_cockroach6181() + pauseLookupResumeAndAssert := func() { + var wg sync.WaitGroup + for i := 0; i < 3; i++ { + wg.Add(1) + go func() { // G2,G3,... + // deadlocks: x > 0 + doLookupWithToken_cockroach6181(db.cache) + wg.Done() + }() + } + wg.Wait() + } + pauseLookupResumeAndAssert() +} + +/// G1 G2 G3 ... 
+/// testRangeCacheCoalescedRequests() +/// initTestDescriptorDB() +/// pauseLookupResumeAndAssert() +/// return +/// doLookupWithToken() +/// doLookupWithToken() +/// rc.LookupRangeDescriptor() +/// rc.LookupRangeDescriptor() +/// rdc.rangeCacheMu.RLock() +/// rdc.String() +/// rdc.rangeCacheMu.RLock() +/// io.Discard.Write() +/// rdc.rangeCacheMu.RUnlock() +/// rdc.rangeCacheMu.Lock() +/// rdc.rangeCacheMu.RLock() +/// -------------------------------------G2,G3,... deadlock-------------------------------------- + +func Cockroach6181() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 100; i++ { + go testRangeCacheCoalescedRequests_cockroach6181() // G1 + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go b/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go new file mode 100644 index 00000000000000..1de6987b673b85 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach7504.go @@ -0,0 +1,199 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/7504 + * Buggy version: bc963b438cdc3e0ad058a5282358e5aee0595e17 + * fix commit-id: cab761b9f5ee5dee1448bc5d6b1d9f5a0ff0bad5 + * Flaky: 1/100 + * Description: The lock order is leaseState, tableNameCache in Release(), but
+ */ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Cockroach7504", Cockroach7504) +} + +func MakeCacheKey_cockroach7504(lease *LeaseState_cockroach7504) int { + return lease.id +} + +type LeaseState_cockroach7504 struct { + mu sync.Mutex // L1 + id int +} +type LeaseSet_cockroach7504 struct { + data []*LeaseState_cockroach7504 +} + +func (l *LeaseSet_cockroach7504) find(id int) *LeaseState_cockroach7504 { + return l.data[id] +} + +func (l *LeaseSet_cockroach7504) remove(s *LeaseState_cockroach7504) { + for i := 0; i < len(l.data); i++ { + if s == l.data[i] { + l.data = append(l.data[:i], l.data[i+1:]...) + break + } + } +} + +type tableState_cockroach7504 struct { + tableNameCache *tableNameCache_cockroach7504 + mu sync.Mutex // L3 + active *LeaseSet_cockroach7504 +} + +func (t *tableState_cockroach7504) release(lease *LeaseState_cockroach7504) { + t.mu.Lock() // L3 + defer t.mu.Unlock() // L3 + + s := t.active.find(MakeCacheKey_cockroach7504(lease)) + s.mu.Lock() // L1 + runtime.Gosched() + defer s.mu.Unlock() // L1 + + t.removeLease(s) +} +func (t *tableState_cockroach7504) removeLease(lease *LeaseState_cockroach7504) { + t.active.remove(lease) + t.tableNameCache.remove(lease) // L1 acquire/release +} + +type tableNameCache_cockroach7504 struct { + mu sync.Mutex // L2 + tables map[int]*LeaseState_cockroach7504 +} + +func (c *tableNameCache_cockroach7504) get(id int) { + c.mu.Lock() // L2 + defer c.mu.Unlock() // L2 + lease, ok := c.tables[id] + if !ok { + return + } + if lease == nil { + panic("nil lease in name cache") + } + lease.mu.Lock() // L1 + defer lease.mu.Unlock() // L1 +} + +func (c *tableNameCache_cockroach7504) remove(lease *LeaseState_cockroach7504) { + c.mu.Lock() // L2 + runtime.Gosched() + defer c.mu.Unlock() // L2 + key := MakeCacheKey_cockroach7504(lease) + existing, ok := c.tables[key] + if !ok { + return + } + if existing == lease { + delete(c.tables, key) + } +} + +type 
LeaseManager_cockroach7504 struct { + _ [64]byte + tableNames *tableNameCache_cockroach7504 + tables map[int]*tableState_cockroach7504 +} + +func (m *LeaseManager_cockroach7504) AcquireByName(id int) { + m.tableNames.get(id) +} + +func (m *LeaseManager_cockroach7504) findTableState(lease *LeaseState_cockroach7504) *tableState_cockroach7504 { + existing, ok := m.tables[lease.id] + if !ok { + return nil + } + return existing +} + +func (m *LeaseManager_cockroach7504) Release(lease *LeaseState_cockroach7504) { + t := m.findTableState(lease) + t.release(lease) +} +func NewLeaseManager_cockroach7504(tname *tableNameCache_cockroach7504, ts *tableState_cockroach7504) *LeaseManager_cockroach7504 { + mgr := &LeaseManager_cockroach7504{ + tableNames: tname, + tables: make(map[int]*tableState_cockroach7504), + } + mgr.tables[0] = ts + return mgr +} +func NewLeaseSet_cockroach7504(n int) *LeaseSet_cockroach7504 { + lset := &LeaseSet_cockroach7504{} + for i := 0; i < n; i++ { + lease := new(LeaseState_cockroach7504) + lset.data = append(lset.data, lease) + } + return lset +} + +func Cockroach7504() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 100; i++ { + go func() { + leaseNum := 2 + lset := NewLeaseSet_cockroach7504(leaseNum) + + nc := &tableNameCache_cockroach7504{ + tables: make(map[int]*LeaseState_cockroach7504), + } + for i := 0; i < leaseNum; i++ { + nc.tables[i] = lset.find(i) + } + + ts := &tableState_cockroach7504{ + tableNameCache: nc, + active: lset, + } + + mgr := NewLeaseManager_cockroach7504(nc, ts) + + // G1 + go func() { + // deadlocks: x > 0 + // lock L2-L1 + mgr.AcquireByName(0) + }() + + // G2 + go func() { + // deadlocks: x > 0 + // lock L1-L2 + mgr.Release(lset.find(0)) + }() + }() + } +} + +// Example deadlock trace: +// +// G1 G2 +// ------------------------------------------------------------------------------------------------ +// 
mgr.AcquireByName(0) mgr.Release(lset.find(0)) +// m.tableNames.get(id) . +// c.mu.Lock() [L2] . +// . t.release(lease) +// . t.mu.Lock() [L3] +// . s.mu.Lock() [L1] +// lease.mu.Lock() [L1] . +// . t.removeLease(s) +// . t.tableNameCache.remove(lease) +// . c.mu.Lock() [L2] +// ---------------------------------------G1, G2 leak---------------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go b/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go new file mode 100644 index 00000000000000..8e1914b0689056 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/cockroach9935.go @@ -0,0 +1,60 @@ +/* + * Project: cockroach + * Issue or PR : https://github.com/cockroachdb/cockroach/pull/9935 + * Buggy version: 4df302cc3f03328395dc3fefbfba58b7718e4f2f + * fix commit-id: ed6a100ba38dd51b0888b9a3d3ac6bdbb26c528c + * Flaky: 100/100 + * Description: This bug is caused by acquiring l.mu.Lock() twice. The fix is + * to release l.mu.Lock() before acquiring l.mu.Lock for the second time. 
+ */ +package main + +import ( + "errors" + "math/rand" + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Cockroach9935", Cockroach9935) +} + +type loggingT_cockroach9935 struct { + mu sync.Mutex +} + +func (l *loggingT_cockroach9935) outputLogEntry() { + l.mu.Lock() + if err := l.createFile(); err != nil { + l.exit(err) + } + l.mu.Unlock() +} +func (l *loggingT_cockroach9935) createFile() error { + if rand.Intn(8)%4 > 0 { + return errors.New("") + } + return nil +} +func (l *loggingT_cockroach9935) exit(err error) { + l.mu.Lock() + defer l.mu.Unlock() +} +func Cockroach9935() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go func() { + l := &loggingT_cockroach9935{} + // deadlocks: x > 0 + go l.outputLogEntry() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/commonpatterns.go b/src/runtime/testdata/testgoroutineleakgc/commonpatterns.go new file mode 100644 index 00000000000000..295e5f18520b73 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/commonpatterns.go @@ -0,0 +1,282 @@ +package main + +import ( + "context" + "fmt" + "os" + "runtime" + "runtime/pprof" + "time" +) + +// Common goroutine leak patterns. Extracted from: +// "Unveiling and Vanquishing Goroutine Leaks in Enterprise Microservices: A Dynamic Analysis Approach" +// doi:10.1109/CGO57630.2024.10444835 +// +// Tests in this file are not flaky iff. the test is run with GOMAXPROCS=1. +// The main goroutine forcefully yields via `runtime.Gosched()` before +// running the profiler. This moves them to the back of the run queue, +// allowing the leaky goroutines to be scheduled beforehand and get stuck. 
+ +func init() { + register("NoCloseRange", NoCloseRange) + register("MethodContractViolation", MethodContractViolation) + register("DoubleSend", DoubleSend) + register("EarlyReturn", EarlyReturn) + register("NCastLeak", NCastLeak) + register("Timeout", Timeout) +} + +// Incoming list of items and the number of workers. +func noCloseRange(list []any, workers int) { + ch := make(chan any) + + // Create each worker + for i := 0; i < workers; i++ { + go func() { + // deadlocks: 10 + + // Each worker waits for an item and processes it. + for item := range ch { + // Process each item + _ = item + } + }() + } + + // Send each item to one of the workers. + for _, item := range list { + // Sending can deadlock if workers == 0 or if one of the workers panics + ch <- item + } + // The channel is never closed, so workers deadlock once there are no more + // items left to process. +} + +func NoCloseRange() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + go noCloseRange([]any{1, 2, 3}, 0) + go noCloseRange([]any{1, 2, 3}, 3) +} + +// A worker processes items pushed to `ch` one by one in the background. +// When the worker is no longer needed, it must be closed with `Stop`. +// +// Specifications: +// +// A worker may be started any number of times, but must be stopped only once. +// Stopping a worker multiple times will lead to a close panic. +// Any worker that is started must eventually be stopped. +// Failing to stop a worker results in a goroutine leak +type worker struct { + ch chan any + done chan any +} + +// Start spawns a background goroutine that extracts items pushed to the queue. 
+func (w worker) Start() { + go func() { + // deadlocks: 1 + + for { + select { + case <-w.ch: // Normal workflow + case <-w.done: + return // Shut down + } + } + }() +} + +func (w worker) Stop() { + // Allows goroutine created by Start to terminate + close(w.done) +} + +func (w worker) AddToQueue(item any) { + w.ch <- item +} + +// worker limited in scope by workerLifecycle +func workerLifecycle(items []any) { + // Create a new worker + w := worker{ + ch: make(chan any), + done: make(chan any), + } + // Start worker + w.Start() + + // Operate on worker + for _, item := range items { + w.AddToQueue(item) + } + + runtime.Gosched() + // Exits without calling `Stop`. Goroutine created by `Start` eventually deadlocks. +} + +func MethodContractViolation() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + workerLifecycle(make([]any, 10)) + runtime.Gosched() +} + +// doubleSend must send a message on the incoming channel (the incoming error simulates an error generated internally). +func doubleSend(ch chan any, err error) { + if err != nil { + // In case of an error, send nil. + ch <- nil + // Return is missing here. + } + // Otherwise, continue with normal behaviour + // This send is still executed in the error case, which may lead to deadlock. + ch <- struct{}{} +} + +func DoubleSend() { + prof := pprof.Lookup("goroutineleak") + ch := make(chan any) + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + go func() { + // deadlocks: 0 + doubleSend(ch, nil) + }() + <-ch + + go func() { + // deadlocks: 1 + doubleSend(ch, fmt.Errorf("error")) + }() + <-ch + + ch1 := make(chan any, 1) + go func() { + // deadlocks: 0 + doubleSend(ch1, fmt.Errorf("error")) + }() + <-ch1 +} + +// earlyReturn demonstrates a common pattern of goroutine leaks. +// A return statement interrupts the evaluation of the parent goroutine before it can consume a message. +// Incoming error simulates an error produced internally.
+func earlyReturn(err error) { + // Create a synchronous channel + ch := make(chan any) + + go func() { + // deadlocks: 1 + + // Send something to the channel. + // Deadlocks if the parent goroutine terminates early. + ch <- struct{}{} + }() + + if err != nil { + // Interrupt evaluation of parent early in case of error. + // Sender deadlocks. + return + } + + // Only receive if there is no error. + <-ch +} + +func EarlyReturn() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + go earlyReturn(fmt.Errorf("error")) +} + +// nCastLeak processes a number of items. First result to pass the post is retrieved from the channel queue. +func nCastLeak(items []any) { + // Channel is synchronous. + ch := make(chan any) + + // Iterate over every item + for range items { + go func() { + // deadlocks: 4 + + // Process item and send result to channel + ch <- struct{}{} + // Channel is synchronous: only one sender will synchronise + }() + } + // Retrieve first result. All other senders block. + // Receiver blocks if there are no senders. + <-ch +} + +func NCastLeak() { + prof := pprof.Lookup("goroutineleak") + defer func() { + for i := 0; i < 10; i++ { + // Yield enough times to allow all the leaky goroutines to + // reach the execution point. + runtime.Gosched() + } + prof.WriteTo(os.Stdout, 2) + }() + + go func() { + // deadlocks: 1 + nCastLeak(nil) + }() + + go func() { + nCastLeak(make([]any, 5)) + }() +} + +// A context is provided to short-circuit evaluation, leading +// the sender goroutine to leak. 
+func timeout(ctx context.Context) { + ch := make(chan any) + + go func() { + // deadlocks: x > 0 + ch <- struct{}{} + }() + + select { + case <-ch: // Receive message + // Sender is released + case <-ctx.Done(): // Context was cancelled or timed out + // Sender is leaked + } +} + +func Timeout() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + for i := 0; i < 100; i++ { + go timeout(ctx) + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd10492.go b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go new file mode 100644 index 00000000000000..bbeb24290829d4 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd10492.go @@ -0,0 +1,66 @@ +package main + +import ( + "context" + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Etcd10492", Etcd10492) +} + +type Checkpointer_etcd10492 func(ctx context.Context) + +type lessor_etcd10492 struct { + mu sync.RWMutex + cp Checkpointer_etcd10492 + checkpointInterval time.Duration +} + +func (le *lessor_etcd10492) Checkpoint() { + le.mu.Lock() // Lock acquired twice here + defer le.mu.Unlock() +} + +func (le *lessor_etcd10492) SetCheckpointer(cp Checkpointer_etcd10492) { + le.mu.Lock() + defer le.mu.Unlock() + + le.cp = cp +} + +func (le *lessor_etcd10492) Renew() { + le.mu.Lock() + unlock := func() { le.mu.Unlock() } + defer func() { unlock() }() + + if le.cp != nil { + le.cp(context.Background()) + } +} + +func Etcd10492() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + go func() { // G1 + // deadlocks: 1 + le := &lessor_etcd10492{ + checkpointInterval: 0, + } + fakerCheckerpointer_etcd10492 := func(ctx context.Context) { + le.Checkpoint() + } + le.SetCheckpointer(fakerCheckerpointer_etcd10492) + le.mu.Lock() + le.mu.Unlock() + le.Renew() + }() +} diff --git 
a/src/runtime/testdata/testgoroutineleakgc/etcd5509.go b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go new file mode 100644 index 00000000000000..7c87fd64e7bb42 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd5509.go @@ -0,0 +1,121 @@ +package main + +import ( + "context" + "io" + "os" + "runtime" + "runtime/pprof" + "sync" +) + +func init() { + register("Etcd5509", Etcd5509) +} + +var ErrConnClosed_etcd5509 error + +type Client_etcd5509 struct { + mu sync.RWMutex + ctx context.Context + cancel context.CancelFunc +} + +func (c *Client_etcd5509) Close() { + c.mu.Lock() + defer c.mu.Unlock() + if c.cancel == nil { + return + } + c.cancel() + c.cancel = nil + c.mu.Unlock() + c.mu.Lock() +} + +type remoteClient_etcd5509 struct { + client *Client_etcd5509 + mu sync.Mutex +} + +func (r *remoteClient_etcd5509) acquire(ctx context.Context) error { + for { + r.client.mu.RLock() + closed := r.client.cancel == nil + r.mu.Lock() + r.mu.Unlock() + if closed { + return ErrConnClosed_etcd5509 // Missing RUnlock before return + } + r.client.mu.RUnlock() + } +} + +type kv_etcd5509 struct { + rc *remoteClient_etcd5509 +} + +func (kv *kv_etcd5509) Get(ctx context.Context) error { + return kv.Do(ctx) +} + +func (kv *kv_etcd5509) Do(ctx context.Context) error { + for { + err := kv.do(ctx) + if err == nil { + return nil + } + return err + } +} + +func (kv *kv_etcd5509) do(ctx context.Context) error { + err := kv.getRemote(ctx) + return err +} + +func (kv *kv_etcd5509) getRemote(ctx context.Context) error { + return kv.rc.acquire(ctx) +} + +type KV interface { + Get(ctx context.Context) error + Do(ctx context.Context) error +} + +func NewKV_etcd5509(c *Client_etcd5509) KV { + return &kv_etcd5509{rc: &remoteClient_etcd5509{ + client: c, + }} +} + +func Etcd5509() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + runtime.Gosched() // Yield twice. 
+ prof.WriteTo(os.Stdout, 2) + }() + + go func() { + // deadlocks: 1 + ctx, _ := context.WithCancel(context.TODO()) + cli := &Client_etcd5509{ + ctx: ctx, + } + kv := NewKV_etcd5509(cli) + donec := make(chan struct{}) + go func() { + defer close(donec) + err := kv.Get(context.TODO()) + if err != nil && err != ErrConnClosed_etcd5509 { + io.Discard.Write([]byte("Expect ErrConnClosed")) + } + }() + + runtime.Gosched() + cli.Close() + + <-donec + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6708.go b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go new file mode 100644 index 00000000000000..16186fe9fc0a2c --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6708.go @@ -0,0 +1,94 @@ +package main + +import ( + "context" + "os" + "runtime" + "runtime/pprof" + "sync" +) + +func init() { + register("Etcd6708", Etcd6708) +} + +type EndpointSelectionMode_etcd6708 int + +const ( + EndpointSelectionRandom_etcd6708 EndpointSelectionMode_etcd6708 = iota + EndpointSelectionPrioritizeLeader_etcd6708 +) + +type MembersAPI_etcd6708 interface { + Leader(ctx context.Context) +} + +type Client_etcd6708 interface { + Sync(ctx context.Context) + SetEndpoints() + httpClient_etcd6708 +} + +type httpClient_etcd6708 interface { + Do(context.Context) +} + +type httpClusterClient_etcd6708 struct { + sync.RWMutex + selectionMode EndpointSelectionMode_etcd6708 +} + +func (c *httpClusterClient_etcd6708) getLeaderEndpoint() { + mAPI := NewMembersAPI_etcd6708(c) + mAPI.Leader(context.Background()) +} + +func (c *httpClusterClient_etcd6708) SetEndpoints() { + switch c.selectionMode { + case EndpointSelectionRandom_etcd6708: + case EndpointSelectionPrioritizeLeader_etcd6708: + c.getLeaderEndpoint() + } +} + +func (c *httpClusterClient_etcd6708) Do(ctx context.Context) { + c.RLock() + c.RUnlock() +} + +func (c *httpClusterClient_etcd6708) Sync(ctx context.Context) { + c.Lock() + defer c.Unlock() + + c.SetEndpoints() +} + +type httpMembersAPI_etcd6708 struct { + 
client httpClient_etcd6708 +} + +func (m *httpMembersAPI_etcd6708) Leader(ctx context.Context) { + m.client.Do(ctx) +} + +func NewMembersAPI_etcd6708(c Client_etcd6708) MembersAPI_etcd6708 { + return &httpMembersAPI_etcd6708{ + client: c, + } +} + +func Etcd6708() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + go func() { + // deadlocks: 1 + hc := &httpClusterClient_etcd6708{ + selectionMode: EndpointSelectionPrioritizeLeader_etcd6708, + } + hc.Sync(context.Background()) + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6857.go b/src/runtime/testdata/testgoroutineleakgc/etcd6857.go new file mode 100644 index 00000000000000..8c3af3ef192eaf --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6857.go @@ -0,0 +1,93 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/etcd-io/etcd/pull/6857 + * Buggy version: 7c8f13aed7fe251e7066ed6fc1a090699c2cae0e + * fix commit-id: 7afc490c95789c408fbc256d8e790273d331c984 + * Flaky: 19/100 + */ +package main + +import ( + "os" + "runtime/pprof" + "time" +) + +func init() { + register("Etcd6857", Etcd6857) +} + +type Status_etcd6857 struct{} + +type node_etcd6857 struct { + status chan chan Status_etcd6857 + stop chan struct{} + done chan struct{} +} + +func (n *node_etcd6857) Status() Status_etcd6857 { + c := make(chan Status_etcd6857) + n.status <- c + return <-c +} + +func (n *node_etcd6857) run() { + for { + select { + case c := <-n.status: + c <- Status_etcd6857{} + case <-n.stop: + close(n.done) + return + } + } +} + +func (n *node_etcd6857) Stop() { + select { + case n.stop <- struct{}{}: + case <-n.done: + return + } + <-n.done +} + +func NewNode_etcd6857() *node_etcd6857 { + return &node_etcd6857{ + status: make(chan chan Status_etcd6857), + stop: make(chan struct{}), + done: make(chan struct{}), + } +} + +/// +/// G1 G2 G3 +/// n.run() +/// n.Stop() +/// n.stop<- +/// <-n.stop +/// <-n.done +/// close(n.done) +/// 
return +/// return +/// n.Status() +/// n.status<- +///----------------G2 leak------------------- +/// + +func Etcd6857() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i <= 100; i++ { + go func() { + n := NewNode_etcd6857() + go n.run() // G1 + // deadlocks: x > 0 + go n.Status() // G2 + go n.Stop() // G3 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd6873.go b/src/runtime/testdata/testgoroutineleakgc/etcd6873.go new file mode 100644 index 00000000000000..a668219d40aea4 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd6873.go @@ -0,0 +1,94 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/etcd-io/etcd/commit/7618fdd1d642e47cac70c03f637b0fd798a53a6e + * Buggy version: 377f19b0031f9c0aafe2aec28b6f9019311f52f9 + * fix commit-id: 7618fdd1d642e47cac70c03f637b0fd798a53a6e + * Flaky: 9/100 + */ +package main + +import ( + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Etcd6873", Etcd6873) +} + +type watchBroadcast_etcd6873 struct{} + +type watchBroadcasts_etcd6873 struct { + mu sync.Mutex + updatec chan *watchBroadcast_etcd6873 + donec chan struct{} +} + +func newWatchBroadcasts_etcd6873() *watchBroadcasts_etcd6873 { + wbs := &watchBroadcasts_etcd6873{ + updatec: make(chan *watchBroadcast_etcd6873, 1), + donec: make(chan struct{}), + } + go func() { // G2 + defer close(wbs.donec) + // deadlocks: x > 0 + for wb := range wbs.updatec { + wbs.coalesce(wb) + } + }() + return wbs +} + +func (wbs *watchBroadcasts_etcd6873) coalesce(wb *watchBroadcast_etcd6873) { + wbs.mu.Lock() + wbs.mu.Unlock() +} + +func (wbs *watchBroadcasts_etcd6873) stop() { + wbs.mu.Lock() + defer wbs.mu.Unlock() + close(wbs.updatec) + <-wbs.donec +} + +func (wbs *watchBroadcasts_etcd6873) update(wb *watchBroadcast_etcd6873) { + select { + case wbs.updatec <- wb: + default: + } +} + +/// +/// G1 G2 G3 +/// newWatchBroadcasts() +/// 
wbs.update() +/// wbs.updatec <- +/// return +/// <-wbs.updatec +/// wbs.coalesce() +/// wbs.stop() +/// wbs.mu.Lock() +/// close(wbs.updatec) +/// <-wbs.donec +/// wbs.mu.Lock() +///---------------------G2,G3 deadlock------------------------- +/// + +func Etcd6873() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go func() { + wbs := newWatchBroadcasts_etcd6873() // G1 + wbs.update(&watchBroadcast_etcd6873{}) + // deadlocks: x > 0 + go wbs.stop() // G3 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd7492.go b/src/runtime/testdata/testgoroutineleakgc/etcd7492.go new file mode 100644 index 00000000000000..fbda74c48359f7 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd7492.go @@ -0,0 +1,181 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/etcd-io/etcd/pull/7492 + * Buggy version: 51939650057d602bb5ab090633138fffe36854dc + * fix commit-id: 1b1fabef8ffec606909f01c3983300fff539f214 + * Flaky: 40/100 + */ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Etcd7492", Etcd7492) +} + +type TokenProvider_etcd7492 interface { + assign() + enable() + disable() +} + +type simpleTokenTTLKeeper_etcd7492 struct { + tokens map[string]time.Time + addSimpleTokenCh chan struct{} + stopCh chan chan struct{} + deleteTokenFunc func(string) +} + +type authStore_etcd7492 struct { + tokenProvider TokenProvider_etcd7492 +} + +func (as *authStore_etcd7492) Authenticate() { + as.tokenProvider.assign() +} + +func NewSimpleTokenTTLKeeper_etcd7492(deletefunc func(string)) *simpleTokenTTLKeeper_etcd7492 { + stk := &simpleTokenTTLKeeper_etcd7492{ + tokens: make(map[string]time.Time), + addSimpleTokenCh: make(chan struct{}, 1), + stopCh: make(chan chan struct{}), + deleteTokenFunc: deletefunc, + } + // deadlocks: x > 0 + go stk.run() // G1 + return stk +} + +func (tm 
*simpleTokenTTLKeeper_etcd7492) run() { + tokenTicker := time.NewTicker(time.Nanosecond) + defer tokenTicker.Stop() + for { + select { + case <-tm.addSimpleTokenCh: + runtime.Gosched() + /// Make tm.tokens not empty is enough + tm.tokens["1"] = time.Now() + case <-tokenTicker.C: + runtime.Gosched() + for t, _ := range tm.tokens { + tm.deleteTokenFunc(t) + delete(tm.tokens, t) + } + case waitCh := <-tm.stopCh: + waitCh <- struct{}{} + return + } + } +} + +func (tm *simpleTokenTTLKeeper_etcd7492) addSimpleToken() { + tm.addSimpleTokenCh <- struct{}{} + runtime.Gosched() +} + +func (tm *simpleTokenTTLKeeper_etcd7492) stop() { + waitCh := make(chan struct{}) + tm.stopCh <- waitCh + <-waitCh + close(tm.stopCh) +} + +type tokenSimple_etcd7492 struct { + simpleTokenKeeper *simpleTokenTTLKeeper_etcd7492 + simpleTokensMu sync.RWMutex +} + +func (t *tokenSimple_etcd7492) assign() { + t.assignSimpleTokenToUser() +} + +func (t *tokenSimple_etcd7492) assignSimpleTokenToUser() { + t.simpleTokensMu.Lock() + runtime.Gosched() + t.simpleTokenKeeper.addSimpleToken() + t.simpleTokensMu.Unlock() +} +func newDeleterFunc(t *tokenSimple_etcd7492) func(string) { + return func(tk string) { + t.simpleTokensMu.Lock() + defer t.simpleTokensMu.Unlock() + } +} + +func (t *tokenSimple_etcd7492) enable() { + t.simpleTokenKeeper = NewSimpleTokenTTLKeeper_etcd7492(newDeleterFunc(t)) +} + +func (t *tokenSimple_etcd7492) disable() { + if t.simpleTokenKeeper != nil { + t.simpleTokenKeeper.stop() + t.simpleTokenKeeper = nil + } + t.simpleTokensMu.Lock() + t.simpleTokensMu.Unlock() +} + +func newTokenProviderSimple_etcd7492() *tokenSimple_etcd7492 { + return &tokenSimple_etcd7492{} +} + +func setupAuthStore_etcd7492() (store *authStore_etcd7492, teardownfunc func()) { + as := &authStore_etcd7492{ + tokenProvider: newTokenProviderSimple_etcd7492(), + } + as.tokenProvider.enable() + tearDown := func() { + as.tokenProvider.disable() + } + return as, tearDown +} + +/// +/// G2 G1 +/// stk.run() +/// 
ts.assignSimpleTokenToUser() +/// t.simpleTokensMu.Lock() +/// t.simpleTokenKeeper.addSimpleToken() +/// tm.addSimpleTokenCh <- true +/// <-tm.addSimpleTokenCh +/// t.simpleTokensMu.Unlock() +/// ts.assignSimpleTokenToUser() +/// ... ... +/// t.simpleTokensMu.Lock() +/// <-tokenTicker.C +/// tm.addSimpleTokenCh <- true +/// tm.deleteTokenFunc() +/// t.simpleTokensMu.Lock() +///------------------------------------G1,G2 deadlock--------------------------------------------- +/// + +func Etcd7492() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + as, tearDown := setupAuthStore_etcd7492() + defer tearDown() + var wg sync.WaitGroup + wg.Add(3) + for i := 0; i < 3; i++ { + go func() { // G2 + // deadlocks: x > 0 + as.Authenticate() + defer wg.Done() + }() + } + wg.Wait() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/etcd7902.go b/src/runtime/testdata/testgoroutineleakgc/etcd7902.go new file mode 100644 index 00000000000000..4e8bbb2eb04df2 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/etcd7902.go @@ -0,0 +1,115 @@ +/* + * Project: etcd + * Issue or PR : https://github.com/coreos/etcd/pull/7902 + * Buggy version: dfdaf082c51ba14861267f632f6af795a27eb4ef + * fix commit-id: 87d99fe0387ee1df1cf1811d88d37331939ef4ae + * Flaky: 100/100 + * Description: + * At least two goroutines are needed to trigger this bug, + * one is leader and the other is follower. Both the leader + * and the follower execute the code below. If the follower + * acquires mu.Lock() first and enters rc.release(), it will + * be blocked at <- rcNextc (nextc). Only the leader can execute + * close(nextc) to unblock the follower inside rc.release(). + * However, in order to invoke rc.release(), the leader needs + * to acquire mu.Lock(). + * The fix is to remove the lock and unlock around rc.release(). 
+ */ +package main + +import ( + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Etcd7902", Etcd7902) +} + +type roundClient_etcd7902 struct { + progress int + acquire func() + validate func() + release func() +} + +func runElectionFunc_etcd7902() { + // deadlocks: x > 0 + rcs := make([]roundClient_etcd7902, 3) + nextc := make(chan bool) + for i := range rcs { + var rcNextc chan bool + setRcNextc := func() { + rcNextc = nextc + } + rcs[i].acquire = func() {} + rcs[i].validate = func() { + setRcNextc() + } + rcs[i].release = func() { + if i == 0 { // Assume the first roundClient is the leader + close(nextc) + nextc = make(chan bool) + } + <-rcNextc // Follower is blocking here + } + } + doRounds_etcd7902(rcs, 100) +} + +func doRounds_etcd7902(rcs []roundClient_etcd7902, rounds int) { + var mu sync.Mutex + var wg sync.WaitGroup + wg.Add(len(rcs)) + for i := range rcs { + go func(rc *roundClient_etcd7902) { // G2,G3 + // deadlocks: x > 0 + defer wg.Done() + for rc.progress < rounds || rounds <= 0 { + rc.acquire() + mu.Lock() + rc.validate() + mu.Unlock() + time.Sleep(10 * time.Millisecond) + rc.progress++ + mu.Lock() + rc.release() + mu.Unlock() + } + }(&rcs[i]) + } + wg.Wait() +} + +/// +/// G1 G2 (leader) G3 (follower) +/// runElectionFunc() +/// doRounds() +/// wg.Wait() +/// ... +/// mu.Lock() +/// rc.validate() +/// rcNextc = nextc +/// mu.Unlock() ... 
+/// mu.Lock() +/// rc.validate() +/// mu.Unlock() +/// mu.Lock() +/// rc.release() +/// <-rcNextc +/// mu.Lock() +/// -------------------------G1,G2,G3 deadlock-------------------------- +/// + +func Etcd7902() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 100; i++ { + go runElectionFunc_etcd7902() // G1 + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1275.go b/src/runtime/testdata/testgoroutineleakgc/grpc1275.go new file mode 100644 index 00000000000000..155b4ef6b16244 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1275.go @@ -0,0 +1,118 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/1275 + * Buggy version: (missing) + * fix commit-id: 0669f3f89e0330e94bb13fa1ce8cc704aab50c9c + * Flaky: 100/100 + * Description: + * Two goroutines are involved in this deadlock. The first goroutine + * is the main goroutine. It is blocked at case <- donec, and it is + * waiting for the second goroutine to close the channel. + * The second goroutine is created by the main goroutine. It is blocked + * when calling stream.Read(). stream.Read() invokes recvBufferReader.Read(). + * The second goroutine is blocked at case i := r.recv.get(), and it is + * waiting for someone to send a message to this channel. + * It is the client.CloseStream() method called by the main goroutine that + * should send the message, but it does not. The patch is to send out this message. 
+ */ +package main + +import ( + "io" + "os" + "runtime/pprof" + "time" +) + +func init() { + register("Grpc1275", Grpc1275) +} + +type recvBuffer_grpc1275 struct { + c chan bool +} + +func (b *recvBuffer_grpc1275) get() <-chan bool { + return b.c +} + +type recvBufferReader_grpc1275 struct { + recv *recvBuffer_grpc1275 +} + +func (r *recvBufferReader_grpc1275) Read(p []byte) (int, error) { + select { + case <-r.recv.get(): + } + return 0, nil +} + +type Stream_grpc1275 struct { + trReader io.Reader +} + +func (s *Stream_grpc1275) Read(p []byte) (int, error) { + return io.ReadFull(s.trReader, p) +} + +type http2Client_grpc1275 struct{} + +func (t *http2Client_grpc1275) CloseStream(s *Stream_grpc1275) { + // It is the client.CloseStream() method called by the + // main goroutine that should send the message, but it + // does not. The patch is to send out this message. +} + +func (t *http2Client_grpc1275) NewStream() *Stream_grpc1275 { + return &Stream_grpc1275{ + trReader: &recvBufferReader_grpc1275{ + recv: &recvBuffer_grpc1275{ + c: make(chan bool), + }, + }, + } +} + +func testInflightStreamClosing_grpc1275() { + client := &http2Client_grpc1275{} + stream := client.NewStream() + donec := make(chan bool) + go func() { // G2 + defer close(donec) + // deadlocks: 1 + stream.Read([]byte{1}) + }() + + client.CloseStream(stream) + + timeout := time.NewTimer(300 * time.Nanosecond) + select { + case <-donec: + if !timeout.Stop() { + <-timeout.C + } + case <-timeout.C: + } +} + +/// +/// G1 G2 +/// testInflightStreamClosing() +/// stream.Read() +/// io.ReadFull() +/// <- r.recv.get() +/// CloseStream() +/// <- donec +/// ------------G1 timeout, G2 leak--------------------- +/// + +func Grpc1275() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + go func() { + testInflightStreamClosing_grpc1275() // G1 + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1424.go 
b/src/runtime/testdata/testgoroutineleakgc/grpc1424.go new file mode 100644 index 00000000000000..0ec8c8f8e7e41d --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1424.go @@ -0,0 +1,114 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/1424 + * Buggy version: 39c8c3866d926d95e11c03508bf83d00f2963f91 + * fix commit-id: 64bd0b04a7bb1982078bae6a2ab34c226125fbc1 + * Flaky: 100/100 + * Description: + * The parent function could return without draining the done channel. + */ +package main + +import ( + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Grpc1424", Grpc1424) +} + +type Balancer_grpc1424 interface { + Notify() <-chan bool +} + +type roundRobin_grpc1424 struct { + mu sync.Mutex + addrCh chan bool +} + +func (rr *roundRobin_grpc1424) Notify() <-chan bool { + return rr.addrCh +} + +type addrConn_grpc1424 struct { + mu sync.Mutex +} + +func (ac *addrConn_grpc1424) tearDown() { + ac.mu.Lock() + defer ac.mu.Unlock() +} + +type dialOption_grpc1424 struct { + balancer Balancer_grpc1424 +} + +type ClientConn_grpc1424 struct { + dopts dialOption_grpc1424 + conns []*addrConn_grpc1424 +} + +func (cc *ClientConn_grpc1424) lbWatcher(doneChan chan bool) { + for addr := range cc.dopts.balancer.Notify() { + if addr { + // nop, make compiler happy + } + var ( + del []*addrConn_grpc1424 + ) + for _, a := range cc.conns { + del = append(del, a) + } + for _, c := range del { + c.tearDown() + } + } +} + +func NewClientConn_grpc1424() *ClientConn_grpc1424 { + cc := &ClientConn_grpc1424{ + dopts: dialOption_grpc1424{ + &roundRobin_grpc1424{addrCh: make(chan bool)}, + }, + } + return cc +} + +func DialContext_grpc1424() { + cc := NewClientConn_grpc1424() + waitC := make(chan error, 1) + go func() { // G2 + defer close(waitC) + // deadlocks: 1 + ch := cc.dopts.balancer.Notify() + if ch != nil { + doneChan := make(chan bool) + go cc.lbWatcher(doneChan) // G3 + <-doneChan + } + }() + /// close addrCh + 
close(cc.dopts.balancer.(*roundRobin_grpc1424).addrCh) +} + +/// +/// G1 G2 G3 +/// DialContext() +/// cc.dopts.balancer.Notify() +/// cc.lbWatcher() +/// <-doneChan +/// close() +/// -----------------------G2 leak------------------------------------ +/// + +func Grpc1424() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + go DialContext_grpc1424() // G1 +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc1460.go b/src/runtime/testdata/testgoroutineleakgc/grpc1460.go new file mode 100644 index 00000000000000..d55db220064baa --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc1460.go @@ -0,0 +1,89 @@ +/* + * Project: grpc + * Issue or PR : https://github.com/grpc/grpc-go/pull/1460 + * Buggy version: 7db1564ba1229bc42919bb1f6d9c4186f3aa8678 + * fix commit-id: e605a1ecf24b634f94f4eefdab10a9ada98b70dd + * Flaky: 100/100 + * Description: + * When gRPC keepalives are enabled (which isn't the case + * by default at this time) and PermitWithoutStream is false + * (the default), the client can deadlock when transitioning + * between having no active stream and having one active + * stream. The keepalive() goroutine is stuck at “<-t.awakenKeepalive”, + * while the main goroutine is stuck in NewStream() on t.mu.Lock(). 
+ */ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Grpc1460", Grpc1460) +} + +type Stream_grpc1460 struct{} + +type http2Client_grpc1460 struct { + mu sync.Mutex + awakenKeepalive chan struct{} + activeStream []*Stream_grpc1460 +} + +func (t *http2Client_grpc1460) keepalive() { + t.mu.Lock() + if len(t.activeStream) < 1 { + <-t.awakenKeepalive + runtime.Gosched() + t.mu.Unlock() + } else { + t.mu.Unlock() + } +} + +func (t *http2Client_grpc1460) NewStream() { + t.mu.Lock() + runtime.Gosched() + t.activeStream = append(t.activeStream, &Stream_grpc1460{}) + if len(t.activeStream) == 1 { + select { + case t.awakenKeepalive <- struct{}{}: + default: + } + } + t.mu.Unlock() +} + +/// +/// G1 G2 +/// client.keepalive() +/// client.NewStream() +/// t.mu.Lock() +/// <-t.awakenKeepalive +/// t.mu.Lock() +/// ---------------G1, G2 deadlock-------------- +/// + +func Grpc1460() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 1000; i++ { + go func() { + client := &http2Client_grpc1460{ + awakenKeepalive: make(chan struct{}), + } + // deadlocks: x > 0 + go client.keepalive() //G1 + // deadlocks: x > 0 + go client.NewStream() //G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc3017.go b/src/runtime/testdata/testgoroutineleakgc/grpc3017.go new file mode 100644 index 00000000000000..ba0c7f4b7480cb --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc3017.go @@ -0,0 +1,150 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +// This test case is a reproduction of grpc/3017. +// +// It is a goroutine leak that also simultaneously engages many GC assists. +// Testing runtime behaviour when pivoting between regular and goroutine leak detection modes. 
+ +func init() { + register("Grpc3017", Grpc3017) +} + +type Address_grpc3017 int +type SubConn_grpc3017 int + +type subConnCacheEntry_grpc3017 struct { + sc SubConn_grpc3017 + cancel func() + abortDeleting bool +} + +type lbCacheClientConn_grpc3017 struct { + mu sync.Mutex // L1 + timeout time.Duration + subConnCache map[Address_grpc3017]*subConnCacheEntry_grpc3017 + subConnToAddr map[SubConn_grpc3017]Address_grpc3017 +} + +func (ccc *lbCacheClientConn_grpc3017) NewSubConn(addrs []Address_grpc3017) SubConn_grpc3017 { + if len(addrs) != 1 { + return SubConn_grpc3017(1) + } + addrWithoutMD := addrs[0] + ccc.mu.Lock() // L1 + defer ccc.mu.Unlock() + if entry, ok := ccc.subConnCache[addrWithoutMD]; ok { + entry.cancel() + delete(ccc.subConnCache, addrWithoutMD) + return entry.sc + } + scNew := SubConn_grpc3017(1) + ccc.subConnToAddr[scNew] = addrWithoutMD + return scNew +} + +func (ccc *lbCacheClientConn_grpc3017) RemoveSubConn(sc SubConn_grpc3017) { + ccc.mu.Lock() // L1 + defer ccc.mu.Unlock() + addr, ok := ccc.subConnToAddr[sc] + if !ok { + return + } + + if entry, ok := ccc.subConnCache[addr]; ok { + if entry.sc != sc { + delete(ccc.subConnToAddr, sc) + } + return + } + + entry := &subConnCacheEntry_grpc3017{ + sc: sc, + } + ccc.subConnCache[addr] = entry + + timer := time.AfterFunc(ccc.timeout, func() { // G3 + runtime.Gosched() + ccc.mu.Lock() // L1 + // deadlocks: x > 0 + if entry.abortDeleting { + return // Missing unlock + } + delete(ccc.subConnToAddr, sc) + delete(ccc.subConnCache, addr) + ccc.mu.Unlock() + }) + + entry.cancel = func() { + if !timer.Stop() { + entry.abortDeleting = true + } + } +} + +func Grpc3017() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go func() { //G1 + done := make(chan struct{}) + + // deadlocks: x > 0 + ccc := &lbCacheClientConn_grpc3017{ + timeout: time.Nanosecond, + subConnCache: 
make(map[Address_grpc3017]*subConnCacheEntry_grpc3017), + subConnToAddr: make(map[SubConn_grpc3017]Address_grpc3017), + } + + sc := ccc.NewSubConn([]Address_grpc3017{Address_grpc3017(1)}) + go func() { // G2 + // deadlocks: x > 0 + for i := 0; i < 10000; i++ { + ccc.RemoveSubConn(sc) + sc = ccc.NewSubConn([]Address_grpc3017{Address_grpc3017(1)}) + } + close(done) + }() + <-done + }() + } +} + +// Example of a deadlocking trace +// +// G1 G2 G3 +// ------------------------------------------------------------------------------------------------ +// NewSubConn([1]) +// ccc.mu.Lock() [L1] +// sc = 1 +// ccc.subConnToAddr[1] = 1 +// go func() [G2] +// <-done . +// . ccc.RemoveSubConn(1) +// . ccc.mu.Lock() +// . addr = 1 +// . entry = &subConnCacheEntry_grpc3017{sc: 1} +// . cc.subConnCache[1] = entry +// . timer = time.AfterFunc() [G3] +// . entry.cancel = func() . +// . sc = ccc.NewSubConn([1]) . +// . ccc.mu.Lock() [L1] . +// . entry.cancel() . +// . !timer.Stop() [true] . +// . entry.abortDeleting = true . +// . . ccc.mu.Lock() +// . . <<>> +// . ccc.RemoveSubConn(1) +// . ccc.mu.Lock() [L1] +// -------------------------------------------G1, G2 leak----------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc660.go b/src/runtime/testdata/testgoroutineleakgc/grpc660.go new file mode 100644 index 00000000000000..9f458904ee6479 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc660.go @@ -0,0 +1,71 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/660 + * Buggy version: db85417dd0de6cc6f583672c6175a7237e5b5dd2 + * fix commit-id: ceacfbcbc1514e4e677932fd55938ac455d182fb + * Flaky: 100/100 + * Description: + * The parent function could return without draining the done channel. 
+ */ +package main + +import ( + "math/rand" + "os" + "runtime" + "runtime/pprof" +) + +func init() { + register("Grpc660", Grpc660) +} + +type benchmarkClient_grpc660 struct { + stop chan bool +} + +func (bc *benchmarkClient_grpc660) doCloseLoopUnary() { + for { + done := make(chan bool) + go func() { // G2 + // deadlocks: 1 + if rand.Intn(10) > 7 { + done <- false + return + } + done <- true + }() + select { + case <-bc.stop: + return + case <-done: + } + } +} + +/// +/// G1 G2 helper goroutine +/// doCloseLoopUnary() +/// bc.stop <- true +/// <-bc.stop +/// return +/// done <- +/// ----------------------G2 leak-------------------------- +/// + +func Grpc660() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + go func() { + bc := &benchmarkClient_grpc660{ + stop: make(chan bool), + } + go bc.doCloseLoopUnary() // G1 + go func() { // helper goroutine + bc.stop <- true + }() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc795.go b/src/runtime/testdata/testgoroutineleakgc/grpc795.go new file mode 100644 index 00000000000000..1208f4ff875248 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc795.go @@ -0,0 +1,72 @@ +package main + +import ( + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Grpc795", Grpc795) +} + +type Server_grpc795 struct { + mu sync.Mutex + drain bool +} + +func (s *Server_grpc795) GracefulStop() { + s.mu.Lock() + if s.drain { + s.mu.Lock() + return + } + s.drain = true + s.mu.Unlock() +} +func (s *Server_grpc795) Serve() { + s.mu.Lock() + s.mu.Unlock() +} + +func NewServer_grpc795() *Server_grpc795 { + return &Server_grpc795{} +} + +type test_grpc795 struct { + srv *Server_grpc795 +} + +func (te *test_grpc795) startServer() { + s := NewServer_grpc795() + te.srv = s + // deadlocks: x > 0 + go s.Serve() +} + +func newTest_grpc795() *test_grpc795 { + return &test_grpc795{} +} + +func testServerGracefulStopIdempotent_grpc795() { + 
// deadlocks: x > 0 + te := newTest_grpc795() + + te.startServer() + + for i := 0; i < 3; i++ { + te.srv.GracefulStop() + } +} + +func Grpc795() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 100; i++ { + go testServerGracefulStopIdempotent_grpc795() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/grpc862.go b/src/runtime/testdata/testgoroutineleakgc/grpc862.go new file mode 100644 index 00000000000000..69c79e2f6d8fba --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/grpc862.go @@ -0,0 +1,112 @@ +/* + * Project: grpc-go + * Issue or PR : https://github.com/grpc/grpc-go/pull/862 + * Buggy version: d8f4ebe77f6b7b6403d7f98626de8a534f9b93a7 + * fix commit-id: dd5645bebff44f6b88780bb949022a09eadd7dae + * Flaky: 100/100 + * Description: + * When return value conn is nil, cc (ClientConn) is not closed. + * The goroutine executing resetAddrConn is leaked. The patch is to + * close ClientConn in the defer func(). 
+ */ +package main + +import ( + "context" + "os" + "runtime" + "runtime/pprof" + "time" +) + +func init() { + register("Grpc862", Grpc862) +} + +type ClientConn_grpc862 struct { + ctx context.Context + cancel context.CancelFunc + conns []*addrConn_grpc862 +} + +func (cc *ClientConn_grpc862) Close() { + cc.cancel() + conns := cc.conns + cc.conns = nil + for _, ac := range conns { + ac.tearDown() + } +} + +func (cc *ClientConn_grpc862) resetAddrConn() { + ac := &addrConn_grpc862{ + cc: cc, + } + cc.conns = append(cc.conns, ac) + ac.ctx, ac.cancel = context.WithCancel(cc.ctx) + ac.resetTransport() +} + +type addrConn_grpc862 struct { + cc *ClientConn_grpc862 + ctx context.Context + cancel context.CancelFunc +} + +func (ac *addrConn_grpc862) resetTransport() { + for retries := 1; ; retries++ { + _ = 2 * time.Nanosecond * time.Duration(retries) + timeout := 10 * time.Nanosecond + _, cancel := context.WithTimeout(ac.ctx, timeout) + _ = time.Now() + cancel() + <-ac.ctx.Done() + return + } +} + +func (ac *addrConn_grpc862) tearDown() { + ac.cancel() +} + +func DialContext_grpc862(ctx context.Context) (conn *ClientConn_grpc862) { + cc := &ClientConn_grpc862{} + cc.ctx, cc.cancel = context.WithCancel(context.Background()) + defer func() { + select { + case <-ctx.Done(): + if conn != nil { + conn.Close() + } + conn = nil + default: + } + }() + go func() { // G2 + // deadlocks: 1 + cc.resetAddrConn() + }() + return conn +} + +/// +/// G1 G2 +/// DialContext() +/// cc.resetAddrConn() +/// resetTransport() +/// <-ac.ctx.Done() +/// --------------G2 leak------------------ +/// + +func Grpc862() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + go func() { + ctx, cancel := context.WithCancel(context.Background()) + go DialContext_grpc862(ctx) // G1 + go cancel() // helper goroutine + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/hugo3251.go b/src/runtime/testdata/testgoroutineleakgc/hugo3251.go new 
file mode 100644 index 00000000000000..de19aa10069480 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/hugo3251.go @@ -0,0 +1,102 @@ +package main + +import ( + "fmt" + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Hugo3251", Hugo3251) +} + +type remoteLock_hugo3251 struct { + sync.RWMutex // L1 + m map[string]*sync.Mutex // L2 +} + +func (l *remoteLock_hugo3251) URLLock(url string) { + l.Lock() // L1 + if _, ok := l.m[url]; !ok { + l.m[url] = &sync.Mutex{} + } + l.m[url].Lock() // L2 + runtime.Gosched() + l.Unlock() // L1 + // runtime.Gosched() +} + +func (l *remoteLock_hugo3251) URLUnlock(url string) { + l.RLock() // L1 + defer l.RUnlock() // L1 + if um, ok := l.m[url]; ok { + um.Unlock() // L2 + } +} + +func resGetRemote_hugo3251(remoteURLLock *remoteLock_hugo3251, url string) error { + remoteURLLock.URLLock(url) + defer func() { remoteURLLock.URLUnlock(url) }() + + return nil +} + +func Hugo3251() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(time.Second) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 10; i++ { + go func() { // G1 + // deadlocks: x > 0 + url := "http://Foo.Bar/foo_Bar-Foo" + remoteURLLock := &remoteLock_hugo3251{m: make(map[string]*sync.Mutex)} + for range []bool{false, true} { + var wg sync.WaitGroup + for i := 0; i < 100; i++ { + wg.Add(1) + go func(gor int) { // G2 + // deadlocks: x > 0 + defer wg.Done() + for j := 0; j < 200; j++ { + err := resGetRemote_hugo3251(remoteURLLock, url) + if err != nil { + fmt.Errorf("Error getting resource content: %s", err) + } + time.Sleep(300 * time.Nanosecond) + } + }(i) + } + wg.Wait() + } + }() + } +} + +// Example of deadlocking trace: +// +// G1 G2 G3 +// ------------------------------------------------------------------------------------------------ +// wg.Add(1) [W1: 1] +// go func() [G2] +// go func() [G3] +// . resGetRemote() +// . remoteURLLock.URLLock(url) +// . l.Lock() [L1] +// . 
l.m[url] = &sync.Mutex{} [L2] +// . l.m[url].Lock() [L2] +// . l.Unlock() [L1] +// . . resGetRemote() +// . . remoteURLLock.URLLock(url) +// . . l.Lock() [L1] +// . . l.m[url].Lock() [L2] +// . remoteURLLock.URLUnlock(url) +// . l.RLock() [L1] +// ... +// wg.Wait() [W1] +// ----------------------------------------G1,G2,G3 leak------------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/hugo5379.go b/src/runtime/testdata/testgoroutineleakgc/hugo5379.go new file mode 100644 index 00000000000000..cee52801319d04 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/hugo5379.go @@ -0,0 +1,315 @@ +package main + +import ( + "context" + "runtime" + + "log" + "sync" + "time" +) + +func init() { + register("Hugo5379", Hugo5379) +} + +type shortcodeHandler_hugo5379 struct { + p *PageWithoutContent_hugo5379 + contentShortcodes map[int]func() error + contentShortcodesDelta map[int]func() error + init sync.Once // O1 +} + +func (s *shortcodeHandler_hugo5379) executeShortcodesForDelta(p *PageWithoutContent_hugo5379) error { + for k, _ := range s.contentShortcodesDelta { + render := s.contentShortcodesDelta[k] + if err := render(); err != nil { + continue + } + } + return nil +} + +func (s *shortcodeHandler_hugo5379) updateDelta() { + s.init.Do(func() { + s.contentShortcodes = createShortcodeRenderers_hugo5379(s.p.withoutContent()) + }) + + delta := make(map[int]func() error) + + for k, v := range s.contentShortcodes { + if _, ok := delta[k]; !ok { + delta[k] = v + } + } + + s.contentShortcodesDelta = delta +} + +type Page_hugo5379 struct { + *pageInit_hugo5379 + *pageContentInit_hugo5379 + pageWithoutContent *PageWithoutContent_hugo5379 + contentInit sync.Once // O2 + contentInitMu sync.Mutex // L1 + shortcodeState *shortcodeHandler_hugo5379 +} + +func (p *Page_hugo5379) WordCount() { + p.initContentPlainAndMeta() +} + +func (p *Page_hugo5379) initContentPlainAndMeta() { + p.initContent() + p.initPlain(true) +} + +func (p 
*Page_hugo5379) initPlain(lock bool) { + p.plainInit.Do(func() { + if lock { + /// Double locking here. + p.contentInitMu.Lock() + defer p.contentInitMu.Unlock() + } + }) +} + +func (p *Page_hugo5379) withoutContent() *PageWithoutContent_hugo5379 { + p.pageInit_hugo5379.withoutContentInit.Do(func() { + p.pageWithoutContent = &PageWithoutContent_hugo5379{Page_hugo5379: p} + }) + return p.pageWithoutContent +} + +func (p *Page_hugo5379) prepareForRender() error { + var err error + if err = handleShortcodes_hugo5379(p.withoutContent()); err != nil { + return err + } + return nil +} + +func (p *Page_hugo5379) setContentInit() { + p.shortcodeState.updateDelta() +} + +func (p *Page_hugo5379) initContent() { + p.contentInit.Do(func() { + ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) + defer cancel() + c := make(chan error, 1) + + go func() { // G2 + // deadlocks: x > 0 + var err error + p.contentInitMu.Lock() // first lock here + defer p.contentInitMu.Unlock() + + err = p.prepareForRender() + if err != nil { + c <- err + return + } + c <- err + }() + + select { + case <-ctx.Done(): + case <-c: + } + }) +} + +type PageWithoutContent_hugo5379 struct { + *Page_hugo5379 +} + +type pageInit_hugo5379 struct { + withoutContentInit sync.Once +} + +type pageContentInit_hugo5379 struct { + contentInit sync.Once // O3 + plainInit sync.Once // O4 +} + +type HugoSites_hugo5379 struct { + Sites []*Site_hugo5379 +} + +func (h *HugoSites_hugo5379) render() { + for _, s := range h.Sites { + for _, s2 := range h.Sites { + s2.preparePagesForRender() + } + s.renderPages() + } +} + +func (h *HugoSites_hugo5379) Build() { + h.render() +} + +type Pages_hugo5379 []*Page_hugo5379 + +type PageCollections_hugo5379 struct { + Pages Pages_hugo5379 +} + +type Site_hugo5379 struct { + *PageCollections_hugo5379 +} + +func (s *Site_hugo5379) preparePagesForRender() { + for _, p := range s.Pages { + p.setContentInit() + } +} + +func (s *Site_hugo5379) renderForLayouts() { + 
/// Omit reflections + for _, p := range s.Pages { + p.WordCount() + } +} + +func (s *Site_hugo5379) renderAndWritePage() { + s.renderForLayouts() +} + +func (s *Site_hugo5379) renderPages() { + numWorkers := 2 + wg := &sync.WaitGroup{} + + for i := 0; i < numWorkers; i++ { + wg.Add(1) + // deadlocks: x > 0 + go pageRenderer_hugo5379(s, wg) // G3 + } + + wg.Wait() +} + +type sitesBuilder_hugo5379 struct { + H *HugoSites_hugo5379 +} + +func (s *sitesBuilder_hugo5379) Build() *sitesBuilder_hugo5379 { + return s.build() +} + +func (s *sitesBuilder_hugo5379) build() *sitesBuilder_hugo5379 { + s.H.Build() + return s +} + +func (s *sitesBuilder_hugo5379) CreateSitesE() error { + sites, err := NewHugoSites_hugo5379() + if err != nil { + return err + } + s.H = sites + return nil +} + +func (s *sitesBuilder_hugo5379) CreateSites() *sitesBuilder_hugo5379 { + if err := s.CreateSitesE(); err != nil { + log.Fatalf("Failed to create sites: %s", err) + } + return s +} + +func newHugoSites_hugo5379(sites ...*Site_hugo5379) (*HugoSites_hugo5379, error) { + h := &HugoSites_hugo5379{Sites: sites} + return h, nil +} + +func newSite_hugo5379() *Site_hugo5379 { + c := &PageCollections_hugo5379{} + s := &Site_hugo5379{ + PageCollections_hugo5379: c, + } + return s +} + +func createSitesFromConfig_hugo5379() []*Site_hugo5379 { + var ( + sites []*Site_hugo5379 + ) + + var s *Site_hugo5379 = newSite_hugo5379() + sites = append(sites, s) + return sites +} + +func NewHugoSites_hugo5379() (*HugoSites_hugo5379, error) { + sites := createSitesFromConfig_hugo5379() + return newHugoSites_hugo5379(sites...) 
+} + +func prepareShortcodeForPage_hugo5379(p *PageWithoutContent_hugo5379) map[int]func() error { + m := make(map[int]func() error) + m[0] = func() error { + return renderShortcode_hugo5379(p) + } + return m +} + +func renderShortcode_hugo5379(p *PageWithoutContent_hugo5379) error { + return renderShortcodeWithPage_hugo5379(p) +} + +func renderShortcodeWithPage_hugo5379(p *PageWithoutContent_hugo5379) error { + /// Omit reflections + p.WordCount() + return nil +} + +func createShortcodeRenderers_hugo5379(p *PageWithoutContent_hugo5379) map[int]func() error { + return prepareShortcodeForPage_hugo5379(p) +} + +func newShortcodeHandler_hugo5379(p *Page_hugo5379) *shortcodeHandler_hugo5379 { + return &shortcodeHandler_hugo5379{ + p: p.withoutContent(), + contentShortcodes: make(map[int]func() error), + contentShortcodesDelta: make(map[int]func() error), + } +} + +func handleShortcodes_hugo5379(p *PageWithoutContent_hugo5379) error { + return p.shortcodeState.executeShortcodesForDelta(p) +} + +func pageRenderer_hugo5379(s *Site_hugo5379, wg *sync.WaitGroup) { + defer wg.Done() + s.renderAndWritePage() +} + +func Hugo5379() { + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.GC() + }() + + for i := 0; i < 100; i++ { + go func() { // G1 + // deadlocks: x > 0 + b := &sitesBuilder_hugo5379{} + s := b.CreateSites() + for _, site := range s.H.Sites { + p := &Page_hugo5379{ + pageInit_hugo5379: &pageInit_hugo5379{}, + pageContentInit_hugo5379: &pageContentInit_hugo5379{}, + pageWithoutContent: &PageWithoutContent_hugo5379{}, + contentInit: sync.Once{}, + contentInitMu: sync.Mutex{}, + shortcodeState: nil, + } + p.shortcodeState = newShortcodeHandler_hugo5379(p) + site.Pages = append(site.Pages, p) + } + s.Build() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/istio16224.go b/src/runtime/testdata/testgoroutineleakgc/istio16224.go new file mode 100644 index 00000000000000..20658186d68022 --- /dev/null +++ 
b/src/runtime/testdata/testgoroutineleakgc/istio16224.go @@ -0,0 +1,127 @@ +package main + +import ( + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Istio16224", Istio16224) +} + +type ConfigStoreCache_istio16224 interface { + RegisterEventHandler(handler func()) + Run() +} + +type Event_istio16224 int + +type Handler_istio16224 func(Event_istio16224) + +type configstoreMonitor_istio16224 struct { + handlers []Handler_istio16224 + eventCh chan Event_istio16224 +} + +func (m *configstoreMonitor_istio16224) Run(stop <-chan struct{}) { + for { + select { + case <-stop: + // This bug is not described, but is a true positive (in our eyes). + // In a real run, main exits while the goroutine is blocked here. + if _, ok := <-m.eventCh; ok { + close(m.eventCh) + } + return + case ce, ok := <-m.eventCh: + if ok { + m.processConfigEvent(ce) + } + } + } +} + +func (m *configstoreMonitor_istio16224) processConfigEvent(ce Event_istio16224) { + m.applyHandlers(ce) +} + +func (m *configstoreMonitor_istio16224) AppendEventHandler(h Handler_istio16224) { + m.handlers = append(m.handlers, h) +} + +func (m *configstoreMonitor_istio16224) applyHandlers(e Event_istio16224) { + for _, f := range m.handlers { + f(e) + } +} +func (m *configstoreMonitor_istio16224) ScheduleProcessEvent(configEvent Event_istio16224) { + m.eventCh <- configEvent +} + +type Monitor_istio16224 interface { + Run(<-chan struct{}) + AppendEventHandler(Handler_istio16224) + ScheduleProcessEvent(Event_istio16224) +} + +type controller_istio16224 struct { + monitor Monitor_istio16224 +} + +func (c *controller_istio16224) RegisterEventHandler(f func(Event_istio16224)) { + c.monitor.AppendEventHandler(f) +} + +func (c *controller_istio16224) Run(stop <-chan struct{}) { + c.monitor.Run(stop) +} + +func (c *controller_istio16224) Create() { + c.monitor.ScheduleProcessEvent(Event_istio16224(0)) +} + +func NewMonitor_istio16224() Monitor_istio16224 { + return NewBufferedMonitor_istio16224() +} + +func 
NewBufferedMonitor_istio16224() Monitor_istio16224 { + return &configstoreMonitor_istio16224{ + eventCh: make(chan Event_istio16224), + } +} + +func Istio16224() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go func() { + // deadlocks: x > 0 + controller := &controller_istio16224{monitor: NewMonitor_istio16224()} + done := make(chan bool) + lock := sync.Mutex{} + controller.RegisterEventHandler(func(event Event_istio16224) { + lock.Lock() + defer lock.Unlock() + done <- true + }) + + stop := make(chan struct{}) + // deadlocks: x > 0 + go controller.Run(stop) + + controller.Create() + + lock.Lock() // blocks + lock.Unlock() + <-done + + close(stop) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/istio17860.go b/src/runtime/testdata/testgoroutineleakgc/istio17860.go new file mode 100644 index 00000000000000..7163603ff831ff --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/istio17860.go @@ -0,0 +1,141 @@ +package main + +import ( + "context" + "os" + "runtime/pprof" + + "sync" + "time" +) + +func init() { + register("Istio17860", Istio17860) +} + +type Proxy_istio17860 interface { + IsLive() bool +} + +type TestProxy_istio17860 struct { + live func() bool +} + +func (tp TestProxy_istio17860) IsLive() bool { + if tp.live == nil { + return true + } + return tp.live() +} + +type Agent_istio17860 interface { + Run(ctx context.Context) + Restart() +} + +type exitStatus_istio17860 int + +type agent_istio17860 struct { + proxy Proxy_istio17860 + mu *sync.Mutex + statusCh chan exitStatus_istio17860 + currentEpoch int + activeEpochs map[int]struct{} +} + +func (a *agent_istio17860) Run(ctx context.Context) { + for { + select { + case status := <-a.statusCh: + a.mu.Lock() + delete(a.activeEpochs, int(status)) + active := len(a.activeEpochs) + a.mu.Unlock() + if active == 0 { + return + } + case <-ctx.Done(): + return + } + } +} + 
+func (a *agent_istio17860) Restart() { + a.mu.Lock() + defer a.mu.Unlock() + + a.waitUntilLive() + a.currentEpoch++ + a.activeEpochs[a.currentEpoch] = struct{}{} + + // deadlocks: x > 0 + go a.runWait(a.currentEpoch) +} + +func (a *agent_istio17860) runWait(epoch int) { + a.statusCh <- exitStatus_istio17860(epoch) +} + +func (a *agent_istio17860) waitUntilLive() { + if len(a.activeEpochs) == 0 { + return + } + + interval := time.NewTicker(30 * time.Nanosecond) + timer := time.NewTimer(100 * time.Nanosecond) + defer func() { + interval.Stop() + timer.Stop() + }() + + if a.proxy.IsLive() { + return + } + + for { + select { + case <-timer.C: + return + case <-interval.C: + if a.proxy.IsLive() { + return + } + } + } +} + +func NewAgent_istio17860(proxy Proxy_istio17860) Agent_istio17860 { + return &agent_istio17860{ + proxy: proxy, + mu: &sync.Mutex{}, + statusCh: make(chan exitStatus_istio17860), + activeEpochs: make(map[int]struct{}), + } +} + +func Istio17860() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + neverLive := func() bool { + return false + } + + a := NewAgent_istio17860(TestProxy_istio17860{live: neverLive}) + go func() { a.Run(ctx) }() + + a.Restart() + go a.Restart() + + time.Sleep(200 * time.Nanosecond) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/istio18454.go b/src/runtime/testdata/testgoroutineleakgc/istio18454.go new file mode 100644 index 00000000000000..680ff85d61152d --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/istio18454.go @@ -0,0 +1,151 @@ +package main + +import ( + "context" + "os" + "runtime/pprof" + + "sync" + "time" +) + +func init() { + register("Istio18454", Istio18454) +} + +const eventChCap_istio18454 = 1024 + +type Worker_istio18454 struct { + ctx context.Context + ctxCancel 
context.CancelFunc +} + +func (w *Worker_istio18454) Start(setupFn func(), runFn func(c context.Context)) { + if setupFn != nil { + setupFn() + } + go func() { + // deadlocks: x > 0 + runFn(w.ctx) + }() +} + +func (w *Worker_istio18454) Stop() { + w.ctxCancel() +} + +type Strategy_istio18454 struct { + timer *time.Timer + timerFrequency time.Duration + stateLock sync.Mutex + resetChan chan struct{} + worker *Worker_istio18454 + startTimerFn func() +} + +func (s *Strategy_istio18454) OnChange() { + s.stateLock.Lock() + if s.timer != nil { + s.stateLock.Unlock() + s.resetChan <- struct{}{} + return + } + s.startTimerFn() + s.stateLock.Unlock() +} + +func (s *Strategy_istio18454) startTimer() { + s.timer = time.NewTimer(s.timerFrequency) + eventLoop := func(ctx context.Context) { + for { + select { + case <-s.timer.C: + case <-s.resetChan: + if !s.timer.Stop() { + <-s.timer.C + } + s.timer.Reset(s.timerFrequency) + case <-ctx.Done(): + s.timer.Stop() + return + } + } + } + s.worker.Start(nil, eventLoop) +} + +func (s *Strategy_istio18454) Close() { + s.worker.Stop() +} + +type Event_istio18454 int + +type Processor_istio18454 struct { + stateStrategy *Strategy_istio18454 + worker *Worker_istio18454 + eventCh chan Event_istio18454 +} + +func (p *Processor_istio18454) processEvent() { + p.stateStrategy.OnChange() +} + +func (p *Processor_istio18454) Start() { + setupFn := func() { + for i := 0; i < eventChCap_istio18454; i++ { + p.eventCh <- Event_istio18454(0) + } + } + runFn := func(ctx context.Context) { + defer func() { + p.stateStrategy.Close() + }() + for { + select { + case <-ctx.Done(): + return + case <-p.eventCh: + p.processEvent() + } + } + } + p.worker.Start(setupFn, runFn) +} + +func (p *Processor_istio18454) Stop() { + p.worker.Stop() +} + +func NewWorker_istio18454() *Worker_istio18454 { + worker := &Worker_istio18454{} + worker.ctx, worker.ctxCancel = context.WithCancel(context.Background()) + return worker +} + +func Istio18454() { + prof := 
pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go func() { + stateStrategy := &Strategy_istio18454{ + timerFrequency: time.Nanosecond, + resetChan: make(chan struct{}, 1), + worker: NewWorker_istio18454(), + } + stateStrategy.startTimerFn = stateStrategy.startTimer + + p := &Processor_istio18454{ + stateStrategy: stateStrategy, + worker: NewWorker_istio18454(), + eventCh: make(chan Event_istio18454, eventChCap_istio18454), + } + + p.Start() + defer p.Stop() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go new file mode 100644 index 00000000000000..d83cb60c289dc2 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes10182.go @@ -0,0 +1,100 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/10182 + * Buggy version: 4b990d128a17eea9058d28a3b3688ab8abafbd94 + * fix commit-id: 64ad3e17ad15cd0f9a4fd86706eec1c572033254 + * Flaky: 15/100 + * Description: + * This is a lock-channel bug. goroutine 1 is blocked on a lock + * held by goroutine 3, while goroutine 3 is blocked on sending + * message to ch, which is read by goroutine 1. 
+ */ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Kubernetes10182", Kubernetes10182) +} + +type statusManager_kubernetes10182 struct { + podStatusesLock sync.RWMutex + podStatusChannel chan bool +} + +func (s *statusManager_kubernetes10182) Start() { + go func() { + // deadlocks: x > 0 + for i := 0; i < 2; i++ { + s.syncBatch() + } + }() +} + +func (s *statusManager_kubernetes10182) syncBatch() { + runtime.Gosched() + <-s.podStatusChannel + s.DeletePodStatus() +} + +func (s *statusManager_kubernetes10182) DeletePodStatus() { + s.podStatusesLock.Lock() + defer s.podStatusesLock.Unlock() +} + +func (s *statusManager_kubernetes10182) SetPodStatus() { + s.podStatusesLock.Lock() + defer s.podStatusesLock.Unlock() + s.podStatusChannel <- true +} + +func NewStatusManager_kubernetes10182() *statusManager_kubernetes10182 { + return &statusManager_kubernetes10182{ + podStatusChannel: make(chan bool), + } +} + +// Example of deadlock trace: +// +// G1 G2 G3 +// -------------------------------------------------------------------------------- +// s.Start() +// s.syncBatch() +// s.SetPodStatus() +// <-s.podStatusChannel +// s.podStatusesLock.Lock() +// s.podStatusChannel <- true +// s.podStatusesLock.Unlock() +// return +// s.DeletePodStatus() +// s.podStatusesLock.Lock() +// s.podStatusChannel <- true +// s.podStatusesLock.Lock() +// -----------------------------------G1,G3 leak------------------------------------- + +func Kubernetes10182() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 1000; i++ { + go func() { + // deadlocks: 0 + s := NewStatusManager_kubernetes10182() + // deadlocks: 0 + go s.Start() + // deadlocks: x > 0 + go s.SetPodStatus() + // deadlocks: x > 0 + go s.SetPodStatus() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go 
b/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go new file mode 100644 index 00000000000000..20e80e436647bc --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes11298.go @@ -0,0 +1,116 @@ +package main + +import ( + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Kubernetes11298", Kubernetes11298) +} + +type Signal_kubernetes11298 <-chan struct{} + +func After_kubernetes11298(f func()) Signal_kubernetes11298 { + ch := make(chan struct{}) + go func() { + // deadlocks: x > 0 + defer close(ch) + if f != nil { + f() + } + }() + return Signal_kubernetes11298(ch) +} + +func Until_kubernetes11298(f func(), period time.Duration, stopCh <-chan struct{}) { + if f == nil { + return + } + for { + select { + case <-stopCh: + return + default: + } + f() + select { + case <-stopCh: + case <-time.After(period): + } + } + +} + +type notifier_kubernetes11298 struct { + lock sync.Mutex + cond *sync.Cond +} + +// abort will be closed no matter what +func (n *notifier_kubernetes11298) serviceLoop(abort <-chan struct{}) { + n.lock.Lock() + defer n.lock.Unlock() + for { + select { + case <-abort: + return + default: + ch := After_kubernetes11298(func() { + n.cond.Wait() + }) + select { + case <-abort: + n.cond.Signal() + <-ch + return + case <-ch: + } + } + } +} + +// abort will be closed no matter what +func Notify_kubernetes11298(abort <-chan struct{}) { + n := ¬ifier_kubernetes11298{} + n.cond = sync.NewCond(&n.lock) + finished := After_kubernetes11298(func() { + Until_kubernetes11298(func() { + for { + select { + case <-abort: + return + default: + func() { + n.lock.Lock() + defer n.lock.Unlock() + n.cond.Signal() + }() + } + } + }, 0, abort) + }) + Until_kubernetes11298(func() { n.serviceLoop(finished) }, 0, abort) +} +func Kubernetes11298() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 1000; i++ { + go func() { + // 
deadlocks: x > 0 + done := make(chan struct{}) + notifyDone := After_kubernetes11298(func() { Notify_kubernetes11298(done) }) + go func() { + defer close(done) + time.Sleep(300 * time.Nanosecond) + }() + <-notifyDone + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go new file mode 100644 index 00000000000000..a50a4c80ff8951 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes13135.go @@ -0,0 +1,201 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/13135 + * Buggy version: 6ced66249d4fd2a81e86b4a71d8df0139fe5ceae + * fix commit-id: a12b7edc42c5c06a2e7d9f381975658692951d5a + * Flaky: 93/100 + */ +package main + +import ( + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Kubernetes13135", Kubernetes13135) +} + +var ( + StopChannel_kubernetes13135 chan struct{} +) + +func Util_kubernetes13135(f func(), period time.Duration, stopCh <-chan struct{}) { + for { + select { + case <-stopCh: + return + default: + } + func() { + f() + }() + time.Sleep(period) + } +} + +type Store_kubernetes13135 interface { + Add(obj interface{}) + Replace(obj interface{}) +} + +type Reflector_kubernetes13135 struct { + store Store_kubernetes13135 +} + +func (r *Reflector_kubernetes13135) ListAndWatch(stopCh <-chan struct{}) error { + r.syncWith() + return nil +} + +func NewReflector_kubernetes13135(store Store_kubernetes13135) *Reflector_kubernetes13135 { + return &Reflector_kubernetes13135{ + store: store, + } +} + +func (r *Reflector_kubernetes13135) syncWith() { + r.store.Replace(nil) +} + +type Cacher_kubernetes13135 struct { + sync.Mutex + initialized sync.WaitGroup + initOnce sync.Once + watchCache *WatchCache_kubernetes13135 + reflector *Reflector_kubernetes13135 +} + +func (c *Cacher_kubernetes13135) processEvent() { + c.Lock() + defer c.Unlock() +} + +func (c *Cacher_kubernetes13135) 
startCaching(stopChannel <-chan struct{}) { + c.Lock() + for { + err := c.reflector.ListAndWatch(stopChannel) + if err == nil { + break + } + } +} + +type WatchCache_kubernetes13135 struct { + sync.RWMutex + onReplace func() + onEvent func() +} + +func (w *WatchCache_kubernetes13135) SetOnEvent(onEvent func()) { + w.Lock() + defer w.Unlock() + w.onEvent = onEvent +} + +func (w *WatchCache_kubernetes13135) SetOnReplace(onReplace func()) { + w.Lock() + defer w.Unlock() + w.onReplace = onReplace +} + +func (w *WatchCache_kubernetes13135) processEvent() { + w.Lock() + defer w.Unlock() + if w.onEvent != nil { + w.onEvent() + } +} + +func (w *WatchCache_kubernetes13135) Add(obj interface{}) { + w.processEvent() +} + +func (w *WatchCache_kubernetes13135) Replace(obj interface{}) { + w.Lock() + defer w.Unlock() + if w.onReplace != nil { + w.onReplace() + } +} + +func NewCacher_kubernetes13135(stopCh <-chan struct{}) *Cacher_kubernetes13135 { + watchCache := &WatchCache_kubernetes13135{} + cacher := &Cacher_kubernetes13135{ + initialized: sync.WaitGroup{}, + watchCache: watchCache, + reflector: NewReflector_kubernetes13135(watchCache), + } + cacher.initialized.Add(1) + watchCache.SetOnReplace(func() { + cacher.initOnce.Do(func() { cacher.initialized.Done() }) + cacher.Unlock() + }) + watchCache.SetOnEvent(cacher.processEvent) + go Util_kubernetes13135(func() { cacher.startCaching(stopCh) }, 0, stopCh) // G2 + cacher.initialized.Wait() + return cacher +} + +/// +/// G1 G2 G3 +/// NewCacher() +/// watchCache.SetOnReplace() +/// watchCache.SetOnEvent() +/// cacher.startCaching() +/// c.Lock() +/// c.reflector.ListAndWatch() +/// r.syncWith() +/// r.store.Replace() +/// w.Lock() +/// w.onReplace() +/// cacher.initOnce.Do() +/// cacher.Unlock() +/// return cacher +/// c.watchCache.Add() +/// w.processEvent() +/// w.Lock() +/// cacher.startCaching() +/// c.Lock() +/// ... 
+/// c.Lock() +/// w.Lock() +///--------------------------------G2,G3 deadlock------------------------------------- +/// + +/// +/// G1 G2 G3 +/// NewCacher() +/// watchCache.SetOnReplace() +/// watchCache.SetOnEvent() +/// cacher.initialized.Wait() +/// Util(...) +/// cacher.startCaching() +/// c.Lock() +/// c.reflector.ListAndWatch() +/// r.syncWith() +///--------------------------------G1 deadlocks------------------------------------- +func Kubernetes13135() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + StopChannel_kubernetes13135 = make(chan struct{}) + for i := 0; i < 50; i++ { + go func() { + // Should create a local channel. Using a single global channel + // concurrently will cause a deadlock which does not actually exist + // in the original microbenchmark. + StopChannel_kubernetes13135 := make(chan struct{}) + + // deadlocks: x > 0 + c := NewCacher_kubernetes13135(StopChannel_kubernetes13135) // G1 + go c.watchCache.Add(nil) // G3 + go close(StopChannel_kubernetes13135) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go new file mode 100644 index 00000000000000..c5b3e2f27378e6 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes1321.go @@ -0,0 +1,124 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/1321 + * Buggy version: 9cd0fc70f1ca852c903b18b0933991036b3b2fa1 + * fix commit-id: 435e0b73bb99862f9dedf56a50260ff3dfef14ff + * Flaky: 1/100 + * Description: + * This is a lock-channel bug. The first goroutine invokes + * distribute() function. distribute() function holds m.lock.Lock(), + * while blocking at sending message to w.result. The second goroutine + * invokes stopWatching() function, which can unblock the first + * goroutine by closing w.result. 
However, in order to close w.result, + * stopWatching() function must first acquire m.lock.Lock(). + * The fix is to introduce another channel and receive from that + * second channel in the same select as w.result. Closing + * the second channel unblocks the first goroutine without needing + * to hold m.lock.Lock(). + */ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Kubernetes1321", Kubernetes1321) +} + +type muxWatcher_kubernetes1321 struct { + result chan struct{} + m *Mux_kubernetes1321 + id int64 +} + +func (mw *muxWatcher_kubernetes1321) Stop() { + mw.m.stopWatching(mw.id) +} + +type Mux_kubernetes1321 struct { + lock sync.Mutex + watchers map[int64]*muxWatcher_kubernetes1321 +} + +func NewMux_kubernetes1321() *Mux_kubernetes1321 { + m := &Mux_kubernetes1321{ + watchers: map[int64]*muxWatcher_kubernetes1321{}, + } + // deadlocks: x > 0 + go m.loop() // G2 + return m +} + +func (m *Mux_kubernetes1321) Watch() *muxWatcher_kubernetes1321 { + mw := &muxWatcher_kubernetes1321{ + result: make(chan struct{}), + m: m, + id: int64(len(m.watchers)), + } + m.watchers[mw.id] = mw + runtime.Gosched() + return mw +} + +func (m *Mux_kubernetes1321) loop() { + for i := 0; i < 100; i++ { + m.distribute() + } +} + +func (m *Mux_kubernetes1321) distribute() { + m.lock.Lock() + defer m.lock.Unlock() + for _, w := range m.watchers { + w.result <- struct{}{} + runtime.Gosched() + } +} + +func (m *Mux_kubernetes1321) stopWatching(id int64) { + m.lock.Lock() + defer m.lock.Unlock() + w, ok := m.watchers[id] + if !ok { + return + } + delete(m.watchers, id) + close(w.result) +} + +func testMuxWatcherClose_kubernetes1321() { + // deadlocks: x > 0 + m := NewMux_kubernetes1321() + m.watchers[m.Watch().id].Stop() +} + +/// +/// G1 G2 +/// testMuxWatcherClose() +/// NewMux() +/// m.loop() +/// m.distribute() +/// m.lock.Lock() +/// w.result <- true +/// w := m.Watch() +/// w.Stop() +/// 
mw.m.stopWatching() +/// m.lock.Lock() +/// ---------------G1,G2 deadlock--------------- +/// + +func Kubernetes1321() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 1000; i++ { + go testMuxWatcherClose_kubernetes1321() // G1 + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go new file mode 100644 index 00000000000000..56d9d127af1dbb --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes25331.go @@ -0,0 +1,83 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/25331 + * Buggy version: 5dd087040bb13434f1ddf2f0693d0203c30f28cb + * fix commit-id: 97f4647dc3d8cf46c2b66b89a31c758a6edfb57c + * Flaky: 100/100 + * Description: + * In reflector.go, it could probably call Stop() without retrieving + * all results from ResultChan(). See here. A potential leak is that + * when an error has happened, it could block on resultChan, and then + * cancelling context in Stop() wouldn't unblock it. 
+ */ +package main + +import ( + "context" + "errors" + "os" + "runtime" + "runtime/pprof" +) + +func init() { + register("Kubernetes25331", Kubernetes25331) +} + +type watchChan_kubernetes25331 struct { + ctx context.Context + cancel context.CancelFunc + resultChan chan bool + errChan chan error +} + +func (wc *watchChan_kubernetes25331) Stop() { + wc.errChan <- errors.New("Error") + wc.cancel() +} + +func (wc *watchChan_kubernetes25331) run() { + select { + case err := <-wc.errChan: + errResult := len(err.Error()) != 0 + wc.cancel() // Removed in fix + wc.resultChan <- errResult + case <-wc.ctx.Done(): + } +} + +func NewWatchChan_kubernetes25331() *watchChan_kubernetes25331 { + ctx, cancel := context.WithCancel(context.Background()) + return &watchChan_kubernetes25331{ + ctx: ctx, + cancel: cancel, + resultChan: make(chan bool), + errChan: make(chan error), + } +} + +/// +/// G1 G2 +/// wc.run() +/// wc.Stop() +/// wc.errChan <- +/// wc.cancel() +/// <-wc.errChan +/// wc.cancel() +/// wc.resultChan <- +/// -------------G1 leak---------------- +/// + +func Kubernetes25331() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + go func() { + wc := NewWatchChan_kubernetes25331() + // deadlocks: 1 + go wc.run() // G1 + go wc.Stop() // G2 + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go new file mode 100644 index 00000000000000..1ad2c6c28f63f4 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes26980.go @@ -0,0 +1,86 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Kubernetes26980", Kubernetes26980) +} + +type processorListener_kubernetes26980 struct { + lock sync.RWMutex + cond sync.Cond + + pendingNotifications []interface{} +} + +func (p *processorListener_kubernetes26980) add(notification interface{}) { + p.lock.Lock() + 
defer p.lock.Unlock() + + p.pendingNotifications = append(p.pendingNotifications, notification) + p.cond.Broadcast() +} + +func (p *processorListener_kubernetes26980) pop(stopCh <-chan struct{}) { + p.lock.Lock() + runtime.Gosched() + defer p.lock.Unlock() + for { + for len(p.pendingNotifications) == 0 { + select { + case <-stopCh: + return + default: + } + p.cond.Wait() + } + select { + case <-stopCh: + return + } + } +} + +func newProcessListener_kubernetes26980() *processorListener_kubernetes26980 { + ret := &processorListener_kubernetes26980{ + pendingNotifications: []interface{}{}, + } + ret.cond.L = &ret.lock + return ret +} +func Kubernetes26980() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 3000; i++ { + go func() { + // deadlocks: x > 0 + pl := newProcessListener_kubernetes26980() + stopCh := make(chan struct{}) + defer close(stopCh) + pl.add(1) + runtime.Gosched() + // deadlocks: x > 0 + go pl.pop(stopCh) + + resultCh := make(chan struct{}) + go func() { + // deadlocks: x > 0 + pl.lock.Lock() + close(resultCh) + }() + runtime.Gosched() + <-resultCh + pl.lock.Unlock() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go new file mode 100644 index 00000000000000..b3661168d78b58 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes30872.go @@ -0,0 +1,259 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Kubernetes30872", Kubernetes30872) +} + +type PopProcessFunc_kubernetes30872 func() + +type ProcessFunc_kubernetes30872 func() + +func Util_kubernetes30872(f func(), stopCh <-chan struct{}) { + JitterUntil_kubernetes30872(f, stopCh) +} + +func JitterUntil_kubernetes30872(f func(), stopCh <-chan struct{}) { + for { + select { + case <-stopCh: + return + default: + } + func() { + 
f() + }() + } +} + +type Queue_kubernetes30872 interface { + HasSynced() + Pop(PopProcessFunc_kubernetes30872) +} + +type Config_kubernetes30872 struct { + Queue Queue_kubernetes30872 + Process ProcessFunc_kubernetes30872 +} + +type Controller_kubernetes30872 struct { + config Config_kubernetes30872 +} + +func (c *Controller_kubernetes30872) Run(stopCh <-chan struct{}) { + Util_kubernetes30872(c.processLoop, stopCh) +} + +func (c *Controller_kubernetes30872) HasSynced() { + c.config.Queue.HasSynced() +} + +func (c *Controller_kubernetes30872) processLoop() { + c.config.Queue.Pop(PopProcessFunc_kubernetes30872(c.config.Process)) +} + +type ControllerInterface_kubernetes30872 interface { + Run(<-chan struct{}) + HasSynced() +} + +type ResourceEventHandler_kubernetes30872 interface { + OnAdd() +} + +type ResourceEventHandlerFuncs_kubernetes30872 struct { + AddFunc func() +} + +func (r ResourceEventHandlerFuncs_kubernetes30872) OnAdd() { + if r.AddFunc != nil { + r.AddFunc() + } +} + +type informer_kubernetes30872 struct { + controller ControllerInterface_kubernetes30872 + + stopChan chan struct{} +} + +type federatedInformerImpl_kubernetes30872 struct { + sync.Mutex + clusterInformer informer_kubernetes30872 +} + +func (f *federatedInformerImpl_kubernetes30872) ClustersSynced() { + f.Lock() // L1 + defer f.Unlock() + f.clusterInformer.controller.HasSynced() +} + +func (f *federatedInformerImpl_kubernetes30872) addCluster() { + f.Lock() // L1 + defer f.Unlock() +} + +func (f *federatedInformerImpl_kubernetes30872) Start() { + f.Lock() // L1 + defer f.Unlock() + + f.clusterInformer.stopChan = make(chan struct{}) + // deadlocks: x > 0 + go f.clusterInformer.controller.Run(f.clusterInformer.stopChan) // G2 + runtime.Gosched() +} + +func (f *federatedInformerImpl_kubernetes30872) Stop() { + f.Lock() // L1 + defer f.Unlock() + close(f.clusterInformer.stopChan) +} + +type DelayingDeliverer_kubernetes30872 struct{} + +func (d *DelayingDeliverer_kubernetes30872) 
StartWithHandler(handler func()) { + go func() { // G4 + // deadlocks: x > 0 + handler() + }() +} + +type FederationView_kubernetes30872 interface { + ClustersSynced() +} + +type FederatedInformer_kubernetes30872 interface { + FederationView_kubernetes30872 + Start() + Stop() +} + +type NamespaceController_kubernetes30872 struct { + namespaceDeliverer *DelayingDeliverer_kubernetes30872 + namespaceFederatedInformer FederatedInformer_kubernetes30872 +} + +func (nc *NamespaceController_kubernetes30872) isSynced() { + nc.namespaceFederatedInformer.ClustersSynced() +} + +func (nc *NamespaceController_kubernetes30872) reconcileNamespace() { + nc.isSynced() +} + +func (nc *NamespaceController_kubernetes30872) Run(stopChan <-chan struct{}) { + nc.namespaceFederatedInformer.Start() + go func() { // G3 + // deadlocks: x > 0 + <-stopChan + nc.namespaceFederatedInformer.Stop() + }() + nc.namespaceDeliverer.StartWithHandler(func() { + nc.reconcileNamespace() + }) +} + +type DeltaFIFO_kubernetes30872 struct { + lock sync.RWMutex +} + +func (f *DeltaFIFO_kubernetes30872) HasSynced() { + f.lock.Lock() // L2 + defer f.lock.Unlock() +} + +func (f *DeltaFIFO_kubernetes30872) Pop(process PopProcessFunc_kubernetes30872) { + f.lock.Lock() // L2 + defer f.lock.Unlock() + process() +} + +func NewFederatedInformer_kubernetes30872() FederatedInformer_kubernetes30872 { + federatedInformer := &federatedInformerImpl_kubernetes30872{} + federatedInformer.clusterInformer.controller = NewInformer_kubernetes30872( + ResourceEventHandlerFuncs_kubernetes30872{ + AddFunc: func() { + federatedInformer.addCluster() + }, + }) + return federatedInformer +} + +func NewInformer_kubernetes30872(h ResourceEventHandler_kubernetes30872) *Controller_kubernetes30872 { + fifo := &DeltaFIFO_kubernetes30872{} + cfg := &Config_kubernetes30872{ + Queue: fifo, + Process: func() { + h.OnAdd() + }, + } + return &Controller_kubernetes30872{config: *cfg} +} + +func NewNamespaceController_kubernetes30872() 
*NamespaceController_kubernetes30872 { + nc := &NamespaceController_kubernetes30872{} + nc.namespaceDeliverer = &DelayingDeliverer_kubernetes30872{} + nc.namespaceFederatedInformer = NewFederatedInformer_kubernetes30872() + return nc +} + +func Kubernetes30872() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go func() { // G1 + namespaceController := NewNamespaceController_kubernetes30872() + stop := make(chan struct{}) + namespaceController.Run(stop) + close(stop) + }() + } +} + +/// Example of deadlocking trace. +/// +/// G1 G2 G3 G4 +/// --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +/// namespaceController.Run() +/// nc.namespaceFederatedInformer.Start() +/// f.Lock() [L1] +/// go f.clusterInformer.controller.Run()[G2] +/// <<>> +/// . Util(c.processLoop, stopCh) +/// . c.config.Queue.Pop() +/// . f.lock.Lock() [L2] +/// . process() +/// . h.OnAdd() +/// . r.AddFunc() +/// . federatedInformer.addCluster() +/// . f.Lock() [L1] +/// f.Unlock() [L1] . +/// go func()[G3] . +/// nc.namespaceDeliverer.StartWithHandler() . . +/// go func()[G4] . . +/// close(stop) . . . +/// <<>> . . . +/// . <-stopChan . +/// . nc.namespaceFederatedInformer.Stop() . +/// . f.Lock() [L1] . +/// . . handler() +/// . . nc.reconcileNamespace() +/// . . nc.isSynced() +/// . . nc.namespaceFederatedInformer.ClustersSynced() +/// . . f.Lock() [L1] +/// . . f.clusterInformer.controller.HasSynced() +/// . . c.config.Queue.HasSynced() +/// . . 
f.lock.Lock() [L2] +///----------------------------------------------------------------------------G2,G3,G4 leak---------------------------------------------------------------------------------------------- +/// diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go new file mode 100644 index 00000000000000..1e132d9221bca6 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes38669.go @@ -0,0 +1,77 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" +) + +func init() { + register("Kubernetes38669", Kubernetes38669) +} + +type Event_kubernetes38669 int +type watchCacheEvent_kubernetes38669 int + +type cacheWatcher_kubernetes38669 struct { + sync.Mutex + input chan watchCacheEvent_kubernetes38669 + result chan Event_kubernetes38669 + stopped bool +} + +func (c *cacheWatcher_kubernetes38669) process(initEvents []watchCacheEvent_kubernetes38669) { + for _, event := range initEvents { + c.sendWatchCacheEvent(&event) + } + defer close(c.result) + defer c.Stop() + for { + _, ok := <-c.input + if !ok { + return + } + } +} + +func (c *cacheWatcher_kubernetes38669) sendWatchCacheEvent(event *watchCacheEvent_kubernetes38669) { + c.result <- Event_kubernetes38669(*event) +} + +func (c *cacheWatcher_kubernetes38669) Stop() { + c.stop() +} + +func (c *cacheWatcher_kubernetes38669) stop() { + c.Lock() + defer c.Unlock() + if !c.stopped { + c.stopped = true + close(c.input) + } +} + +func newCacheWatcher_kubernetes38669(chanSize int, initEvents []watchCacheEvent_kubernetes38669) *cacheWatcher_kubernetes38669 { + watcher := &cacheWatcher_kubernetes38669{ + input: make(chan watchCacheEvent_kubernetes38669, chanSize), + result: make(chan Event_kubernetes38669, chanSize), + stopped: false, + } + // deadlocks: 1 + go watcher.process(initEvents) + return watcher +} + +func Kubernetes38669() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() 
+ prof.WriteTo(os.Stdout, 2) + }() + go func() { + initEvents := []watchCacheEvent_kubernetes38669{1, 2} + w := newCacheWatcher_kubernetes38669(0, initEvents) + w.Stop() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go new file mode 100644 index 00000000000000..832d59ca504c6f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes5316.go @@ -0,0 +1,73 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/5316 + * Buggy version: c868b0bbf09128960bc7c4ada1a77347a464d876 + * fix commit-id: cc3a433a7abc89d2f766d4c87eaae9448e3dc091 + * Flaky: 100/100 + * Description: + * If the main goroutine selects a case that doesn’t consumes + * the channels, the anonymous goroutine will be blocked on sending + * to channel. + */ + +package main + +import ( + "errors" + "math/rand" + "os" + "runtime" + "runtime/pprof" + "time" +) + +func init() { + register("Kubernetes5316", Kubernetes5316) +} + +func finishRequest_kubernetes5316(timeout time.Duration, fn func() error) { + ch := make(chan bool) + errCh := make(chan error) + go func() { // G2 + // deadlocks: 1 + if err := fn(); err != nil { + errCh <- err + } else { + ch <- true + } + }() + + select { + case <-ch: + case <-errCh: + case <-time.After(timeout): + } +} + +/// +/// G1 G2 +/// finishRequest() +/// fn() +/// time.After() +/// errCh<-/ch<- +/// --------------G2 leak---------------- +/// + +func Kubernetes5316() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + go func() { + fn := func() error { + time.Sleep(2 * time.Millisecond) + if rand.Intn(10) > 5 { + return errors.New("Error") + } + return nil + } + go finishRequest_kubernetes5316(time.Millisecond, fn) // G1 + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go 
b/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go new file mode 100644 index 00000000000000..7b4fed76f5be26 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes58107.go @@ -0,0 +1,137 @@ +/* + * Project: kubernetes + * Tag: Reproduce misbehavior + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/58107 + * Buggy version: 2f17d782eb2772d6401da7ddced9ac90656a7a79 + * fix commit-id: 010a127314a935d8d038f8dd4559fc5b249813e4 + * Flaky: 53/100 + * Description: + * The rules for read and write lock: allows concurrent read lock; + * write lock has higher priority than read lock. + * There are two queues (queue 1 and queue 2) involved in this bug, + * and the two queues are protected by the same read-write lock + * (rq.workerLock.RLock()). Before getting an element from queue 1 or + * queue 2, rq.workerLock.RLock() is acquired. If the queue is empty, + * cond.Wait() will be invoked. There is another goroutine (goroutine D), + * which will periodically invoke rq.workerLock.Lock(). Under the following + * situation, deadlock will happen. Queue 1 is empty, so that some goroutines + * hold rq.workerLock.RLock(), and block at cond.Wait(). Goroutine D is + * blocked when acquiring rq.workerLock.Lock(). Some goroutines try to process + * jobs in queue 2, but they are blocked when acquiring rq.workerLock.RLock(), + * since write lock has a higher priority. + * The fix is to not acquire rq.workerLock.RLock(), while pulling data + * from any queue. Therefore, when a goroutine is blocked at cond.Wait(), + * rq.workLock.RLock() is not held. 
+ */ + +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Kubernetes58107", Kubernetes58107) +} + +type RateLimitingInterface_kubernetes58107 interface { + Get() + Put() +} + +type Type_kubernetes58107 struct { + cond *sync.Cond +} + +func (q *Type_kubernetes58107) Get() { + q.cond.L.Lock() + defer q.cond.L.Unlock() + q.cond.Wait() +} + +func (q *Type_kubernetes58107) Put() { + q.cond.Signal() +} + +type ResourceQuotaController_kubernetes58107 struct { + workerLock sync.RWMutex + queue RateLimitingInterface_kubernetes58107 + missingUsageQueue RateLimitingInterface_kubernetes58107 +} + +func (rq *ResourceQuotaController_kubernetes58107) worker(queue RateLimitingInterface_kubernetes58107, _ string) { + workFunc := func() bool { + rq.workerLock.RLock() + defer rq.workerLock.RUnlock() + queue.Get() + return true + } + for { + if quit := workFunc(); quit { + return + } + } +} + +func (rq *ResourceQuotaController_kubernetes58107) Run() { + // deadlocks: x > 0 + go rq.worker(rq.queue, "G1") // G3 + // deadlocks: x > 0 + go rq.worker(rq.missingUsageQueue, "G2") // G4 +} + +func (rq *ResourceQuotaController_kubernetes58107) Sync() { + for i := 0; i < 100000; i++ { + rq.workerLock.Lock() + runtime.Gosched() + rq.workerLock.Unlock() + } +} + +func (rq *ResourceQuotaController_kubernetes58107) HelperSignals() { + for i := 0; i < 100000; i++ { + rq.queue.Put() + rq.missingUsageQueue.Put() + } +} + +func startResourceQuotaController_kubernetes58107() { + resourceQuotaController := &ResourceQuotaController_kubernetes58107{ + queue: &Type_kubernetes58107{sync.NewCond(&sync.Mutex{})}, + missingUsageQueue: &Type_kubernetes58107{sync.NewCond(&sync.Mutex{})}, + } + + go resourceQuotaController.Run() // G2 + // deadlocks: x > 0 + go resourceQuotaController.Sync() // G5 + resourceQuotaController.HelperSignals() +} + +func Kubernetes58107() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(1000 * 
time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 1000; i++ { + go startResourceQuotaController_kubernetes58107() // G1 + } +} + +// Example of deadlock: +// +// G1 G3 G4 G5 +// ------------------------------------------------------------------------------------------------------------ +// <<>> (no more signals) ... ... Sync() +// rq.workerLock.RLock() . . +// q.cond.L.Lock() . . +// q.cond.Wait() . . +// . . rq.workerLock.Lock() +// . rq.workerLock.RLock() . +// . q.cond.L.Lock() . +// --------------------------------------------G3, G4, G5 leak------------------------------------------------- diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go new file mode 100644 index 00000000000000..90669b0e545d5c --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes62464.go @@ -0,0 +1,122 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/62464 + * Buggy version: a048ca888ad27367b1a7b7377c67658920adbf5d + * fix commit-id: c1b19fce903675b82e9fdd1befcc5f5d658bfe78 + * Flaky: 8/100 + * Description: + * This is another example for recursive read lock bug. It has + * been noticed by the go developers that RLock should not be + * recursively used in the same thread. 
+ */ + +package main + +import ( + "math/rand" + "os" + "runtime/pprof" + "runtime" + "sync" + "time" +) + +func init() { + register("Kubernetes62464", Kubernetes62464) +} + +type State_kubernetes62464 interface { + GetCPUSetOrDefault() + GetCPUSet() bool + GetDefaultCPUSet() + SetDefaultCPUSet() +} + +type stateMemory_kubernetes62464 struct { + sync.RWMutex +} + +func (s *stateMemory_kubernetes62464) GetCPUSetOrDefault() { + s.RLock() + defer s.RUnlock() + if ok := s.GetCPUSet(); ok { + return + } + s.GetDefaultCPUSet() +} + +func (s *stateMemory_kubernetes62464) GetCPUSet() bool { + runtime.Gosched() + s.RLock() + defer s.RUnlock() + + if rand.Intn(10) > 5 { + return true + } + return false +} + +func (s *stateMemory_kubernetes62464) GetDefaultCPUSet() { + s.RLock() + defer s.RUnlock() +} + +func (s *stateMemory_kubernetes62464) SetDefaultCPUSet() { + s.Lock() + runtime.Gosched() + defer s.Unlock() +} + +type staticPolicy_kubernetes62464 struct{} + +func (p *staticPolicy_kubernetes62464) RemoveContainer(s State_kubernetes62464) { + s.GetDefaultCPUSet() + s.SetDefaultCPUSet() +} + +type manager_kubernetes62464 struct { + state *stateMemory_kubernetes62464 +} + +func (m *manager_kubernetes62464) reconcileState() { + m.state.GetCPUSetOrDefault() +} + +func NewPolicyAndManager_kubernetes62464() (*staticPolicy_kubernetes62464, *manager_kubernetes62464) { + s := &stateMemory_kubernetes62464{} + m := &manager_kubernetes62464{s} + p := &staticPolicy_kubernetes62464{} + return p, m +} + +/// +/// G1 G2 +/// m.reconcileState() +/// m.state.GetCPUSetOrDefault() +/// s.RLock() +/// s.GetCPUSet() +/// p.RemoveContainer() +/// s.GetDefaultCPUSet() +/// s.SetDefaultCPUSet() +/// s.Lock() +/// s.RLock() +/// ---------------------G1,G2 deadlock--------------------- +/// + +func Kubernetes62464() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 1000; i++ { + go func() { + p, m := 
NewPolicyAndManager_kubernetes62464() + // deadlocks: x > 0 + go m.reconcileState() + // deadlocks: x > 0 + go p.RemoveContainer(m.state) + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go new file mode 100644 index 00000000000000..f3222c46d436e1 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes6632.go @@ -0,0 +1,101 @@ +/* + * Project: kubernetes + * Issue or PR : https://github.com/kubernetes/kubernetes/pull/6632 + * Buggy version: e597b41d939573502c8dda1dde7bf3439325fb5d + * fix commit-id: 82afb7ab1fe12cf2efceede2322d082eaf5d5adc + * Flaky: 4/100 + * Description: + * This is a lock-channel bug. When resetChan is full, WriteFrame + * holds the lock and blocks on the channel. Then monitor() fails + * to close the resetChan because lock is already held by WriteFrame. + * Fix: create a goroutine to drain the channel + */ +package main + +import ( + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Kubernetes6632", Kubernetes6632) +} + +type Connection_kubernetes6632 struct { + closeChan chan bool +} + +type idleAwareFramer_kubernetes6632 struct { + resetChan chan bool + writeLock sync.Mutex + conn *Connection_kubernetes6632 +} + +func (i *idleAwareFramer_kubernetes6632) monitor() { + var resetChan = i.resetChan +Loop: + for { + select { + case <-i.conn.closeChan: + i.writeLock.Lock() + close(resetChan) + i.resetChan = nil + i.writeLock.Unlock() + break Loop + } + } +} + +func (i *idleAwareFramer_kubernetes6632) WriteFrame() { + i.writeLock.Lock() + defer i.writeLock.Unlock() + if i.resetChan == nil { + return + } + i.resetChan <- true +} + +func NewIdleAwareFramer_kubernetes6632() *idleAwareFramer_kubernetes6632 { + return &idleAwareFramer_kubernetes6632{ + resetChan: make(chan bool), + conn: &Connection_kubernetes6632{ + closeChan: make(chan bool), + }, + } +} + +/// +/// G1 G2 helper goroutine +/// i.monitor() +/// 
<-i.conn.closeChan +/// i.WriteFrame() +/// i.writeLock.Lock() +/// i.resetChan <- +/// i.conn.closeChan<- +/// i.writeLock.Lock() +/// ----------------------G1,G2 deadlock------------------------ +/// + +func Kubernetes6632() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go func() { + i := NewIdleAwareFramer_kubernetes6632() + + go func() { // helper goroutine + i.conn.closeChan <- true + }() + // deadlocks: x > 0 + go i.monitor() // G1 + // deadlocks: x > 0 + go i.WriteFrame() // G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go b/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go new file mode 100644 index 00000000000000..472c65149e25b4 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/kubernetes70277.go @@ -0,0 +1,94 @@ +package main + +import ( + "os" + "runtime/pprof" + "time" +) + +func init() { + register("Kubernetes70277", Kubernetes70277) +} + +type WaitFunc_kubernetes70277 func(done <-chan struct{}) <-chan struct{} + +type ConditionFunc_kubernetes70277 func() (done bool, err error) + +func WaitFor_kubernetes70277(wait WaitFunc_kubernetes70277, fn ConditionFunc_kubernetes70277, done <-chan struct{}) error { + c := wait(done) + for { + _, open := <-c + ok, err := fn() + if err != nil { + return err + } + if ok { + return nil + } + if !open { + break + } + } + return nil +} + +func poller_kubernetes70277(interval, timeout time.Duration) WaitFunc_kubernetes70277 { + return WaitFunc_kubernetes70277(func(done <-chan struct{}) <-chan struct{} { + ch := make(chan struct{}) + go func() { + defer close(ch) + + tick := time.NewTicker(interval) + defer tick.Stop() + + var after <-chan time.Time + if timeout != 0 { + timer := time.NewTimer(timeout) + after = timer.C + defer timer.Stop() + } + for { + select { + case <-tick.C: + select { + case ch <- struct{}{}: + default: + } + case <-after: + 
return + case <-done: + return + } + } + }() + + return ch + }) +} + +func Kubernetes70277() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 1000; i++ { + go func() { + // deadlocks: x > 0 + stopCh := make(chan struct{}) + defer close(stopCh) + waitFunc := poller_kubernetes70277(time.Millisecond, 80*time.Millisecond) + var doneCh <-chan struct{} + + WaitFor_kubernetes70277(func(done <-chan struct{}) <-chan struct{} { + doneCh = done + return waitFunc(done) + }, func() (bool, error) { + time.Sleep(10 * time.Millisecond) + return true, nil + }, stopCh) + + <-doneCh // block here + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/main.go b/src/runtime/testdata/testgoroutineleakgc/main.go new file mode 100644 index 00000000000000..ae491a2a978043 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/main.go @@ -0,0 +1,35 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package main + +import "os" + +var cmds = map[string]func(){} + +func register(name string, f func()) { + if cmds[name] != nil { + panic("duplicate registration: " + name) + } + cmds[name] = f +} + +func registerInit(name string, f func()) { + if len(os.Args) >= 2 && os.Args[1] == name { + f() + } +} + +func main() { + if len(os.Args) < 2 { + println("usage: " + os.Args[0] + " name-of-test") + return + } + f := cmds[os.Args[1]] + if f == nil { + println("unknown function: " + os.Args[1]) + return + } + f() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby17176.go b/src/runtime/testdata/testgoroutineleakgc/moby17176.go new file mode 100644 index 00000000000000..484b255a2520d6 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby17176.go @@ -0,0 +1,76 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/17176 + * Buggy version: d295dc66521e2734390473ec1f1da8a73ad3288a + * fix commit-id: 2f16895ee94848e2d8ad72bc01968b4c88d84cb8 + * Flaky: 100/100 + * Description: + * devices.nrDeletedDevices takes devices.Lock() but does + * not drop it if there are no deleted devices. This will block + * other goroutines trying to acquire devices.Lock(). + * In general reason is that when device deletion is happning, + * we can try deletion/deactivation in a loop. And that that time + * we don't want to block rest of the device operations in parallel. + * So we drop the inner devices lock while continue to hold per + * device lock + * A test is added for this bug, and we need to try whether + * this bug can be reproduced. 
+ */ +package main + +import ( + "errors" + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Moby17176", Moby17176) +} + +type DeviceSet_moby17176 struct { + sync.Mutex + nrDeletedDevices int +} + +func (devices *DeviceSet_moby17176) cleanupDeletedDevices() error { + devices.Lock() + if devices.nrDeletedDevices == 0 { + /// Missing devices.Unlock() + return nil + } + devices.Unlock() + return errors.New("Error") +} + +func testDevmapperLockReleasedDeviceDeletion_moby17176() { + ds := &DeviceSet_moby17176{ + nrDeletedDevices: 0, + } + ds.cleanupDeletedDevices() + doneChan := make(chan bool) + go func() { + // deadlocks: x > 0 + ds.Lock() + defer ds.Unlock() + doneChan <- true + }() + + select { + case <-time.After(time.Millisecond): + case <-doneChan: + } +} +func Moby17176() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go testDevmapperLockReleasedDeviceDeletion_moby17176() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby21233.go b/src/runtime/testdata/testgoroutineleakgc/moby21233.go new file mode 100644 index 00000000000000..60dcbdeaa29fda --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby21233.go @@ -0,0 +1,169 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/21233 + * Buggy version: cc12d2bfaae135e63b1f962ad80e6943dd995337 + * fix commit-id: 2f4aa9658408ac72a598363c6e22eadf93dbb8a7 + * Flaky:100/100 + * Description: + * This test was checking that it received every progress update that was + * produced. But delivery of these intermediate progress updates is not + * guaranteed. A new update can overwrite the previous one if the previous + * one hasn't been sent to the channel yet. + * The call to t.Fatalf exited the cur rent goroutine which was consuming + * the channel, which caused a deadlock and eventual test timeout rather + * than a proper failure message. 
+ */ +package main + +import ( + "math/rand" + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Moby21233", Moby21233) +} + +type Progress_moby21233 struct{} + +type Output_moby21233 interface { + WriteProgress(Progress_moby21233) error +} + +type chanOutput_moby21233 chan<- Progress_moby21233 + +type TransferManager_moby21233 struct { + mu sync.Mutex +} + +type Transfer_moby21233 struct { + mu sync.Mutex +} + +type Watcher_moby21233 struct { + signalChan chan struct{} + releaseChan chan struct{} + running chan struct{} +} + +func ChanOutput_moby21233(progressChan chan<- Progress_moby21233) Output_moby21233 { + return chanOutput_moby21233(progressChan) +} +func (out chanOutput_moby21233) WriteProgress(p Progress_moby21233) error { + out <- p + return nil +} +func NewTransferManager_moby21233() *TransferManager_moby21233 { + return &TransferManager_moby21233{} +} +func NewTransfer_moby21233() *Transfer_moby21233 { + return &Transfer_moby21233{} +} +func (t *Transfer_moby21233) Release(watcher *Watcher_moby21233) { + t.mu.Lock() + t.mu.Unlock() + close(watcher.releaseChan) + <-watcher.running +} +func (t *Transfer_moby21233) Watch(progressOutput Output_moby21233) *Watcher_moby21233 { + t.mu.Lock() + defer t.mu.Unlock() + lastProgress := Progress_moby21233{} + w := &Watcher_moby21233{ + releaseChan: make(chan struct{}), + signalChan: make(chan struct{}), + running: make(chan struct{}), + } + go func() { // G2 + // deadlocks: x > 0 + defer func() { + close(w.running) + }() + done := false + for { + t.mu.Lock() + t.mu.Unlock() + if rand.Int31n(2) >= 1 { + progressOutput.WriteProgress(lastProgress) + } + if done { + return + } + select { + case <-w.signalChan: + case <-w.releaseChan: + done = true + } + } + }() + return w +} +func (tm *TransferManager_moby21233) Transfer(progressOutput Output_moby21233) (*Transfer_moby21233, *Watcher_moby21233) { + tm.mu.Lock() + defer tm.mu.Unlock() + t := NewTransfer_moby21233() + return t, 
t.Watch(progressOutput) +} + +func testTransfer_moby21233() { // G1 + // deadlocks: x > 0 + tm := NewTransferManager_moby21233() + progressChan := make(chan Progress_moby21233) + progressDone := make(chan struct{}) + go func() { // G3 + time.Sleep(1 * time.Millisecond) + for p := range progressChan { /// Chan consumer + if rand.Int31n(2) >= 1 { + return + } + _ = p + } + close(progressDone) + }() + time.Sleep(1 * time.Millisecond) + ids := []string{"id1", "id2", "id3"} + xrefs := make([]*Transfer_moby21233, len(ids)) + watchers := make([]*Watcher_moby21233, len(ids)) + for i := range ids { + xrefs[i], watchers[i] = tm.Transfer(ChanOutput_moby21233(progressChan)) /// Chan producer + time.Sleep(2 * time.Millisecond) + } + + for i := range xrefs { + xrefs[i].Release(watchers[i]) + } + + close(progressChan) + <-progressDone +} + +func Moby21233() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 100; i++ { + go testTransfer_moby21233() // G1 + } +} + +// Example deadlock trace: +// +// G1 G2 G3 +// ------------------------------------------------------------------------------------------------ +// testTransfer() +// tm.Transfer() +// t.Watch() +// . WriteProgress() +// . ProgressChan<- +// . . <-progressChan +// . . rand.Int31n(2) >= 1 +// . . return +// . ProgressChan<- . 
+// <-watcher.running +// ----------------------G1, G2 leak-------------------------- +// diff --git a/src/runtime/testdata/testgoroutineleakgc/moby25348.go b/src/runtime/testdata/testgoroutineleakgc/moby25348.go new file mode 100644 index 00000000000000..4ee34b1f08fec8 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby25348.go @@ -0,0 +1,58 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/25384 + * Buggy version: 58befe3081726ef74ea09198cd9488fb42c51f51 + * fix commit-id: 42360d164b9f25fb4b150ef066fcf57fa39559a7 + * Flaky: 100/100 + * Description: + * When n=1 (len(pm.plugins)), the location of group.Wait() doesn’t matter. + * When n is larger than 1, group.Wait() is invoked in each iteration. Whenever + * group.Wait() is invoked, it waits for group.Done() to be executed n times. + * However, group.Done() is only executed once in one iteration. + */ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" +) + +func init() { + register("Moby25348", Moby25348) +} + +type plugin_moby25348 struct{} + +type Manager_moby25348 struct { + plugins []*plugin_moby25348 +} + +func (pm *Manager_moby25348) init() { + var group sync.WaitGroup + group.Add(len(pm.plugins)) + for _, p := range pm.plugins { + go func(p *plugin_moby25348) { + defer group.Done() + }(p) + group.Wait() // Block here + } +} + +func Moby25348() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + go func() { + p1 := &plugin_moby25348{} + p2 := &plugin_moby25348{} + pm := &Manager_moby25348{ + plugins: []*plugin_moby25348{p1, p2}, + } + // deadlocks: 1 + go pm.init() + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby27782.go b/src/runtime/testdata/testgoroutineleakgc/moby27782.go new file mode 100644 index 00000000000000..51212e77c1728a --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby27782.go @@ -0,0 +1,269 @@ +/* + * Project: moby + * Issue or PR : 
https://github.com/moby/moby/pull/27782 + * Buggy version: 18768fdc2e76ec6c600c8ab57d2d487ee7877794 + * fix commit-id: a69a59ffc7e3d028a72d1195c2c1535f447eaa84 + * Flaky: 2/100 + */ +package main + +import ( + "errors" + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Moby27782", Moby27782) +} + +type Event_moby27782 struct { + Op Op_moby27782 +} + +type Op_moby27782 uint32 + +const ( + Create_moby27782 Op_moby27782 = 1 << iota + Write_moby27782 + Remove_moby27782 + Rename_moby27782 + Chmod_moby27782 +) + +func newEvent(op Op_moby27782) Event_moby27782 { + return Event_moby27782{op} +} + +func (e *Event_moby27782) ignoreLinux(w *Watcher_moby27782) bool { + if e.Op != Write_moby27782 { + w.mu.Lock() + defer w.mu.Unlock() + w.cv.Broadcast() + return true + } + runtime.Gosched() + return false +} + +type Watcher_moby27782 struct { + Events chan Event_moby27782 + mu sync.Mutex // L1 + cv *sync.Cond // C1 + done chan struct{} +} + +func NewWatcher_moby27782() *Watcher_moby27782 { + w := &Watcher_moby27782{ + Events: make(chan Event_moby27782), + done: make(chan struct{}), + } + w.cv = sync.NewCond(&w.mu) + // deadlocks: x > 0 + go w.readEvents() // G3 + return w +} + +func (w *Watcher_moby27782) readEvents() { + defer close(w.Events) + for { + if w.isClosed() { + return + } + event := newEvent(Write_moby27782) // MODIFY event + if !event.ignoreLinux(w) { + runtime.Gosched() + select { + case w.Events <- event: + case <-w.done: + return + } + } + } +} + +func (w *Watcher_moby27782) isClosed() bool { + select { + case <-w.done: + return true + default: + return false + } +} + +func (w *Watcher_moby27782) Close() { + if w.isClosed() { + return + } + close(w.done) +} + +func (w *Watcher_moby27782) Remove() { + w.mu.Lock() + defer w.mu.Unlock() + exists := true + for exists { + w.cv.Wait() + runtime.Gosched() + } +} + +type FileWatcher_moby27782 interface { + Events() <-chan Event_moby27782 + Remove() + Close() +} + +func New_moby27782() 
FileWatcher_moby27782 { + return NewEventWatcher_moby27782() +} + +func NewEventWatcher_moby27782() FileWatcher_moby27782 { + return &fsNotifyWatcher_moby27782{NewWatcher_moby27782()} +} + +type fsNotifyWatcher_moby27782 struct { + *Watcher_moby27782 +} + +func (w *fsNotifyWatcher_moby27782) Events() <-chan Event_moby27782 { + return w.Watcher_moby27782.Events +} + +func watchFile_moby27782() FileWatcher_moby27782 { + fileWatcher := New_moby27782() + return fileWatcher +} + +type LogWatcher_moby27782 struct { + closeOnce sync.Once + closeNotifier chan struct{} +} + +func (w *LogWatcher_moby27782) Close() { + w.closeOnce.Do(func() { + close(w.closeNotifier) + }) +} + +func (w *LogWatcher_moby27782) WatchClose() <-chan struct{} { + return w.closeNotifier +} + +func NewLogWatcher_moby27782() *LogWatcher_moby27782 { + return &LogWatcher_moby27782{ + closeNotifier: make(chan struct{}), + } +} + +func followLogs_moby27782(logWatcher *LogWatcher_moby27782) { + fileWatcher := watchFile_moby27782() + defer func() { + fileWatcher.Close() + }() + waitRead := func() { + runtime.Gosched() + select { + case <-fileWatcher.Events(): + case <-logWatcher.WatchClose(): + fileWatcher.Remove() + return + } + } + handleDecodeErr := func() { + waitRead() + } + handleDecodeErr() +} + +type Container_moby27782 struct { + LogDriver *JSONFileLogger_moby27782 +} + +func (container *Container_moby27782) InitializeStdio() { + if err := container.startLogging(); err != nil { + container.Reset() + } +} + +func (container *Container_moby27782) startLogging() error { + l := &JSONFileLogger_moby27782{ + readers: make(map[*LogWatcher_moby27782]struct{}), + } + container.LogDriver = l + l.ReadLogs() + return errors.New("Some error") +} + +func (container *Container_moby27782) Reset() { + if container.LogDriver != nil { + container.LogDriver.Close() + } +} + +type JSONFileLogger_moby27782 struct { + readers map[*LogWatcher_moby27782]struct{} +} + +func (l *JSONFileLogger_moby27782) ReadLogs() 
*LogWatcher_moby27782 { + logWatcher := NewLogWatcher_moby27782() + // deadlocks: x > 0 + go l.readLogs(logWatcher) // G2 + return logWatcher +} + +func (l *JSONFileLogger_moby27782) readLogs(logWatcher *LogWatcher_moby27782) { + l.readers[logWatcher] = struct{}{} + followLogs_moby27782(logWatcher) +} + +func (l *JSONFileLogger_moby27782) Close() { + for r := range l.readers { + r.Close() + delete(l.readers, r) + } +} + +func Moby27782() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 10000; i++ { + go (&Container_moby27782{}).InitializeStdio() // G1 + } +} + +// +// Example deadlock trace: +// +// G1 G2 G3 +// ------------------------------------------------------------------------------- +// InitializeStdio() +// startLogging() +// l.ReadLogs() +// NewLogWatcher() +// go l.readLogs() [G2] l.readLogs() +// container.Reset() . +// LogDriver.Close() . +// r.Close() . +// close(w.closeNotifier) . +// . followLogs(logWatcher) +// . watchFile() +// . New() +// . NewEventWatcher() +// . NewWatcher() +// . . w.readEvents() +// . . event.ignoreLinux() +// . . return false +// . <-logWatcher.WatchClose() . +// . fileWatcher.Remove() . +// . w.cv.Wait() . +// . . w.Events <- event +// --------------------------------G2,G3 leak------------------------------------- +// diff --git a/src/runtime/testdata/testgoroutineleakgc/moby28462.go b/src/runtime/testdata/testgoroutineleakgc/moby28462.go new file mode 100644 index 00000000000000..933d9c78fd853f --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby28462.go @@ -0,0 +1,142 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/28462 + * Buggy version: b184bdabf7a01c4b802304ac64ac133743c484be + * fix commit-id: 89b123473774248fc3a0356dd3ce5b116cc69b29 + * Flaky: 69/100 + * Description: + * There are three goroutines mentioned in the bug report Moby#28405. 
+ * Actually, only two goroutines are needed to trigger this bug. This bug + * is another example where lock and channel are mixed with each other. + * + * Moby#28405 : https://github.com/moby/moby/issues/28405 + */ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Moby28462", Moby28462) +} + +type State_moby28462 struct { + Health *Health_moby28462 +} + +type Container_moby28462 struct { + sync.Mutex + State *State_moby28462 +} + +func (ctr *Container_moby28462) start() { + go ctr.waitExit() +} +func (ctr *Container_moby28462) waitExit() { + +} + +type Store_moby28462 struct { + ctr *Container_moby28462 +} + +func (s *Store_moby28462) Get() *Container_moby28462 { + return s.ctr +} + +type Daemon_moby28462 struct { + containers Store_moby28462 +} + +func (d *Daemon_moby28462) StateChanged() { + c := d.containers.Get() + c.Lock() + d.updateHealthMonitorElseBranch(c) + defer c.Unlock() +} + +func (d *Daemon_moby28462) updateHealthMonitorIfBranch(c *Container_moby28462) { + h := c.State.Health + if stop := h.OpenMonitorChannel(); stop != nil { + go monitor_moby28462(c, stop) + } +} +func (d *Daemon_moby28462) updateHealthMonitorElseBranch(c *Container_moby28462) { + h := c.State.Health + h.CloseMonitorChannel() +} + +type Health_moby28462 struct { + stop chan struct{} +} + +func (s *Health_moby28462) OpenMonitorChannel() chan struct{} { + return s.stop +} + +func (s *Health_moby28462) CloseMonitorChannel() { + if s.stop != nil { + s.stop <- struct{}{} + } +} + +func monitor_moby28462(c *Container_moby28462, stop chan struct{}) { + for { + select { + case <-stop: + return + default: + handleProbeResult_moby28462(c) + } + } +} + +func handleProbeResult_moby28462(c *Container_moby28462) { + runtime.Gosched() + c.Lock() + defer c.Unlock() +} + +func NewDaemonAndContainer_moby28462() (*Daemon_moby28462, *Container_moby28462) { + c := &Container_moby28462{ + State: &State_moby28462{&Health_moby28462{make(chan 
struct{})}}, + } + d := &Daemon_moby28462{Store_moby28462{c}} + return d, c +} + +/// +/// G1 G2 +/// monitor() +/// handleProbeResult() +/// d.StateChanged() +/// c.Lock() +/// d.updateHealthMonitorElseBranch() +/// h.CloseMonitorChannel() +/// s.stop <- struct{}{} +/// c.Lock() +/// ----------------------G1,G2 deadlock------------------------ +/// + +func Moby28462() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 10000; i++ { + go func() { + d, c := NewDaemonAndContainer_moby28462() + // deadlocks: x > 0 + go monitor_moby28462(c, c.State.Health.OpenMonitorChannel()) // G1 + // deadlocks: x > 0 + go d.StateChanged() // G2 + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby29733.go b/src/runtime/testdata/testgoroutineleakgc/moby29733.go new file mode 100644 index 00000000000000..4348cce5964187 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby29733.go @@ -0,0 +1,74 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" +) + +func init() { + register("Moby29733", Moby29733) +} + +type Plugin_moby29733 struct { + activated bool + activateWait *sync.Cond +} + +type plugins_moby29733 struct { + sync.Mutex + plugins map[int]*Plugin_moby29733 +} + +func (p *Plugin_moby29733) waitActive() { + p.activateWait.L.Lock() + for !p.activated { + p.activateWait.Wait() + } + p.activateWait.L.Unlock() +} + +type extpointHandlers_moby29733 struct { + sync.RWMutex + extpointHandlers map[int]struct{} +} + +func Handle_moby29733(storage plugins_moby29733, handlers extpointHandlers_moby29733) { + handlers.Lock() + for _, p := range storage.plugins { + p.activated = false + } + handlers.Unlock() +} + +func testActive_moby29733(p *Plugin_moby29733) { + done := make(chan struct{}) + go func() { + // deadlocks: x > 0 + p.waitActive() + close(done) + }() + <-done +} + +func Moby29733() { + prof := pprof.Lookup("goroutineleak") + defer 
func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + go func() { + // deadlocks: x > 0 + storage := plugins_moby29733{plugins: make(map[int]*Plugin_moby29733)} + handlers := extpointHandlers_moby29733{extpointHandlers: make(map[int]struct{})} + + p := &Plugin_moby29733{activateWait: sync.NewCond(&sync.Mutex{})} + storage.plugins[0] = p + + testActive_moby29733(p) + Handle_moby29733(storage, handlers) + testActive_moby29733(p) + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby30408.go b/src/runtime/testdata/testgoroutineleakgc/moby30408.go new file mode 100644 index 00000000000000..1c39fcd1b47988 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby30408.go @@ -0,0 +1,62 @@ +package main + +import ( + "errors" + "os" + "runtime" + "runtime/pprof" + "sync" +) + +func init() { + register("Moby30408", Moby30408) +} + +type Manifest_moby30408 struct { + Implements []string +} + +type Plugin_moby30408 struct { + activateWait *sync.Cond + activateErr error + Manifest *Manifest_moby30408 +} + +func (p *Plugin_moby30408) waitActive() error { + p.activateWait.L.Lock() + for !p.activated() { + p.activateWait.Wait() + } + p.activateWait.L.Unlock() + return p.activateErr +} + +func (p *Plugin_moby30408) activated() bool { + return p.Manifest != nil +} + +func testActive_moby30408(p *Plugin_moby30408) { + done := make(chan struct{}) + go func() { + // deadlocks: 1 + p.waitActive() + close(done) + }() + <-done +} + +func Moby30408() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + go func() { + // deadlocks: 1 + p := &Plugin_moby30408{activateWait: sync.NewCond(&sync.Mutex{})} + p.activateErr = errors.New("some junk happened") + + testActive_moby30408(p) + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby33781.go b/src/runtime/testdata/testgoroutineleakgc/moby33781.go new file mode 100644 index 00000000000000..7c820b0ebd9d88 --- /dev/null +++ 
b/src/runtime/testdata/testgoroutineleakgc/moby33781.go @@ -0,0 +1,86 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/33781 + * Buggy version: 33fd3817b0f5ca4b87f0a75c2bd583b4425d392b + * fix commit-id: 67297ba0051d39be544009ba76abea14bc0be8a4 + * Flaky: 25/100 + * Description: + * The goroutine created using an anonymous function is blocked at + * sending a message to an unbuffered channel. However, there exists a + * path in the parent goroutine where the parent function will + * return without draining the channel. + */ + +package main + +import ( + "context" + "os" + "runtime/pprof" + "time" +) + +func init() { + register("Moby33781", Moby33781) +} + +func monitor_moby33781(stop chan bool) { + probeInterval := time.Millisecond + probeTimeout := time.Millisecond + for { + select { + case <-stop: + return + case <-time.After(probeInterval): + results := make(chan bool) + ctx, cancelProbe := context.WithTimeout(context.Background(), probeTimeout) + go func() { // G3 + // deadlocks: x > 0 + results <- true + close(results) + }() + select { + case <-stop: + // results should be drained here + cancelProbe() + return + case <-results: + cancelProbe() + case <-ctx.Done(): + cancelProbe() + <-results + } + } + } +} + +/// +/// G1 G2 G3 +/// monitor() +/// <-time.After() +/// stop <- +/// <-stop +/// return +/// cancelProbe() +/// return +/// results<- +///----------------G3 leak------------------ +/// + +func Moby33781() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + for i := 0; i < 100; i++ { + go func(i int) { + stop := make(chan bool) + go monitor_moby33781(stop) // G1 + go func() { // G2 + time.Sleep(time.Duration(i) * time.Millisecond) + stop <- true + }() + }(i) + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby36114.go b/src/runtime/testdata/testgoroutineleakgc/moby36114.go new file mode 100644 index 00000000000000..c3155b9135969b ---
/dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby36114.go @@ -0,0 +1,54 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/36114 + * Buggy version: 6d4d3c52ae7c3f910bfc7552a2a673a8338e5b9f + * fix commit-id: a44fcd3d27c06aaa60d8d1cbce169f0d982e74b1 + * Flaky: 100/100 + * Description: + * This is a double lock bug. The lock for the + * struct svm has already been locked when calling + * svm.hotRemoveVHDsAtStart() + */ +package main + +import ( + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Moby36114", Moby36114) +} + +type serviceVM_moby36114 struct { + sync.Mutex +} + +func (svm *serviceVM_moby36114) hotAddVHDsAtStart() { + svm.Lock() + defer svm.Unlock() + svm.hotRemoveVHDsAtStart() +} + +func (svm *serviceVM_moby36114) hotRemoveVHDsAtStart() { + svm.Lock() + defer svm.Unlock() +} + +func Moby36114() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 100; i++ { + go func() { + s := &serviceVM_moby36114{} + // deadlocks: x > 0 + go s.hotAddVHDsAtStart() + }() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby4951.go b/src/runtime/testdata/testgoroutineleakgc/moby4951.go new file mode 100644 index 00000000000000..8d0abbca1359b0 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/moby4951.go @@ -0,0 +1,104 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/4951 + * Buggy version: 81f148be566ab2b17810ad4be61a5d8beac8330f + * fix commit-id: 2ffef1b7eb618162673c6ffabccb9ca57c7dfce3 + * Flaky: 100/100 + * Description: + * The root cause and patch are clearly explained in the commit + * description. The global lock is devices.Lock(), and the device + * lock is baseInfo.lock.Lock(). It is very likely that this bug + * can be reproduced.
+ */ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Moby4951", Moby4951) +} + +type DeviceSet_moby4951 struct { + sync.Mutex + infos map[string]*DevInfo_moby4951 + nrDeletedDevices int +} + +func (devices *DeviceSet_moby4951) DeleteDevice(hash string) { + devices.Lock() + defer devices.Unlock() + + info := devices.lookupDevice(hash) + + info.lock.Lock() + runtime.Gosched() + defer info.lock.Unlock() + + devices.deleteDevice(info) +} + +func (devices *DeviceSet_moby4951) lookupDevice(hash string) *DevInfo_moby4951 { + existing, ok := devices.infos[hash] + if !ok { + return nil + } + return existing +} + +func (devices *DeviceSet_moby4951) deleteDevice(info *DevInfo_moby4951) { + devices.removeDeviceAndWait(info.Name()) +} + +func (devices *DeviceSet_moby4951) removeDeviceAndWait(devname string) { + /// remove devices by devname + devices.Unlock() + time.Sleep(300 * time.Nanosecond) + devices.Lock() +} + +type DevInfo_moby4951 struct { + lock sync.Mutex + name string +} + +func (info *DevInfo_moby4951) Name() string { + return info.name +} + +func NewDeviceSet_moby4951() *DeviceSet_moby4951 { + devices := &DeviceSet_moby4951{ + infos: make(map[string]*DevInfo_moby4951), + } + info1 := &DevInfo_moby4951{ + name: "info1", + } + info2 := &DevInfo_moby4951{ + name: "info2", + } + devices.infos[info1.name] = info1 + devices.infos[info2.name] = info2 + return devices +} + +func Moby4951() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + go func() { + ds := NewDeviceSet_moby4951() + /// Delete devices by the same info + // deadlocks: x > 0 + go ds.DeleteDevice("info1") + // deadlocks: x > 0 + go ds.DeleteDevice("info1") + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/moby7559.go b/src/runtime/testdata/testgoroutineleakgc/moby7559.go new file mode 100644 index 00000000000000..8b7ba9cae564bf --- /dev/null +++ 
b/src/runtime/testdata/testgoroutineleakgc/moby7559.go @@ -0,0 +1,51 @@ +/* + * Project: moby + * Issue or PR : https://github.com/moby/moby/pull/7559 + * Buggy version: 64579f51fcb439c36377c0068ccc9a007b368b5a + * fix commit-id: 6cbb8e070d6c3a66bf48fbe5cbf689557eee23db + * Flaky: 100/100 + */ +package main + +import ( + "net" + "os" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Moby7559", Moby7559) +} + +type UDPProxy_moby7559 struct { + connTrackLock sync.Mutex +} + +func (proxy *UDPProxy_moby7559) Run() { + for i := 0; i < 2; i++ { + proxy.connTrackLock.Lock() + _, err := net.DialUDP("udp", nil, nil) + if err != nil { + /// Missing unlock here + continue + } + if i == 0 { + break + } + } + proxy.connTrackLock.Unlock() +} + +func Moby7559() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 20; i++ { + go (&UDPProxy_moby7559{}).Run() + } +} diff --git a/src/runtime/testdata/testgoroutineleakgc/serving2137.go b/src/runtime/testdata/testgoroutineleakgc/serving2137.go new file mode 100644 index 00000000000000..45ebb3c6e64519 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/serving2137.go @@ -0,0 +1,146 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" + "time" +) + +func init() { + register("Serving2137", Serving2137) +} + +type token_serving2137 struct{} + +type request_serving2137 struct { + lock *sync.Mutex + accepted chan bool +} + +type Breaker_serving2137 struct { + pendingRequests chan token_serving2137 + activeRequests chan token_serving2137 +} + +func (b *Breaker_serving2137) Maybe(thunk func()) bool { + var t token_serving2137 + select { + default: + // Pending request queue is full. Report failure. + return false + case b.pendingRequests <- t: + // Pending request has capacity. + // Wait for capacity in the active queue. 
+ b.activeRequests <- t + // Defer releasing capacity in the active and pending request queue. + defer func() { + <-b.activeRequests + runtime.Gosched() + <-b.pendingRequests + }() + // Do the thing. + thunk() + // Report success + return true + } +} + +func (b *Breaker_serving2137) concurrentRequest() request_serving2137 { + r := request_serving2137{lock: &sync.Mutex{}, accepted: make(chan bool, 1)} + r.lock.Lock() + var start sync.WaitGroup + start.Add(1) + go func() { // G2, G3 + // deadlocks: x > 0 + start.Done() + runtime.Gosched() + ok := b.Maybe(func() { + // Will block on locked mutex. + r.lock.Lock() + runtime.Gosched() + r.lock.Unlock() + }) + r.accepted <- ok + }() + start.Wait() // Ensure that the go func has had a chance to execute. + return r +} + +// concurrentRequests issues n requests against the breaker, one after another, +// and returns the request_serving2137 value created for each of them. +func (b *Breaker_serving2137) concurrentRequests(n int) []request_serving2137 { + requests := make([]request_serving2137, n) + for i := range requests { + requests[i] = b.concurrentRequest() + } + return requests +} + +func NewBreaker_serving2137(queueDepth, maxConcurrency int32) *Breaker_serving2137 { + return &Breaker_serving2137{ + pendingRequests: make(chan token_serving2137, queueDepth+maxConcurrency), + activeRequests: make(chan token_serving2137, maxConcurrency), + } +} + +func unlock_serving2137(req request_serving2137) { + req.lock.Unlock() + runtime.Gosched() + // Verify that function has completed + ok := <-req.accepted + runtime.Gosched() + // Requeue for next usage + req.accepted <- ok +} + +func unlockAll_serving2137(requests []request_serving2137) { + for _, lc := range requests { + unlock_serving2137(lc) + } +} + +func Serving2137() { + prof := pprof.Lookup("goroutineleak") + defer func() { + time.Sleep(100 * time.Millisecond) + prof.WriteTo(os.Stdout, 2) + }() + + for i := 0; i < 1000; i++ { + go func() { + // deadlocks: x > 0 + b :=
NewBreaker_serving2137(1, 1) + + locks := b.concurrentRequests(2) // G1 + unlockAll_serving2137(locks) + }() + } +} + +// +// Example deadlock trace: +// G1 G2 G3 +// ------------------------------------------------------------------------------- +// b.concurrentRequests(2) +// b.concurrentRequest() +// r.lock.Lock() +// start.Done() +// start.Wait() +// b.concurrentRequest() +// r.lock.Lock() +// start.Done() +// start.Wait() +// unlockAll(locks) +// unlock(lc) +// req.lock.Unlock() +// ok := <-req.accepted +// b.Maybe() +// b.activeRequests <- t +// thunk() +// r.lock.Lock() +// b.Maybe() +// b.activeRequests <- t +// ----------------------------G1,G2,G3 deadlock----------------------------- +// diff --git a/src/runtime/testdata/testgoroutineleakgc/simple.go b/src/runtime/testdata/testgoroutineleakgc/simple.go new file mode 100644 index 00000000000000..0412b9cbdca5b5 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/simple.go @@ -0,0 +1,207 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" +) + +// This is a set of micro-tests with obvious goroutine leaks that +// ensures goroutine leak detection works. +// +// Tests in this file are not flaky if and only if they are run with GOMAXPROCS=1. +// The main goroutine forcefully yields via `runtime.Gosched()` before +// running the profiler. This moves it to the back of the run queue, +// allowing the leaky goroutines to be scheduled beforehand and get stuck.
+ +func init() { + register("NilRecv", NilRecv) + register("NilSend", NilSend) + register("SelectNoCases", SelectNoCases) + register("ChanRecv", ChanRecv) + register("ChanSend", ChanSend) + register("Select", Select) + register("WaitGroup", WaitGroup) + register("MutexStack", MutexStack) + register("MutexHeap", MutexHeap) + register("RWMutexRLock", RWMutexRLock) + register("RWMutexLock", RWMutexLock) + register("Cond", Cond) + register("Mixed", Mixed) + register("NoLeakGlobal", NoLeakGlobal) +} + +func NilRecv() { + prof := pprof.Lookup("goroutineleak") + go func() { + var c chan int + <-c + panic("should not be reached") + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func NilSend() { + prof := pprof.Lookup("goroutineleak") + go func() { + var c chan int + c <- 0 + panic("should not be reached") + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func ChanRecv() { + prof := pprof.Lookup("goroutineleak") + go func() { + <-make(chan int) + panic("should not be reached") + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func SelectNoCases() { + prof := pprof.Lookup("goroutineleak") + go func() { + select {} + panic("should not be reached") + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func ChanSend() { + prof := pprof.Lookup("goroutineleak") + go func() { + make(chan int) <- 0 + panic("should not be reached") + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func Select() { + prof := pprof.Lookup("goroutineleak") + go func() { + select { + case make(chan int) <- 0: + case <-make(chan int): + } + panic("should not be reached") + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func WaitGroup() { + prof := pprof.Lookup("goroutineleak") + go func() { + var wg sync.WaitGroup + wg.Add(1) + wg.Wait() + panic("should not be reached") + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func MutexStack() { + prof := pprof.Lookup("goroutineleak") + for i := 0; i < 1000; i++ { + go func() { + var mu 
sync.Mutex + mu.Lock() + mu.Lock() + panic("should not be reached") + }() + } + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func MutexHeap() { + prof := pprof.Lookup("goroutineleak") + for i := 0; i < 1000; i++ { + go func() { + mu := &sync.Mutex{} + go func() { + mu.Lock() + mu.Lock() + panic("should not be reached") + }() + }() + } + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func RWMutexRLock() { + prof := pprof.Lookup("goroutineleak") + go func() { + mu := &sync.RWMutex{} + mu.Lock() + mu.RLock() + panic("should not be reached") + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func RWMutexLock() { + prof := pprof.Lookup("goroutineleak") + go func() { + mu := &sync.RWMutex{} + mu.Lock() + mu.Lock() + panic("should not be reached") + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func Cond() { + prof := pprof.Lookup("goroutineleak") + go func() { + cond := sync.NewCond(&sync.Mutex{}) + cond.L.Lock() + cond.Wait() + panic("should not be reached") + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +func Mixed() { + prof := pprof.Lookup("goroutineleak") + go func() { + ch := make(chan int) + wg := sync.WaitGroup{} + wg.Add(1) + go func() { + ch <- 0 + wg.Done() + panic("should not be reached") + }() + wg.Wait() + <-ch + panic("should not be reached") + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +var ch = make(chan int) + +// No leak should be reported by this test +func NoLeakGlobal() { + prof := pprof.Lookup("goroutineleak") + go func() { + <-ch + }() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} diff --git a/src/runtime/testdata/testgoroutineleakgc/stresstests.go b/src/runtime/testdata/testgoroutineleakgc/stresstests.go new file mode 100644 index 00000000000000..397963b26120e1 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/stresstests.go @@ -0,0 +1,44 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" +) + +const spawnGCMaxDepth = 5 + +func init() { + 
register("Spawn", SpawnGC) +} + +func spawnGC(i int) { + prof := pprof.Lookup("goroutineleak") + if i == 0 { + return + } + wg := &sync.WaitGroup{} + wg.Add(i + 1) + go func() { + wg.Done() + // deadlocks: x > 0 + <-make(chan int) + }() + for j := 0; j < i; j++ { + go func() { + wg.Done() + spawnGC(i - 1) + }() + } + wg.Wait() + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) +} + +// SpawnGC spawns a tree of goroutine leaks and calls the goroutine leak profiler +// for each node in the tree. It is supposed to stress the goroutine leak profiler +// under a heavily concurrent workload. +func SpawnGC() { + spawnGC(spawnGCMaxDepth) +} diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go new file mode 100644 index 00000000000000..89ac2c42d5a104 --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing4829.go @@ -0,0 +1,85 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" +) + +func init() { + register("Syncthing4829", Syncthing4829) +} + +type Address_syncthing4829 int + +type Mapping_syncthing4829 struct { + mut sync.RWMutex + + extAddresses map[string]Address_syncthing4829 +} + +func (m *Mapping_syncthing4829) clearAddresses() { + m.mut.Lock() // First locking + var removed []Address_syncthing4829 + for id, addr := range m.extAddresses { + removed = append(removed, addr) + delete(m.extAddresses, id) + } + if len(removed) > 0 { + m.notify(nil, removed) + } + m.mut.Unlock() +} + +func (m *Mapping_syncthing4829) notify(added, remove []Address_syncthing4829) { + m.mut.RLock() + m.mut.RUnlock() +} + +type Service_syncthing4829 struct { + mut sync.RWMutex + + mappings []*Mapping_syncthing4829 +} + +func (s *Service_syncthing4829) NewMapping() *Mapping_syncthing4829 { + mapping := &Mapping_syncthing4829{ + extAddresses: make(map[string]Address_syncthing4829), + } + s.mut.Lock() + s.mappings = append(s.mappings, mapping) + s.mut.Unlock() + return mapping +} + 
+func (s *Service_syncthing4829) RemoveMapping(mapping *Mapping_syncthing4829) { + s.mut.Lock() + defer s.mut.Unlock() + for _, existing := range s.mappings { + if existing == mapping { + mapping.clearAddresses() + } + } +} + +func NewService_syncthing4829() *Service_syncthing4829 { + return &Service_syncthing4829{} +} + +func Syncthing4829() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + + go func() { + // deadlocks: 1 + natSvc := NewService_syncthing4829() + m := natSvc.NewMapping() + m.extAddresses["test"] = 0 + + natSvc.RemoveMapping(m) + }() +} diff --git a/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go new file mode 100644 index 00000000000000..25f9cc420837bf --- /dev/null +++ b/src/runtime/testdata/testgoroutineleakgc/syncthing5795.go @@ -0,0 +1,125 @@ +package main + +import ( + "os" + "runtime" + "runtime/pprof" + "sync" +) + +func init() { + register("Syncthing5795", Syncthing5795) +} + +type message_syncthing5795 interface{} + +type ClusterConfig_syncthing5795 struct{} + +type Model_syncthing5795 interface { + ClusterConfig(message_syncthing5795) +} + +type TestModel_syncthing5795 struct { + ccFn func() +} + +func (t *TestModel_syncthing5795) ClusterConfig(msg message_syncthing5795) { + if t.ccFn != nil { + t.ccFn() + } +} + +func newTestModel_syncthing5795() *TestModel_syncthing5795 { + return &TestModel_syncthing5795{} +} + +type Connection_syncthing5795 interface { + Start() + Close() +} + +type rawConnection_syncthing5795 struct { + receiver Model_syncthing5795 + + inbox chan message_syncthing5795 + dispatcherLoopStopped chan struct{} + closed chan struct{} + closeOnce sync.Once +} + +func (c *rawConnection_syncthing5795) Start() { + go c.readerLoop() + go func() { + // deadlocks: 1 + c.dispatcherLoop() + }() +} + +func (c *rawConnection_syncthing5795) readerLoop() { + for { + select { + case <-c.closed: + return 
+ default: + } + } +} + +func (c *rawConnection_syncthing5795) dispatcherLoop() { + defer close(c.dispatcherLoopStopped) + var msg message_syncthing5795 + for { + select { + case msg = <-c.inbox: + case <-c.closed: + return + } + switch msg := msg.(type) { + case *ClusterConfig_syncthing5795: + c.receiver.ClusterConfig(msg) + default: + return + } + } +} + +func (c *rawConnection_syncthing5795) internalClose() { + c.closeOnce.Do(func() { + close(c.closed) + <-c.dispatcherLoopStopped + }) +} + +func (c *rawConnection_syncthing5795) Close() { + c.internalClose() +} + +func NewConnection_syncthing5795(receiver Model_syncthing5795) Connection_syncthing5795 { + return &rawConnection_syncthing5795{ + dispatcherLoopStopped: make(chan struct{}), + closed: make(chan struct{}), + inbox: make(chan message_syncthing5795), + receiver: receiver, + } +} + +func Syncthing5795() { + prof := pprof.Lookup("goroutineleak") + defer func() { + runtime.Gosched() + prof.WriteTo(os.Stdout, 2) + }() + go func() { + // deadlocks: 1 + m := newTestModel_syncthing5795() + c := NewConnection_syncthing5795(m).(*rawConnection_syncthing5795) + m.ccFn = func() { + c.Close() + } + + c.Start() + c.inbox <- &ClusterConfig_syncthing5795{} + + <-c.dispatcherLoopStopped + }() +} diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 00c0f08e5593c8..9e70821891494e 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -1206,6 +1206,7 @@ var gStatusStrings = [...]string{ _Gwaiting: "waiting", _Gdead: "dead", _Gcopystack: "copystack", + _Gleaked: "leaked", _Gpreempted: "preempted", } @@ -1226,7 +1227,7 @@ func goroutineheader(gp *g) { } // Override. 
- if gpstatus == _Gwaiting && gp.waitreason != waitReasonZero { + if (gpstatus == _Gwaiting || gpstatus == _Gleaked) && gp.waitreason != waitReasonZero { status = gp.waitreason.String() } @@ -1245,6 +1246,9 @@ func goroutineheader(gp *g) { } } print(" [", status) + if gpstatus == _Gleaked { + print(" (leaked)") + } if isScan { print(" (scan)") } diff --git a/src/runtime/tracestatus.go b/src/runtime/tracestatus.go index 03ec81fc0262a1..8b5eafd170f488 100644 --- a/src/runtime/tracestatus.go +++ b/src/runtime/tracestatus.go @@ -122,7 +122,7 @@ func goStatusToTraceGoStatus(status uint32, wr waitReason) tracev2.GoStatus { tgs = tracev2.GoRunning case _Gsyscall: tgs = tracev2.GoSyscall - case _Gwaiting, _Gpreempted: + case _Gwaiting, _Gpreempted, _Gleaked: // There are a number of cases where a G might end up in // _Gwaiting but it's actually running in a non-preemptive // state but needs to present itself as preempted to the