Skip to content

Commit f5e66f0

Browse files
committed
Collect goroutine leaks via profiling.
Remove debug flag.
1 parent 3e40e33 commit f5e66f0

File tree

8 files changed

+213
-46
lines changed

8 files changed

+213
-46
lines changed

src/net/http/pprof/pprof.go

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -351,25 +351,27 @@ func collectProfile(p *pprof.Profile) (*profile.Profile, error) {
351351
}
352352

353353
var profileSupportsDelta = map[handler]bool{
354-
"allocs": true,
355-
"block": true,
356-
"goroutine": true,
357-
"heap": true,
358-
"mutex": true,
359-
"threadcreate": true,
354+
"allocs": true,
355+
"block": true,
356+
"goroutine": true,
357+
"goroutineleak": true,
358+
"heap": true,
359+
"mutex": true,
360+
"threadcreate": true,
360361
}
361362

362363
var profileDescriptions = map[string]string{
363-
"allocs": "A sampling of all past memory allocations",
364-
"block": "Stack traces that led to blocking on synchronization primitives",
365-
"cmdline": "The command line invocation of the current program",
366-
"goroutine": "Stack traces of all current goroutines. Use debug=2 as a query parameter to export in the same format as an unrecovered panic.",
367-
"heap": "A sampling of memory allocations of live objects. You can specify the gc GET parameter to run GC before taking the heap sample.",
368-
"mutex": "Stack traces of holders of contended mutexes",
369-
"profile": "CPU profile. You can specify the duration in the seconds GET parameter. After you get the profile file, use the go tool pprof command to investigate the profile.",
370-
"symbol": "Maps given program counters to function names. Counters can be specified in a GET raw query or POST body, multiple counters are separated by '+'.",
371-
"threadcreate": "Stack traces that led to the creation of new OS threads",
372-
"trace": "A trace of execution of the current program. You can specify the duration in the seconds GET parameter. After you get the trace file, use the go tool trace command to investigate the trace.",
364+
"allocs": "A sampling of all past memory allocations",
365+
"block": "Stack traces that led to blocking on synchronization primitives",
366+
"cmdline": "The command line invocation of the current program",
367+
"goroutine": "Stack traces of all current goroutines. Use debug=2 as a query parameter to export in the same format as an unrecovered panic.",
368+
"goroutineleak": "Stack traces of all leaked goroutines. Use debug=2 as a query parameter to export in the same format as an unrecovered panic.",
369+
"heap": "A sampling of memory allocations of live objects. You can specify the gc GET parameter to run GC before taking the heap sample.",
370+
"mutex": "Stack traces of holders of contended mutexes",
371+
"profile": "CPU profile. You can specify the duration in the seconds GET parameter. After you get the profile file, use the go tool pprof command to investigate the profile.",
372+
"symbol": "Maps given program counters to function names. Counters can be specified in a GET raw query or POST body, multiple counters are separated by '+'.",
373+
"threadcreate": "Stack traces that led to the creation of new OS threads",
374+
"trace": "A trace of execution of the current program. You can specify the duration in the seconds GET parameter. After you get the trace file, use the go tool trace command to investigate the trace.",
373375
}
374376

375377
type profileEntry struct {

src/runtime/mgc.go

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,25 @@ func GC() {
569569
releasem(mp)
570570
}
571571

572+
//go:linkname runtime_goroutineLeakGC runtime/pprof.runtime_goroutineLeakGC
573+
func runtime_goroutineLeakGC() {
574+
goroutineLeakGC()
575+
}
576+
577+
// goroutineLeakGC runs a GC cycle that performs goroutine leak detection.
578+
func goroutineLeakGC() {
579+
// Set the pending flag to true, instructing the next GC cycle to
580+
// perform goroutine leak detection.
581+
work.goroutineLeakFinder.pending.Store(true)
582+
583+
// Spin GC cycles until the pending flag is unset.
584+
// This ensures that goroutineLeakGC waits for a GC cycle that
585+
// actually performs goroutine leak detection.
586+
for work.goroutineLeakFinder.pending.Load() {
587+
GC()
588+
}
589+
}
590+
572591
// gcWaitOnMark blocks until GC finishes the Nth mark phase. If GC has
573592
// already completed this mark phase, it returns immediately.
574593
func gcWaitOnMark(n uint32) {
@@ -714,9 +733,9 @@ func gcStart(trigger gcTrigger) {
714733
mode = gcForceMode
715734
} else if debug.gcstoptheworld == 2 {
716735
mode = gcForceBlockMode
717-
} else if work.goroutineLeakFinder.pending.Load() || debug.gcgoroutineleaks > 0 {
718-
// If goroutine leak detection has been enabled (via GODEBUG=gcgoroutineleaks=1),
719-
// or via profiling, stop the world during the marking phase.
736+
} else if work.goroutineLeakFinder.pending.Load() {
737+
// If goroutine leak detection has been enabled via profiling,
738+
// stop the world during the marking phase.
720739
mode = gcForceMode
721740
}
722741

@@ -795,8 +814,7 @@ func gcStart(trigger gcTrigger) {
795814
schedEnableUser(false)
796815
}
797816

798-
if work.goroutineLeakFinder.pending.Load() ||
799-
debug.gcgoroutineleaks > 0 {
817+
if work.goroutineLeakFinder.pending.Load() {
800818
work.goroutineLeakFinder.enabled = true
801819
work.goroutineLeakFinder.pending.Store(false)
802820
gcUntrackSyncObjects()
@@ -1199,16 +1217,6 @@ func findGoleaks() bool {
11991217
for i := work.nMaybeRunnableStackRoots; i < work.nStackRoots; i++ {
12001218
gp := work.stackRoots[i]
12011219
casgstatus(gp, _Gwaiting, _Gleaked)
1202-
fn := findfunc(gp.startpc)
1203-
if fn.valid() {
1204-
print("goroutine leak! goroutine ", gp.goid, ": ", funcname(fn), " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes ",
1205-
"[", waitReasonStrings[gp.waitreason], "]\n")
1206-
} else {
1207-
print("goroutine leak! goroutine ", gp.goid, ": !unnamed goroutine!", " Stack size: ", gp.stack.hi-gp.stack.lo, " bytes ",
1208-
"[", waitReasonStrings[gp.waitreason], "]\n")
1209-
}
1210-
traceback(gp.sched.pc, gp.sched.sp, gp.sched.lr, gp)
1211-
println()
12121220
}
12131221
// Put the remaining roots as ready for marking and drain them.
12141222
work.markrootJobs.Add(int32(work.nStackRoots - work.nMaybeRunnableStackRoots))

src/runtime/mprof.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,20 @@ func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.P
12591259
return goroutineProfileWithLabelsConcurrent(p, labels)
12601260
}
12611261

1262+
//go:linkname pprof_goroutineLeakProfileWithLabels
1263+
func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
1264+
return goroutineLeakProfileWithLabelsConcurrent(p, labels)
1265+
}
1266+
1267+
// labels may be nil. If labels is non-nil, it must have the same length as p.
1268+
func goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
1269+
if labels != nil && len(labels) != len(p) {
1270+
labels = nil
1271+
}
1272+
1273+
return goroutineLeakProfileWithLabelsConcurrent(p, labels)
1274+
}
1275+
12621276
var goroutineProfile = struct {
12631277
sema uint32
12641278
active bool
@@ -1302,6 +1316,89 @@ func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileSt
13021316
return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new))
13031317
}
13041318

1319+
func goroutineLeakProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
1320+
if len(p) == 0 {
1321+
// An empty slice is obviously too small. Return a rough
1322+
// allocation estimate without bothering to STW. As long as
1323+
// this is close, then we'll only need to STW once (on the next
1324+
// call).
1325+
return int(gleakcount()), false
1326+
}
1327+
1328+
// Use the same semaphore as goroutineProfileWithLabelsConcurrent,
1329+
// because ultimately we still use goroutine profiles.
1330+
semacquire(&goroutineProfile.sema)
1331+
1332+
// Unlike in goroutineProfileWithLabelsConcurrent, we don't save the current
1333+
// goroutine stack, because it is obviously not a leaked goroutine.
1334+
1335+
pcbuf := makeProfStack() // see saveg() for explanation
1336+
stw := stopTheWorld(stwGoroutineProfile)
1337+
// Using gleakcount while the world is stopped should give us a consistent view
1338+
// of the number of leaked goroutines.
1339+
n = int(gleakcount())
1340+
1341+
if n > len(p) {
1342+
// There's not enough space in p to store the whole profile, so (per the
1343+
// contract of runtime.GoroutineProfile) we're not allowed to write to p
1344+
// at all and must return n, false.
1345+
startTheWorld(stw)
1346+
semrelease(&goroutineProfile.sema)
1347+
return n, false
1348+
}
1349+
1350+
// Prepare for all other goroutines to enter the profile. Every goroutine struct in the allgs list
1351+
// has its goroutineProfiled field cleared. Any goroutine created from this point on (while
1352+
// goroutineProfile.active is set) will start with its goroutineProfiled
1353+
// field set to goroutineProfileSatisfied.
1354+
goroutineProfile.active = true
1355+
goroutineProfile.records = p
1356+
goroutineProfile.labels = labels
1357+
startTheWorld(stw)
1358+
1359+
// Visit each leaked goroutine that existed as of the startTheWorld call above.
1360+
forEachGRace(func(gp1 *g) {
1361+
if readgstatus(gp1) == _Gleaked {
1362+
tryRecordGoroutineProfile(gp1, pcbuf, Gosched)
1363+
}
1364+
})
1365+
1366+
stw = stopTheWorld(stwGoroutineProfileCleanup)
1367+
endOffset := goroutineProfile.offset.Swap(0)
1368+
goroutineProfile.active = false
1369+
goroutineProfile.records = nil
1370+
goroutineProfile.labels = nil
1371+
startTheWorld(stw)
1372+
1373+
// Restore the invariant that every goroutine struct in allgs has its
1374+
// goroutineProfiled field cleared.
1375+
forEachGRace(func(gp1 *g) {
1376+
gp1.goroutineProfiled.Store(goroutineProfileAbsent)
1377+
})
1378+
1379+
if raceenabled {
1380+
raceacquire(unsafe.Pointer(&labelSync))
1381+
}
1382+
1383+
if n != int(endOffset) {
1384+
// It's a big surprise that the number of goroutines changed while we
1385+
// were collecting the profile. But probably better to return a
1386+
// truncated profile than to crash the whole process.
1387+
//
1388+
// For instance, needm moves a goroutine out of the _Gdead state and so
1389+
// might be able to change the goroutine count without interacting with
1390+
// the scheduler. For code like that, the race windows are small and the
1391+
// combination of features is uncommon, so it's hard to be (and remain)
1392+
// sure we've caught them all.
1393+
//
1394+
// FIXME(vsaioc): I kept this in because goroutineProfileWithLabelsConcurrent
1395+
// also uses it, but... is this dead code?
1396+
}
1397+
1398+
semrelease(&goroutineProfile.sema)
1399+
return n, true
1400+
}
1401+
13051402
func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
13061403
if len(p) == 0 {
13071404
// An empty slice is obviously too small. Return a rough

src/runtime/pprof/pprof.go

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,13 @@ import (
105105
//
106106
// Each Profile has a unique name. A few profiles are predefined:
107107
//
108-
// goroutine - stack traces of all current goroutines
109-
// heap - a sampling of memory allocations of live objects
110-
// allocs - a sampling of all past memory allocations
111-
// threadcreate - stack traces that led to the creation of new OS threads
112-
// block - stack traces that led to blocking on synchronization primitives
113-
// mutex - stack traces of holders of contended mutexes
108+
// goroutine - stack traces of all current goroutines
109+
// goroutineleak - stack traces of all leaked goroutines
110+
// allocs - a sampling of all past memory allocations
111+
// heap - a sampling of memory allocations of live objects
112+
// threadcreate - stack traces that led to the creation of new OS threads
113+
// block - stack traces that led to blocking on synchronization primitives
114+
// mutex - stack traces of holders of contended mutexes
114115
//
115116
// These predefined profiles maintain themselves and panic on an explicit
116117
// [Profile.Add] or [Profile.Remove] method call.
@@ -169,6 +170,7 @@ import (
169170
// holds a lock for 1s while 5 other goroutines are waiting for the entire
170171
// second to acquire the lock, its unlock call stack will report 5s of
171172
// contention.
173+
172174
type Profile struct {
173175
name string
174176
mu sync.Mutex
@@ -189,6 +191,12 @@ var goroutineProfile = &Profile{
189191
write: writeGoroutine,
190192
}
191193

194+
var goroutineLeakProfile = &Profile{
195+
name: "goroutineleak",
196+
count: countGoroutineLeak,
197+
write: writeGoroutineLeak,
198+
}
199+
192200
var threadcreateProfile = &Profile{
193201
name: "threadcreate",
194202
count: countThreadCreate,
@@ -224,12 +232,13 @@ func lockProfiles() {
224232
if profiles.m == nil {
225233
// Initial built-in profiles.
226234
profiles.m = map[string]*Profile{
227-
"goroutine": goroutineProfile,
228-
"threadcreate": threadcreateProfile,
229-
"heap": heapProfile,
230-
"allocs": allocsProfile,
231-
"block": blockProfile,
232-
"mutex": mutexProfile,
235+
"goroutine": goroutineProfile,
236+
"goroutineleak": goroutineLeakProfile,
237+
"threadcreate": threadcreateProfile,
238+
"heap": heapProfile,
239+
"allocs": allocsProfile,
240+
"block": blockProfile,
241+
"mutex": mutexProfile,
233242
}
234243
}
235244
}
@@ -739,6 +748,11 @@ func countGoroutine() int {
739748
return runtime.NumGoroutine()
740749
}
741750

751+
// countGoroutineLeak returns the number of leaked goroutines.
752+
func countGoroutineLeak() int {
753+
return int(runtime_gleakcount())
754+
}
755+
742756
// writeGoroutine writes the current runtime GoroutineProfile to w.
743757
func writeGoroutine(w io.Writer, debug int) error {
744758
if debug >= 2 {
@@ -747,6 +761,23 @@ func writeGoroutine(w io.Writer, debug int) error {
747761
return writeRuntimeProfile(w, debug, "goroutine", pprof_goroutineProfileWithLabels)
748762
}
749763

764+
// writeGoroutineLeak first invokes a GC cycle that performs goroutine leak detection.
765+
// It then writes the goroutine profile, filtering for leaked goroutines.
766+
func writeGoroutineLeak(w io.Writer, debug int) error {
767+
// Run the GC with leak detection first so that leaked goroutines
768+
// may transition to the leaked state.
769+
runtime_goroutineLeakGC()
770+
771+
// If the debug flag is set sufficiently high, just defer to writing goroutine stacks
772+
// like in a regular goroutine profile. Include non-leaked goroutines, too.
773+
if debug >= 2 {
774+
return writeGoroutineStacks(w)
775+
}
776+
777+
// Otherwise, write the goroutine leak profile.
778+
return writeRuntimeProfile(w, debug, "goroutineleak", pprof_goroutineLeakProfileWithLabels)
779+
}
780+
750781
func writeGoroutineStacks(w io.Writer) error {
751782
// We don't know how big the buffer needs to be to collect
752783
// all the goroutines. Start with 1 MB and try a few times, doubling each time.
@@ -969,6 +1000,9 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu
9691000
//go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels
9701001
func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool)
9711002

1003+
//go:linkname pprof_goroutineLeakProfileWithLabels runtime.pprof_goroutineLeakProfileWithLabels
1004+
func pprof_goroutineLeakProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool)
1005+
9721006
//go:linkname pprof_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
9731007
func pprof_cyclesPerSecond() int64
9741008

src/runtime/pprof/runtime.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ func runtime_setProfLabel(labels unsafe.Pointer)
2929
// runtime_getProfLabel is defined in runtime/proflabel.go.
3030
func runtime_getProfLabel() unsafe.Pointer
3131

32+
// runtime_gleakcount is defined in runtime/proc.go.
33+
func runtime_gleakcount() int32
34+
35+
// runtime_goroutineLeakGC is defined in runtime/mgc.go.
36+
func runtime_goroutineLeakGC()
37+
3238
// SetGoroutineLabels sets the current goroutine's labels to match ctx.
3339
// A new goroutine inherits the labels of the goroutine that created it.
3440
// This is a lower-level API than [Do], which should be used instead when possible.

src/runtime/proc.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5553,6 +5553,25 @@ func gcount() int32 {
55535553
return n
55545554
}
55555555

5556+
//go:linkname runtime_gleakcount runtime/pprof.runtime_gleakcount
5557+
func runtime_gleakcount() int32 {
5558+
return gleakcount()
5559+
}
5560+
5561+
// gleakcount returns the number of leaked goroutines currently reported by
5562+
// the runtime. Protected by allglock.
5563+
func gleakcount() int32 {
5564+
n := int32(0)
5565+
lock(&allglock)
5566+
for _, g := range allgs {
5567+
if readgstatus(g) == _Gleaked {
5568+
n++
5569+
}
5570+
}
5571+
unlock(&allglock)
5572+
return n
5573+
}
5574+
55565575
func mcount() int32 {
55575576
return int32(sched.mnext - sched.nmfreed)
55585577
}

src/runtime/runtime1.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,6 @@ var debug struct {
316316
dontfreezetheworld int32
317317
efence int32
318318
gccheckmark int32
319-
gcgoroutineleaks int32
320319
gcpacertrace int32
321320
gcshrinkstackoff int32
322321
gcstoptheworld int32
@@ -382,7 +381,6 @@ var dbgvars = []*dbgVar{
382381
{name: "efence", value: &debug.efence},
383382
{name: "gccheckmark", value: &debug.gccheckmark},
384383
{name: "gcpacertrace", value: &debug.gcpacertrace},
385-
{name: "gcgoroutineleaks", value: &debug.gcgoroutineleaks},
386384
{name: "gcshrinkstackoff", value: &debug.gcshrinkstackoff},
387385
{name: "gcstoptheworld", value: &debug.gcstoptheworld},
388386
{name: "gctrace", value: &debug.gctrace},

0 commit comments

Comments (0)