Skip to content

Commit 8555ffb

Browse files
committed
runtime/pprof: add debug=26257 goroutine profile with labels and reduced STW
This adds a new goroutine profile mode (debug=26257) that emits one entry per goroutine in a format nearly identical to debug=2 that additionally includes any pprof labels in the header, following the [state] segment, e.g.: goroutine 123 [select, 2 minutes]{"svc": "users"}: The implementation of this mode is however significantly different from debug=2, and instead based on the same underlying mechanism as debug=1. Unlike the collection of debug=2 profiles, which walks all stacks while the world is stopped, the implementation of this mode uses the concurrent collection integrated with the scheduler that backs debug=1. As a result, this new mode reduces the duration of the stop-the-world phase, particularly when profiling processes with many goroutines, compared to debug=2, as demonstrated by the included benchmark: │ debug=2 │ debug=26257 │ max_latency_ns │ max_latency_ns vs base goroutines=100x3-14 1013.17k ± 47% 84.06k ± 27% -91.70% (p=0.002 n=6) goroutines=100x10-14 769.23k ± 7% 80.29k ± 22% -89.56% (p=0.002 n=6) goroutines=100x50-14 2172.4k ± 9% 181.8k ± 46% -91.63% (p=0.002 n=6) goroutines=1000x3-14 7133.9k ± 3% 195.7k ± 42% -97.26% (p=0.002 n=6) goroutines=1000x10-14 11787.6k ± 48% 494.4k ± 77% -95.81% (p=0.002 n=6) goroutines=1000x50-14 20234.0k ± 87% 174.8k ± 137% -99.14% (p=0.002 n=6) goroutines=10000x3-14 68611.0k ± 49% 168.5k ± 2768% -99.75% (p=0.002 n=6) goroutines=10000x10-14 60.261M ± 95% 3.460M ± 166% -94.26% (p=0.002 n=6) goroutines=10000x50-14 284.144M ± 40% 4.672M ± 89% -98.36% (p=0.002 n=6) goroutines=25000x3-14 171.290M ± 48% 4.287M ± 394% -97.50% (p=0.002 n=6) goroutines=25000x10-14 150.827M ± 92% 6.424M ± 158% -95.74% (p=0.002 n=6) goroutines=25000x50-14 708.238M ± 34% 2.249M ± 410% -99.68% (p=0.002 n=6) geomean 25.08M 624.2k -97.51% This concurrent collection approach and its relaxed consistency compared to keeping the world stopped does mean the behavior of this new mode is not exactly identical to debug=2. 
Additionally, currently this mode always elides argument values when printing stacks, whereas debug=2 includes them most of the time. These behavior differences mean that despite the performance benefits, the new implementation is not used for the existing debug=2 mode, but rather only for the new mode, making it an opt-in alternative.
1 parent 8f7bb0b commit 8555ffb

File tree

4 files changed

+517
-2
lines changed

4 files changed

+517
-2
lines changed

src/internal/profilerecord/profilerecord.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,11 @@ type BlockProfileRecord struct {
2626
Cycles int64
2727
Stack []uintptr
2828
}
29+
30+
// GoroutineInfo holds per-goroutine metadata captured alongside a stack
// record during a goroutine profile: identity, scheduler state, and the
// creation site. It is populated by the runtime's goroutine profiler and
// consumed by runtime/pprof when formatting per-goroutine output.
type GoroutineInfo struct {
	ID         uint64  // goroutine ID (goid)
	State      string  // human-readable scheduler state, e.g. "running" or a wait reason
	CreatorID  uint64  // goid of the goroutine that created this one; 0 if unknown
	CreationPC uintptr // PC of the go statement that created this goroutine
	WaitSince  int64   // approx time when the g became blocked, in nanoseconds
}

src/runtime/mprof.go

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,7 +1320,7 @@ func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.P
13201320
labels = nil
13211321
}
13221322

1323-
return goroutineProfileWithLabelsConcurrent(p, labels)
1323+
return goroutineProfileWithLabelsConcurrent(p, labels, nil)
13241324
}
13251325

13261326
var goroutineProfile = struct {
@@ -1329,10 +1329,48 @@ var goroutineProfile = struct {
13291329
offset atomic.Int64
13301330
records []profilerecord.StackRecord
13311331
labels []unsafe.Pointer
1332+
infos []profilerecord.GoroutineInfo
13321333
}{
13331334
sema: 1,
13341335
}
13351336

1337+
// goroutineStatusString converts goroutine status to human-readable string
1338+
// Uses the same logic as goroutineheader() in traceback.go
1339+
func goroutineStatusString(gp1 *g) string {
1340+
gpstatus := readgstatus(gp1)
1341+
gpstatus &^= _Gscan // drop the scan bit
1342+
1343+
// Basic string status
1344+
var status string
1345+
switch gpstatus {
1346+
case _Gidle:
1347+
status = "idle"
1348+
case _Grunnable:
1349+
status = "runnable"
1350+
case _Grunning:
1351+
status = "running"
1352+
case _Gsyscall:
1353+
status = "syscall"
1354+
case _Gwaiting:
1355+
status = "waiting"
1356+
case _Gdead:
1357+
status = "dead"
1358+
case _Gcopystack:
1359+
status = "copystack"
1360+
case _Gpreempted:
1361+
status = "preempted"
1362+
default:
1363+
status = "unknown"
1364+
}
1365+
1366+
// Override with wait reason if available (same logic as goroutineheader)
1367+
if gpstatus == _Gwaiting && gp1.waitreason != waitReasonZero {
1368+
status = gp1.waitreason.String()
1369+
}
1370+
1371+
return status
1372+
}
1373+
13361374
// goroutineProfileState indicates the status of a goroutine's stack for the
13371375
// current in-progress goroutine profile. Goroutines' stacks are initially
13381376
// "Absent" from the profile, and end up "Satisfied" by the time the profile is
@@ -1366,7 +1404,7 @@ func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileSt
13661404
return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new))
13671405
}
13681406

1369-
func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
1407+
func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo) (n int, ok bool) {
13701408
if len(p) == 0 {
13711409
// An empty slice is obviously too small. Return a rough
13721410
// allocation estimate without bothering to STW. As long as
@@ -1411,6 +1449,16 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
14111449
if labels != nil {
14121450
labels[0] = ourg.labels
14131451
}
1452+
1453+
// If individual id collection is enabled, also store ID.
1454+
if len(infos) > 0 {
1455+
infos[0].ID = ourg.goid
1456+
infos[0].State = goroutineStatusString(ourg)
1457+
infos[0].CreatorID = ourg.parentGoid
1458+
infos[0].CreationPC = ourg.gopc
1459+
infos[0].WaitSince = ourg.waitsince
1460+
}
1461+
14141462
ourg.goroutineProfiled.Store(goroutineProfileSatisfied)
14151463
goroutineProfile.offset.Store(1)
14161464

@@ -1422,6 +1470,8 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
14221470
goroutineProfile.active = true
14231471
goroutineProfile.records = p
14241472
goroutineProfile.labels = labels
1473+
goroutineProfile.infos = infos
1474+
14251475
// The finalizer goroutine needs special handling because it can vary over
14261476
// time between being a user goroutine (eligible for this profile) and a
14271477
// system goroutine (to be excluded). Pick one before restarting the world.
@@ -1453,6 +1503,7 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
14531503
goroutineProfile.active = false
14541504
goroutineProfile.records = nil
14551505
goroutineProfile.labels = nil
1506+
goroutineProfile.infos = nil
14561507
startTheWorld(stw)
14571508

14581509
// Restore the invariant that every goroutine struct in allgs has its
@@ -1578,6 +1629,15 @@ func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) {
15781629
if goroutineProfile.labels != nil {
15791630
goroutineProfile.labels[offset] = gp1.labels
15801631
}
1632+
1633+
// If goroutine info collection is enabled, store ID, state, and creation info.
1634+
if goroutineProfile.infos != nil && offset < len(goroutineProfile.infos) {
1635+
goroutineProfile.infos[offset].ID = gp1.goid
1636+
goroutineProfile.infos[offset].State = goroutineStatusString(gp1)
1637+
goroutineProfile.infos[offset].CreatorID = gp1.parentGoid
1638+
goroutineProfile.infos[offset].CreationPC = gp1.gopc
1639+
goroutineProfile.infos[offset].WaitSince = gp1.waitsince
1640+
}
15811641
}
15821642

15831643
func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
@@ -1733,3 +1793,8 @@ func Stack(buf []byte, all bool) int {
17331793
}
17341794
return n
17351795
}
1796+
1797+
//go:linkname pprof_goroutineStacksWithLabels
1798+
func pprof_goroutineStacksWithLabels(stacks []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo) (n int, ok bool) {
1799+
return goroutineProfileWithLabelsConcurrent(stacks, labels, infos)
1800+
}

src/runtime/pprof/pprof.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,9 @@ func countGoroutine() int {
749749

750750
// writeGoroutine writes the current runtime GoroutineProfile to w.
751751
func writeGoroutine(w io.Writer, debug int) error {
752+
if debug == 26257 {
753+
return writeGoroutineStacksWithLabels(w)
754+
}
752755
if debug >= 2 {
753756
return writeGoroutineStacks(w)
754757
}
@@ -776,6 +779,107 @@ func writeGoroutineStacks(w io.Writer) error {
776779
return err
777780
}
778781

782+
// writeGoroutineStacksWithLabels writes individual goroutine stack traces like
783+
// writeGoroutineStacks, but uses the concurrent, reduced-stop time approach of
784+
// goroutineProfileWithLabelsConcurrent. It includes the ID, state, and labels
785+
// for each goroutine, although as these are captured after the world is resumed
786+
// they are not guaranteed to be entirely consistent as of a single point in
787+
// time.
788+
func writeGoroutineStacksWithLabels(w io.Writer) error {
789+
n, ok := pprof_goroutineStacksWithLabels(nil, nil, nil)
790+
791+
for {
792+
// Allocate slices for individual goroutine data
793+
stacks := make([]profilerecord.StackRecord, n+10)
794+
labels := make([]unsafe.Pointer, n+10)
795+
infos := make([]profilerecord.GoroutineInfo, n+10)
796+
797+
n, ok = pprof_goroutineStacksWithLabels(stacks, labels, infos)
798+
if ok {
799+
return printGoroutineStacksWithLabels(w, stacks[:n], labels[:n], infos[:n])
800+
}
801+
// Profile grew; try again with larger slices
802+
}
803+
}
804+
805+
// printGoroutineStacksWithLabels formats goroutine records in a format similar
806+
// to runtime.Stack, but also includes labels as well.
807+
func printGoroutineStacksWithLabels(
808+
w io.Writer, records []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo,
809+
) error {
810+
for i, r := range records {
811+
goroutineID := infos[i].ID
812+
state := "unknown"
813+
if i < len(infos) && infos[i].State != "" {
814+
state = infos[i].State
815+
}
816+
817+
// Calculate blocked time (same logic as goroutineheader in traceback.go)
818+
blockedStr := ""
819+
if i < len(infos) && infos[i].WaitSince != 0 {
820+
// Check if this is a waiting or syscall state where blocked time applies.
821+
if strings.Contains(state, "waiting") || strings.Contains(state, "syscall") {
822+
waitfor := (runtime_nanotime() - infos[i].WaitSince) / 60e9 // convert to minutes
823+
if waitfor >= 1 {
824+
blockedStr = fmt.Sprintf(", %d minutes", waitfor)
825+
}
826+
}
827+
}
828+
829+
// Get label information using the same format as debug=1.
830+
var labelStr string
831+
if i < len(labels) && labels[i] != nil {
832+
labelMap := (*labelMap)(labels[i])
833+
if labelMap != nil && len(labelMap.list) > 0 {
834+
labelStr = labelMap.String()
835+
}
836+
}
837+
838+
fmt.Fprintf(w, "goroutine %d [%s%s]%s:\n", goroutineID, state, blockedStr, labelStr)
839+
840+
// Print stack trace with runtime filtering (same logic as debug=2).
841+
show := false
842+
frames := runtime.CallersFrames(r.Stack)
843+
for {
844+
frame, more := frames.Next()
845+
name := frame.Function
846+
// Hide runtime.goexit, runtime.main as well as any runtime prior to the
847+
// first non-runtime function.
848+
if name != "runtime.goexit" && name != "runtime.main" && (show || !(strings.HasPrefix(name, "runtime.") || strings.HasPrefix(name, "internal/runtime/"))) {
849+
show = true
850+
if name == "" {
851+
name = "unknown"
852+
}
853+
fmt.Fprintf(w, "%s(...)\n", name)
854+
fmt.Fprintf(w, "\t%s:%d +0x%x\n", frame.File, frame.Line, frame.PC-frame.Entry)
855+
}
856+
if !more {
857+
break
858+
}
859+
}
860+
861+
// Print "created by" information if available (skip main goroutine)
862+
if i < len(infos) && goroutineID != 1 && infos[i].CreationPC != 0 {
863+
if f := runtime.FuncForPC(infos[i].CreationPC - 1); f != nil {
864+
name := f.Name()
865+
file, line := f.FileLine(infos[i].CreationPC - 1)
866+
if infos[i].CreatorID != 0 {
867+
fmt.Fprintf(w, "created by %s in goroutine %d\n", name, infos[i].CreatorID)
868+
} else {
869+
fmt.Fprintf(w, "created by %s\n", name)
870+
}
871+
fmt.Fprintf(w, "\t%s:%d +0x%x\n", file, line, infos[i].CreationPC)
872+
}
873+
}
874+
875+
if i < len(records)-1 {
876+
fmt.Fprintln(w)
877+
}
878+
}
879+
880+
return nil
881+
}
882+
779883
func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]profilerecord.StackRecord, []unsafe.Pointer) (int, bool)) error {
780884
// Find out how many records there are (fetch(nil)),
781885
// allocate that many records, and get the data.
@@ -977,9 +1081,15 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu
9771081
//go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels
9781082
func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool)
9791083

1084+
//go:linkname pprof_goroutineStacksWithLabels runtime.pprof_goroutineStacksWithLabels
1085+
func pprof_goroutineStacksWithLabels(stacks []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo) (n int, ok bool)
1086+
9801087
//go:linkname pprof_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
9811088
func pprof_cyclesPerSecond() int64
9821089

1090+
//go:linkname runtime_nanotime runtime.nanotime
1091+
func runtime_nanotime() int64
1092+
9831093
//go:linkname pprof_memProfileInternal runtime.pprof_memProfileInternal
9841094
func pprof_memProfileInternal(p []profilerecord.MemProfileRecord, inuseZero bool) (n int, ok bool)
9851095

0 commit comments

Comments
 (0)