Skip to content

Commit abf8ca5

Browse files
committed
runtime/pprof: add debug=26257 goroutine profile with labels and reduced STW
This adds a new goroutine profile mode (debug=26257) that emits one entry per goroutine in a format nearly identical to debug=2 that additionally includes any pprof labels in the header, following the [state] segment, e.g.: goroutine 123 [select, 2 minutes]{"svc": "users"}: The implementation of this mode is however significantly different from debug=2, and instead based on the same underlying mechanism as debug=1. Unlike the collection of debug=2 profiles, which walks all stacks while the world is stopped, the implementation of this mode uses the concurrent collection integrated with the scheduler that backs debug=1. As a result, this new mode reduces the duration of the stop-the-world phase, particularly when profiling processes with many goroutines, compared to debug=2, as demonstrated by the included benchmark: │ debug=2 │ debug=26257 │ max_latency_ns │ max_latency_ns vs base goroutines=100x3-14 1013.17k ± 47% 84.06k ± 27% -91.70% (p=0.002 n=6) goroutines=100x10-14 769.23k ± 7% 80.29k ± 22% -89.56% (p=0.002 n=6) goroutines=100x50-14 2172.4k ± 9% 181.8k ± 46% -91.63% (p=0.002 n=6) goroutines=1000x3-14 7133.9k ± 3% 195.7k ± 42% -97.26% (p=0.002 n=6) goroutines=1000x10-14 11787.6k ± 48% 494.4k ± 77% -95.81% (p=0.002 n=6) goroutines=1000x50-14 20234.0k ± 87% 174.8k ± 137% -99.14% (p=0.002 n=6) goroutines=10000x3-14 68611.0k ± 49% 168.5k ± 2768% -99.75% (p=0.002 n=6) goroutines=10000x10-14 60.261M ± 95% 3.460M ± 166% -94.26% (p=0.002 n=6) goroutines=10000x50-14 284.144M ± 40% 4.672M ± 89% -98.36% (p=0.002 n=6) goroutines=25000x3-14 171.290M ± 48% 4.287M ± 394% -97.50% (p=0.002 n=6) goroutines=25000x10-14 150.827M ± 92% 6.424M ± 158% -95.74% (p=0.002 n=6) goroutines=25000x50-14 708.238M ± 34% 2.249M ± 410% -99.68% (p=0.002 n=6) geomean 25.08M 624.2k -97.51% This concurrent collection approach and its relaxed consistency compared to keeping the world stopped does mean the behavior of this new mode is not exactly identical to debug=2.
Additionally, currently this mode always elides argument values when printing stacks, whereas debug=2 includes them most of the time. These behavior differences mean that despite the performance benefits, the new implementation is not used for the existing debug=2 mode, but rather only for the new mode, making it an opt-in alternative.
1 parent 8f7bb0b commit abf8ca5

File tree

4 files changed

+516
-2
lines changed

4 files changed

+516
-2
lines changed

src/internal/profilerecord/profilerecord.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,11 @@ type BlockProfileRecord struct {
2626
Cycles int64
2727
Stack []uintptr
2828
}
29+
30+
// GoroutineInfo holds per-goroutine identity and scheduling metadata captured
// alongside a goroutine profile stack record.
type GoroutineInfo struct {
	ID uint64 // goroutine ID (goid)
	State string // human-readable scheduler state, e.g. "running" or a wait reason
	CreatorID uint64 // goid of the goroutine that created this one; 0 if unknown
	CreationPC uintptr // PC of the go statement that created this goroutine
	WaitSince int64 // approx time when the g became blocked, in nanoseconds
}

src/runtime/mprof.go

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,7 +1320,7 @@ func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.P
13201320
labels = nil
13211321
}
13221322

1323-
return goroutineProfileWithLabelsConcurrent(p, labels)
1323+
return goroutineProfileWithLabelsConcurrent(p, labels, nil)
13241324
}
13251325

13261326
var goroutineProfile = struct {
@@ -1329,10 +1329,48 @@ var goroutineProfile = struct {
13291329
offset atomic.Int64
13301330
records []profilerecord.StackRecord
13311331
labels []unsafe.Pointer
1332+
infos []profilerecord.GoroutineInfo
13321333
}{
13331334
sema: 1,
13341335
}
13351336

1337+
// goroutineStatusString converts goroutine status to human-readable string
1338+
// Uses the same logic as goroutineheader() in traceback.go
1339+
func goroutineStatusString(gp1 *g) string {
1340+
gpstatus := readgstatus(gp1)
1341+
gpstatus &^= _Gscan // drop the scan bit
1342+
1343+
// Basic string status
1344+
var status string
1345+
switch gpstatus {
1346+
case _Gidle:
1347+
status = "idle"
1348+
case _Grunnable:
1349+
status = "runnable"
1350+
case _Grunning:
1351+
status = "running"
1352+
case _Gsyscall:
1353+
status = "syscall"
1354+
case _Gwaiting:
1355+
status = "waiting"
1356+
case _Gdead:
1357+
status = "dead"
1358+
case _Gcopystack:
1359+
status = "copystack"
1360+
case _Gpreempted:
1361+
status = "preempted"
1362+
default:
1363+
status = "unknown"
1364+
}
1365+
1366+
// Override with wait reason if available (same logic as goroutineheader)
1367+
if gpstatus == _Gwaiting && gp1.waitreason != waitReasonZero {
1368+
status = gp1.waitreason.String()
1369+
}
1370+
1371+
return status
1372+
}
1373+
13361374
// goroutineProfileState indicates the status of a goroutine's stack for the
13371375
// current in-progress goroutine profile. Goroutines' stacks are initially
13381376
// "Absent" from the profile, and end up "Satisfied" by the time the profile is
@@ -1366,7 +1404,7 @@ func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileSt
13661404
return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new))
13671405
}
13681406

1369-
func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
1407+
func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo) (n int, ok bool) {
13701408
if len(p) == 0 {
13711409
// An empty slice is obviously too small. Return a rough
13721410
// allocation estimate without bothering to STW. As long as
@@ -1411,6 +1449,16 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
14111449
if labels != nil {
14121450
labels[0] = ourg.labels
14131451
}
1452+
1453+
// If individual id collection is enabled, also store ID.
1454+
if len(infos) > 0 {
1455+
infos[0].ID = ourg.goid
1456+
infos[0].State = goroutineStatusString(ourg)
1457+
infos[0].CreatorID = ourg.parentGoid
1458+
infos[0].CreationPC = ourg.gopc
1459+
infos[0].WaitSince = ourg.waitsince
1460+
}
1461+
14141462
ourg.goroutineProfiled.Store(goroutineProfileSatisfied)
14151463
goroutineProfile.offset.Store(1)
14161464

@@ -1422,6 +1470,8 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
14221470
goroutineProfile.active = true
14231471
goroutineProfile.records = p
14241472
goroutineProfile.labels = labels
1473+
goroutineProfile.infos = infos
1474+
14251475
// The finalizer goroutine needs special handling because it can vary over
14261476
// time between being a user goroutine (eligible for this profile) and a
14271477
// system goroutine (to be excluded). Pick one before restarting the world.
@@ -1453,6 +1503,7 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
14531503
goroutineProfile.active = false
14541504
goroutineProfile.records = nil
14551505
goroutineProfile.labels = nil
1506+
goroutineProfile.infos = nil
14561507
startTheWorld(stw)
14571508

14581509
// Restore the invariant that every goroutine struct in allgs has its
@@ -1578,6 +1629,15 @@ func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) {
15781629
if goroutineProfile.labels != nil {
15791630
goroutineProfile.labels[offset] = gp1.labels
15801631
}
1632+
1633+
// If goroutine info collection is enabled, store ID, state, and creation info.
1634+
if goroutineProfile.infos != nil && offset < len(goroutineProfile.infos) {
1635+
goroutineProfile.infos[offset].ID = gp1.goid
1636+
goroutineProfile.infos[offset].State = goroutineStatusString(gp1)
1637+
goroutineProfile.infos[offset].CreatorID = gp1.parentGoid
1638+
goroutineProfile.infos[offset].CreationPC = gp1.gopc
1639+
goroutineProfile.infos[offset].WaitSince = gp1.waitsince
1640+
}
15811641
}
15821642

15831643
func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
@@ -1733,3 +1793,8 @@ func Stack(buf []byte, all bool) int {
17331793
}
17341794
return n
17351795
}
1796+
1797+
// pprof_goroutineStacksWithLabels is linknamed into runtime/pprof to back the
// debug=26257 goroutine profile. It delegates to the concurrent, reduced
// stop-the-world collection path, additionally filling infos when provided.
//
//go:linkname pprof_goroutineStacksWithLabels
func pprof_goroutineStacksWithLabels(stacks []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo) (n int, ok bool) {
	return goroutineProfileWithLabelsConcurrent(stacks, labels, infos)
}

src/runtime/pprof/pprof.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -749,6 +749,9 @@ func countGoroutine() int {
749749

750750
// writeGoroutine writes the current runtime GoroutineProfile to w.
751751
func writeGoroutine(w io.Writer, debug int) error {
752+
if debug == 26257 {
753+
return writeGoroutineStacksWithLabels(w)
754+
}
752755
if debug >= 2 {
753756
return writeGoroutineStacks(w)
754757
}
@@ -776,6 +779,106 @@ func writeGoroutineStacks(w io.Writer) error {
776779
return err
777780
}
778781

782+
// writeGoroutineStacksWithLabels writes individual goroutine stack traces like
783+
// writeGoroutineStacks, but uses the concurrent, reduced-stop time approach of
784+
// goroutineProfileWithLabelsConcurrent. It the ID, state, and labels for each
785+
// goroutine, although as these are captured after the world is resumed they are
786+
// not guaranteed to be entirely consistent as of a single point in time.
787+
func writeGoroutineStacksWithLabels(w io.Writer) error {
788+
n, ok := pprof_goroutineStacksWithLabels(nil, nil, nil)
789+
790+
for {
791+
// Allocate slices for individual goroutine data
792+
stacks := make([]profilerecord.StackRecord, n+10)
793+
labels := make([]unsafe.Pointer, n+10)
794+
infos := make([]profilerecord.GoroutineInfo, n+10)
795+
796+
n, ok = pprof_goroutineStacksWithLabels(stacks, labels, infos)
797+
if ok {
798+
return printGoroutineStacksWithLabels(w, stacks[:n], labels[:n], infos[:n])
799+
}
800+
// Profile grew; try again with larger slices
801+
}
802+
}
803+
804+
// printGoroutineStacksWithLabels formats goroutine records in a format similar
805+
// to runtime.Stack, but also includes labels as well.
806+
func printGoroutineStacksWithLabels(
807+
w io.Writer, records []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo,
808+
) error {
809+
for i, r := range records {
810+
goroutineID := infos[i].ID
811+
state := "unknown"
812+
if i < len(infos) && infos[i].State != "" {
813+
state = infos[i].State
814+
}
815+
816+
// Calculate blocked time (same logic as goroutineheader in traceback.go)
817+
blockedStr := ""
818+
if i < len(infos) && infos[i].WaitSince != 0 {
819+
// Check if this is a waiting or syscall state where blocked time applies.
820+
if strings.Contains(state, "waiting") || strings.Contains(state, "syscall") {
821+
waitfor := (runtime_nanotime() - infos[i].WaitSince) / 60e9 // convert to minutes
822+
if waitfor >= 1 {
823+
blockedStr = fmt.Sprintf(", %d minutes", waitfor)
824+
}
825+
}
826+
}
827+
828+
// Get label information using the same format as debug=1.
829+
var labelStr string
830+
if i < len(labels) && labels[i] != nil {
831+
labelMap := (*labelMap)(labels[i])
832+
if labelMap != nil && len(labelMap.list) > 0 {
833+
labelStr = labelMap.String()
834+
}
835+
}
836+
837+
fmt.Fprintf(w, "goroutine %d [%s%s]%s:\n", goroutineID, state, blockedStr, labelStr)
838+
839+
// Print stack trace with runtime filtering (same logic as debug=2).
840+
show := false
841+
frames := runtime.CallersFrames(r.Stack)
842+
for {
843+
frame, more := frames.Next()
844+
name := frame.Function
845+
if name == "" {
846+
show = true
847+
fmt.Fprintf(w, "%s\n", name)
848+
fmt.Fprintf(w, "\t%s:%d +0x%x\n", frame.File, frame.Line, frame.PC-frame.Entry)
849+
} else if name != "runtime.goexit" && name != "runtime.main" && (show || !(strings.HasPrefix(name, "runtime.") || strings.HasPrefix(name, "internal/runtime/"))) {
850+
// Hide runtime.goexit, runtime.main and any runtime functions at the beginning.
851+
show = true
852+
fmt.Fprintf(w, "%s\n", name)
853+
fmt.Fprintf(w, "\t%s:%d +0x%x\n", frame.File, frame.Line, frame.PC-frame.Entry)
854+
}
855+
if !more {
856+
break
857+
}
858+
}
859+
860+
// Print "created by" information if available (skip main goroutine)
861+
if i < len(infos) && goroutineID != 1 && infos[i].CreationPC != 0 {
862+
if f := runtime.FuncForPC(infos[i].CreationPC - 1); f != nil {
863+
name := f.Name()
864+
file, line := f.FileLine(infos[i].CreationPC - 1)
865+
if infos[i].CreatorID != 0 {
866+
fmt.Fprintf(w, "created by %s in goroutine %d\n", name, infos[i].CreatorID)
867+
} else {
868+
fmt.Fprintf(w, "created by %s\n", name)
869+
}
870+
fmt.Fprintf(w, "\t%s:%d +0x%x\n", file, line, infos[i].CreationPC)
871+
}
872+
}
873+
874+
if i < len(records)-1 {
875+
fmt.Fprintln(w)
876+
}
877+
}
878+
879+
return nil
880+
}
881+
779882
func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]profilerecord.StackRecord, []unsafe.Pointer) (int, bool)) error {
780883
// Find out how many records there are (fetch(nil)),
781884
// allocate that many records, and get the data.
@@ -977,9 +1080,15 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu
9771080
//go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels
9781081
func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool)
9791082

1083+
//go:linkname pprof_goroutineStacksWithLabels runtime.pprof_goroutineStacksWithLabels
1084+
func pprof_goroutineStacksWithLabels(stacks []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo) (n int, ok bool)
1085+
9801086
//go:linkname pprof_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
9811087
func pprof_cyclesPerSecond() int64
9821088

1089+
//go:linkname runtime_nanotime runtime.nanotime
1090+
func runtime_nanotime() int64
1091+
9831092
//go:linkname pprof_memProfileInternal runtime.pprof_memProfileInternal
9841093
func pprof_memProfileInternal(p []profilerecord.MemProfileRecord, inuseZero bool) (n int, ok bool)
9851094

0 commit comments

Comments
 (0)