Commit 7242d7e

runtime/pprof: add debug=26257 goroutine profile with labels and reduced STW
This adds a new goroutine profile mode (debug=26257) that emits one entry per goroutine in a format nearly identical to debug=2, but that additionally includes any pprof labels in the header, following the [state] segment, e.g.:

    goroutine 123 [select, 2 minutes]{"svc": "users"}:

The implementation of this mode is, however, significantly different from debug=2, and is instead based on the same underlying mechanism as debug=1. Unlike the collection of debug=2 profiles, which walks all stacks while the world is stopped, this mode uses the concurrent collection integrated with the scheduler that backs debug=1.

As a result, this new mode reduces the duration of the stop-the-world phase compared to debug=2, particularly when profiling processes with many goroutines, as demonstrated by the included benchmark:

                         │    debug=2     │           debug=26257            │
                         │ max_latency_ns │ max_latency_ns        vs base    │
goroutines=100x3-14          1013.17k ± 47%     84.06k ±   27%  -91.70% (p=0.002 n=6)
goroutines=100x10-14          769.23k ±  7%     80.29k ±   22%  -89.56% (p=0.002 n=6)
goroutines=100x50-14          2172.4k ±  9%     181.8k ±   46%  -91.63% (p=0.002 n=6)
goroutines=1000x3-14          7133.9k ±  3%     195.7k ±   42%  -97.26% (p=0.002 n=6)
goroutines=1000x10-14        11787.6k ± 48%     494.4k ±   77%  -95.81% (p=0.002 n=6)
goroutines=1000x50-14        20234.0k ± 87%     174.8k ±  137%  -99.14% (p=0.002 n=6)
goroutines=10000x3-14        68611.0k ± 49%     168.5k ± 2768%  -99.75% (p=0.002 n=6)
goroutines=10000x10-14        60.261M ± 95%     3.460M ±  166%  -94.26% (p=0.002 n=6)
goroutines=10000x50-14       284.144M ± 40%     4.672M ±   89%  -98.36% (p=0.002 n=6)
goroutines=25000x3-14        171.290M ± 48%     4.287M ±  394%  -97.50% (p=0.002 n=6)
goroutines=25000x10-14       150.827M ± 92%     6.424M ±  158%  -95.74% (p=0.002 n=6)
goroutines=25000x50-14       708.238M ± 34%     2.249M ±  410%  -99.68% (p=0.002 n=6)
geomean                        25.08M           624.2k          -97.51%

This concurrent collection approach, and its relaxed consistency compared to keeping the world stopped, does mean the behavior of this new mode is not exactly identical to debug=2. Additionally, this mode currently always elides argument values when printing stacks, whereas debug=2 includes them most of the time. Given these behavior differences, and despite the performance benefits, the new implementation is not used for the existing debug=2 mode; it backs only the new mode, making it an opt-in alternative.
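For illustration, a minimal sketch of how a program built with this patch could request the new mode: the existing "goroutine" Profile passes its debug argument straight through to writeGoroutine, so Profile.WriteTo (or, via net/http/pprof, /debug/pprof/goroutine?debug=26257) reaches the new path. The label values below are made up.

// Sketch: dump the new debug=26257 goroutine profile to stderr.
// Assumes a toolchain that includes this patch; the APIs used here
// (WithLabels, SetGoroutineLabels, Lookup("goroutine").WriteTo) are
// the existing runtime/pprof ones.
package main

import (
	"context"
	"os"
	"runtime/pprof"
)

func main() {
	// Attach labels so they appear in the per-goroutine headers.
	ctx := pprof.WithLabels(context.Background(), pprof.Labels("svc", "users"))
	pprof.SetGoroutineLabels(ctx)

	// debug=26257 selects the new concurrent, label-aware text format;
	// debug=2 would take the existing stop-the-world path instead.
	if err := pprof.Lookup("goroutine").WriteTo(os.Stderr, 26257); err != nil {
		panic(err)
	}
}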
1 parent 8f7bb0b commit 7242d7e

4 files changed: 538 additions & 3 deletions

src/internal/profilerecord/profilerecord.go

Lines changed: 8 additions & 0 deletions
@@ -26,3 +26,11 @@ type BlockProfileRecord struct {
 	Cycles int64
 	Stack  []uintptr
 }
+
+type GoroutineInfo struct {
+	ID         uint64
+	State      string
+	CreatorID  uint64
+	CreationPC uintptr
+	WaitSince  int64 // approx time when the g became blocked, in nanoseconds
+}

src/runtime/mprof.go

Lines changed: 84 additions & 3 deletions
@@ -1320,19 +1320,59 @@ func goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.P
 		labels = nil
 	}
 
-	return goroutineProfileWithLabelsConcurrent(p, labels)
+	return goroutineProfileWithLabelsConcurrent(p, labels, nil)
 }
 
 var goroutineProfile = struct {
 	sema    uint32
 	active  bool
 	offset  atomic.Int64
 	records []profilerecord.StackRecord
-	labels  []unsafe.Pointer
+	// labels, if non-nil, should have the same length as records.
+	labels []unsafe.Pointer
+	// infos, if non-nil, should have the same length as records.
+	infos []profilerecord.GoroutineInfo
 }{
 	sema: 1,
 }
 
+// goroutineStatusString converts a goroutine's status to a human-readable
+// string, using the same logic as goroutineheader() in traceback.go.
+func goroutineStatusString(gp1 *g) string {
+	gpstatus := readgstatus(gp1)
+	gpstatus &^= _Gscan // drop the scan bit
+
+	// Basic string status.
+	var status string
+	switch gpstatus {
+	case _Gidle:
+		status = "idle"
+	case _Grunnable:
+		status = "runnable"
+	case _Grunning:
+		status = "running"
+	case _Gsyscall:
+		status = "syscall"
+	case _Gwaiting:
+		status = "waiting"
+	case _Gdead:
+		status = "dead"
+	case _Gcopystack:
+		status = "copystack"
+	case _Gpreempted:
+		status = "preempted"
+	default:
+		status = "unknown"
+	}
+
+	// Override with the wait reason if available (same logic as goroutineheader).
+	if gpstatus == _Gwaiting && gp1.waitreason != waitReasonZero {
+		status = gp1.waitreason.String()
+	}
+
+	return status
+}
+
 // goroutineProfileState indicates the status of a goroutine's stack for the
 // current in-progress goroutine profile. Goroutines' stacks are initially
 // "Absent" from the profile, and end up "Satisfied" by the time the profile is
@@ -1366,7 +1406,18 @@ func (p *goroutineProfileStateHolder) CompareAndSwap(old, new goroutineProfileSt
 	return (*atomic.Uint32)(p).CompareAndSwap(uint32(old), uint32(new))
 }
 
-func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
+// goroutineProfileWithLabelsConcurrent collects the stacks of all user
+// goroutines into the passed slice, provided it has sufficient length to do
+// so, returning the number collected and ok=true. If the passed slice does
+// not have sufficient length, it returns the required length and ok=false
+// instead.
+//
+// Additional information about each goroutine can optionally be collected into
+// the labels and/or infos slices if they are non-nil. Their length must match
+// len(p) if non-nil.
+func goroutineProfileWithLabelsConcurrent(
+	p []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo,
+) (n int, ok bool) {
 	if len(p) == 0 {
 		// An empty slice is obviously too small. Return a rough
 		// allocation estimate without bothering to STW. As long as
@@ -1411,6 +1462,17 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
 	if labels != nil {
 		labels[0] = ourg.labels
 	}
+
+	// If extended goroutine info collection is enabled, gather the additional
+	// fields as well.
+	if len(infos) > 0 {
+		infos[0].ID = ourg.goid
+		infos[0].CreatorID = ourg.parentGoid
+		infos[0].CreationPC = ourg.gopc
+		infos[0].State = goroutineStatusString(ourg)
+		infos[0].WaitSince = ourg.waitsince
+	}
+
 	ourg.goroutineProfiled.Store(goroutineProfileSatisfied)
 	goroutineProfile.offset.Store(1)
@@ -1422,6 +1484,8 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
 	goroutineProfile.active = true
 	goroutineProfile.records = p
 	goroutineProfile.labels = labels
+	goroutineProfile.infos = infos
+
 	// The finalizer goroutine needs special handling because it can vary over
 	// time between being a user goroutine (eligible for this profile) and a
 	// system goroutine (to be excluded). Pick one before restarting the world.
@@ -1453,6 +1517,7 @@ func goroutineProfileWithLabelsConcurrent(p []profilerecord.StackRecord, labels
 	goroutineProfile.active = false
 	goroutineProfile.records = nil
 	goroutineProfile.labels = nil
+	goroutineProfile.infos = nil
 	startTheWorld(stw)
 
 	// Restore the invariant that every goroutine struct in allgs has its
@@ -1575,9 +1640,20 @@ func doRecordGoroutineProfile(gp1 *g, pcbuf []uintptr) {
 	// to avoid schedule delays.
 	systemstack(func() { saveg(^uintptr(0), ^uintptr(0), gp1, &goroutineProfile.records[offset], pcbuf) })
 
+	// If label collection is enabled, collect the labels.
 	if goroutineProfile.labels != nil {
 		goroutineProfile.labels[offset] = gp1.labels
 	}
+
+	// If extended goroutine info collection is enabled and there is sufficient
+	// capacity to do so, gather the additional goroutine fields as well.
+	if goroutineProfile.infos != nil && offset < len(goroutineProfile.infos) {
+		goroutineProfile.infos[offset].ID = gp1.goid
+		goroutineProfile.infos[offset].CreatorID = gp1.parentGoid
+		goroutineProfile.infos[offset].CreationPC = gp1.gopc
+		goroutineProfile.infos[offset].State = goroutineStatusString(gp1)
+		goroutineProfile.infos[offset].WaitSince = gp1.waitsince
+	}
 }
 
 func goroutineProfileWithLabelsSync(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool) {
@@ -1733,3 +1809,8 @@ func Stack(buf []byte, all bool) int {
 	}
 	return n
 }
+
+//go:linkname pprof_goroutineStacksWithLabels
+func pprof_goroutineStacksWithLabels(stacks []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo) (n int, ok bool) {
+	return goroutineProfileWithLabelsConcurrent(stacks, labels, infos)
+}

src/runtime/pprof/pprof.go

Lines changed: 110 additions & 0 deletions
@@ -749,6 +749,9 @@ func countGoroutine() int {
 
 // writeGoroutine writes the current runtime GoroutineProfile to w.
 func writeGoroutine(w io.Writer, debug int) error {
+	if debug == 26257 {
+		return writeGoroutineStacksWithLabels(w)
+	}
 	if debug >= 2 {
 		return writeGoroutineStacks(w)
 	}
@@ -776,6 +779,107 @@ func writeGoroutineStacks(w io.Writer) error {
 	return err
 }
 
+// writeGoroutineStacksWithLabels writes individual goroutine stack traces like
+// writeGoroutineStacks, but uses the concurrent, reduced-stop-time approach of
+// goroutineProfileWithLabelsConcurrent. It includes the ID, state, and labels
+// for each goroutine, although as these are captured after the world is resumed
+// they are not guaranteed to be entirely consistent as of a single point in
+// time.
+func writeGoroutineStacksWithLabels(w io.Writer) error {
+	n, ok := pprof_goroutineStacksWithLabels(nil, nil, nil)
+
+	for {
+		// Allocate slices for individual goroutine data.
+		stacks := make([]profilerecord.StackRecord, n+10)
+		labels := make([]unsafe.Pointer, n+10)
+		infos := make([]profilerecord.GoroutineInfo, n+10)
+
+		n, ok = pprof_goroutineStacksWithLabels(stacks, labels, infos)
+		if ok {
+			return printGoroutineStacksWithLabels(w, stacks[:n], labels[:n], infos[:n])
+		}
+		// Profile grew; try again with larger slices.
+	}
+}
+
+// printGoroutineStacksWithLabels formats goroutine records in a format similar
+// to runtime.Stack, but also includes labels.
+func printGoroutineStacksWithLabels(
+	w io.Writer, records []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo,
+) error {
+	for i, r := range records {
+		goroutineID := infos[i].ID
+		state := "unknown"
+		if i < len(infos) && infos[i].State != "" {
+			state = infos[i].State
+		}
+
+		// Calculate blocked time (same logic as goroutineheader in traceback.go).
+		blockedStr := ""
+		if i < len(infos) && infos[i].WaitSince != 0 {
+			// Check if this is a waiting or syscall state where blocked time applies.
+			if strings.Contains(state, "waiting") || strings.Contains(state, "syscall") {
+				waitfor := (runtime_nanotime() - infos[i].WaitSince) / 60e9 // convert to minutes
+				if waitfor >= 1 {
+					blockedStr = fmt.Sprintf(", %d minutes", waitfor)
+				}
+			}
+		}
+
+		// Get label information using the same format as debug=1.
+		var labelStr string
+		if i < len(labels) && labels[i] != nil {
+			labelMap := (*labelMap)(labels[i])
+			if labelMap != nil && len(labelMap.list) > 0 {
+				labelStr = labelMap.String()
+			}
+		}
+
+		fmt.Fprintf(w, "goroutine %d [%s%s]%s:\n", goroutineID, state, blockedStr, labelStr)
+
+		// Print stack trace with runtime filtering (same logic as debug=2).
+		show := false
+		frames := runtime.CallersFrames(r.Stack)
+		for {
+			frame, more := frames.Next()
+			name := frame.Function
+			// Hide runtime.goexit and runtime.main, as well as any runtime frames
+			// prior to the first non-runtime function.
+			if name != "runtime.goexit" && name != "runtime.main" && (show || !(strings.HasPrefix(name, "runtime.") || strings.HasPrefix(name, "internal/runtime/"))) {
+				show = true
+				if name == "" {
+					name = "unknown"
+				}
+				fmt.Fprintf(w, "%s(...)\n", name)
+				fmt.Fprintf(w, "\t%s:%d +0x%x\n", frame.File, frame.Line, frame.PC-frame.Entry)
+			}
+			if !more {
+				break
+			}
+		}
+
+		// Print "created by" information if available (skip the main goroutine).
+		if i < len(infos) && goroutineID != 1 && infos[i].CreationPC != 0 {
+			if f := runtime.FuncForPC(infos[i].CreationPC - 1); f != nil {
+				name := f.Name()
+				file, line := f.FileLine(infos[i].CreationPC - 1)
+				if infos[i].CreatorID != 0 {
+					fmt.Fprintf(w, "created by %s in goroutine %d\n", name, infos[i].CreatorID)
+				} else {
+					fmt.Fprintf(w, "created by %s\n", name)
+				}
+				fmt.Fprintf(w, "\t%s:%d +0x%x\n", file, line, infos[i].CreationPC)
+			}
+		}
+
+		if i < len(records)-1 {
+			fmt.Fprintln(w)
+		}
+	}
+
+	return nil
+}
+
 func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]profilerecord.StackRecord, []unsafe.Pointer) (int, bool)) error {
 	// Find out how many records there are (fetch(nil)),
 	// allocate that many records, and get the data.
@@ -977,9 +1081,15 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu
 //go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels
 func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool)
 
+//go:linkname pprof_goroutineStacksWithLabels runtime.pprof_goroutineStacksWithLabels
+func pprof_goroutineStacksWithLabels(stacks []profilerecord.StackRecord, labels []unsafe.Pointer, infos []profilerecord.GoroutineInfo) (n int, ok bool)
+
 //go:linkname pprof_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
 func pprof_cyclesPerSecond() int64
 
+//go:linkname runtime_nanotime runtime.nanotime
+func runtime_nanotime() int64
+
 //go:linkname pprof_memProfileInternal runtime.pprof_memProfileInternal
 func pprof_memProfileInternal(p []profilerecord.MemProfileRecord, inuseZero bool) (n int, ok bool)
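For reference, a sketch of the text the new mode would emit for one goroutine, following the Fprintf format strings in printGoroutineStacksWithLabels above: a header with ID, state, and labels, the filtered frames, and a created-by trailer. The goroutine ID, function names, paths, and hex offsets are made up for illustration, and the optional ", N minutes" wait segment is omitted since it only appears for long-blocked goroutines.

goroutine 42 [select]{"svc": "users"}:
main.(*worker).run(...)
	/app/worker.go:87 +0x1c4
main.startWorkers.func1(...)
	/app/main.go:34 +0x25
created by main.startWorkers in goroutine 1
	/app/main.go:31 +0x4a2c58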
