From 49e1dc95c82ecab549654b0319d7179ac6f555fc Mon Sep 17 00:00:00 2001 From: shuakami Date: Mon, 6 Oct 2025 17:30:00 +0800 Subject: [PATCH] runtime: limit total frames traversed during stack walking Stack walking in tracebackPCs now limits the total number of frames traversed, including wrapper frames, to prevent excessive CPU time when unwinding stacks with very deep context chains. Previously, tracebackPCs only limited the number of output frames (64) but would traverse all wrapper frames without limit. With extremely deep context chains (e.g., 10 million layers), this could cause stack walking to take seconds, as wrapper frames for methods like context.(*valueCtx).Deadline are traversed but not counted toward the output limit. This change adds a maxTotalFrames constant (1024) to cap the total number of physical frames walked. This is high enough for normal stack traces while preventing multi-second delays in CPU profiling and other stack walking scenarios. Fixes #75583 --- src/runtime/testdata/testprog/deepcontext.go | 58 ++++++++++++++++++++ src/runtime/traceback.go | 11 ++++ src/runtime/traceback_test.go | 13 +++++ 3 files changed, 82 insertions(+) create mode 100644 src/runtime/testdata/testprog/deepcontext.go diff --git a/src/runtime/testdata/testprog/deepcontext.go b/src/runtime/testdata/testprog/deepcontext.go new file mode 100644 index 00000000000000..95bd44c1006e37 --- /dev/null +++ b/src/runtime/testdata/testprog/deepcontext.go @@ -0,0 +1,58 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "context" + "fmt" + "runtime" + "time" +) + +func init() { + register("DeepContextChain", DeepContextChain) +} + +// DeepContextChain tests that traceback completes in reasonable time +// even with very deep context chains (issue #75583). +func DeepContextChain() { + // Create a context chain deep enough to trigger the frame limit. + // We use 2000 layers to ensure we exceed the limit (1024) while + // keeping the test fast. + const depth = 2000 + ctx := context.Background() + for i := 0; i < depth; i++ { + ctx = context.WithValue(ctx, i, i) + } + + // Start profiling to trigger stack walking with deep context + start := time.Now() + + // Get a stack trace multiple times + // This simulates what happens during CPU profiling + for i := 0; i < 10; i++ { + var pcs [64]uintptr + n := runtime.Callers(0, pcs[:]) + if n == 0 { + fmt.Println("FAIL: got 0 callers") + return + } + + // Call Deadline to ensure the deep context chain is traversed + // during any potential stack walking + _, _ = ctx.Deadline() + } + + elapsed := time.Since(start) + + // The test should complete quickly. If it takes more than 1 second, + // something is wrong (likely walking too many frames). + if elapsed > time.Second { + fmt.Printf("FAIL: test took %v, expected < 1s\n", elapsed) + return + } + + fmt.Println("OK") +} diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 8882c306edb736..a9edfabcc5b34d 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -620,7 +620,18 @@ func (u *unwinder) cgoCallers(pcBuf []uintptr) int { func tracebackPCs(u *unwinder, skip int, pcBuf []uintptr) int { var cgoBuf [32]uintptr n := 0 + // maxTotalFrames limits the total number of frames we'll walk through, + // including wrapper frames. This prevents excessive CPU time when + // unwinding stacks with very deep context chains (issue #75583). + // The limit of 1024 is high enough for normal stacks while preventing + // multi-second delays from walking millions of wrapper frames. + const maxTotalFrames = 1024 + totalFrames := 0 for ; n < len(pcBuf) && u.valid(); u.next() { + totalFrames++ + if totalFrames >= maxTotalFrames { + break + } f := u.frame.fn cgoN := u.cgoCallers(cgoBuf[:]) diff --git a/src/runtime/traceback_test.go b/src/runtime/traceback_test.go index 1dac91311ca9a3..ec979c2296e3d1 100644 --- a/src/runtime/traceback_test.go +++ b/src/runtime/traceback_test.go @@ -882,3 +882,16 @@ func TestSetCgoTracebackNoCgo(t *testing.T) { t.Fatalf("want %s, got %s\n", want, output) } } + +// TestDeepContextChainTraceback tests that tracebackPCs completes in reasonable +// time even with very deep context chains (issue #75583). +// This test creates a context chain deep enough to trigger the frame limit +// and verifies that tracing completes quickly. +func TestDeepContextChainTraceback(t *testing.T) { + output := runTestProg(t, "testprog", "DeepContextChain") + if !strings.Contains(output, "OK") { + t.Fatalf("expected OK, got:\n%s", output) + } + // The test should complete in reasonable time. If it hangs or takes + // multiple seconds, the frame limit is not working. +}