Skip to content

Commit 5ff3b2b

Browse files
authored
Unwind past Go runtime.morestack (#184)
In Go, [`runtime.morestack`](https://github.com/golang/go/blob/7b60d06739/src/runtime/asm_amd64.s#L680-L680) is called by goroutines that have exhausted their stack space and need to grow it. This can be a significant bottleneck in some programs. As can be seen from the linked source, the function clears the frame pointer before calling into `newstack`, making it impossible for us to unwind further. Luckily, before doing so, it stashes the necessary registers on the current goroutine. This PR introduces a new unwinding command specifically for unwinding past `runtime.morestack`. It uses the infrastructure already developed for Go Custom Labels to find the current goroutine, then restores the stack pointer, program counter and frame pointer from the values saved there. Currently, this only works for Go 1.25 (earlier versions lay out the saved registers differently) and does not work for kernel stacks; making it more general will be done in a [later follow-up](parca-dev/parca-agent#3124).
1 parent e425f7f commit 5ff3b2b

File tree

13 files changed

+139
-59
lines changed

13 files changed

+139
-59
lines changed

nativeunwind/elfunwindinfo/elfgopclntab.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ var goFunctionsStopDelta = map[string]*sdtypes.UnwindInfo{
3636
// signal return frame
3737
"runtime.sigreturn": &sdtypes.UnwindInfoSignal,
3838
"runtime.sigreturn__sigaction": &sdtypes.UnwindInfoSignal,
39+
40+
"runtime.morestack": &sdtypes.UnwindInfoGoMorestack,
3941
}
4042

4143
const (

nativeunwind/stackdeltatypes/stackdeltatypes.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,13 @@ var UnwindInfoLR = UnwindInfo{
6060
FPOpcode: support.UnwindOpcodeBaseLR,
6161
}
6262

63+
// UnwindInfoGoMorestack contains the description to unwind past the Go
64+
// "runtime.morestack" function.
//
// It is a command-style entry: the opcode is UnwindOpcodeCommand and the
// parameter selects the UnwindCommandGoMorestack command, so no CFA/FP
// expression is encoded here.
65+
var UnwindInfoGoMorestack = UnwindInfo{
66+
Opcode: support.UnwindOpcodeCommand,
67+
Param: support.UnwindCommandGoMorestack,
68+
}
69+
6370
// StackDelta defines the start address for the delta interval, along with
6471
// the unwind information.
6572
type StackDelta struct {

support/ebpf/go_support.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#ifndef OPTI_GOROUTINE_H
2+
#define OPTI_GOROUTINE_H
3+
4+
#include "bpfdefs.h"
5+
#include "tsd.h"
6+
#include "types.h"
7+
8+
// get_go_m_ptr returns the "m" pointer stored in the current goroutine ("g")
// structure of the sampled thread, or NULL if it cannot be determined.
//
// The address of the current g is taken from thread-specific data
// (tls_base + offs->tls_offset). If no TLS location is available
// (offs->tls_offset is 0 or the TSD base is NULL), aarch64 falls back to the
// r28 register captured in the unwind state, while x86_64 gives up. The m
// pointer is then read from g + offs->m_offset.
//
// offs:  per-process Go offsets; tls_offset and m_offset are read.
// state: unwind state; only read on aarch64 (r28), hence UNUSED elsewhere.
//
// Returns the value read from user memory, or NULL on any read failure.
static EBPF_INLINE void *get_go_m_ptr(struct GoLabelsOffsets *offs, UNUSED UnwindState *state)
{
  u64 g_addr     = 0;
  void *tls_base = NULL;
  if (tsd_get_base(&tls_base) < 0) {
    DEBUG_PRINT("cl mptr new: failed to get tsd base; can't read m_ptr");
    return NULL;
  }
  DEBUG_PRINT(
    "cl mptr new: read tsd_base at 0x%lx, g offset: %d", (unsigned long)tls_base, offs->tls_offset);

  if (offs->tls_offset == 0 || tls_base == NULL) {
#if defined(__aarch64__)
    // On aarch64 for !iscgo programs the g is only stored in r28 register.
    g_addr = state->r28;
#elif defined(__x86_64__)
    DEBUG_PRINT("cl mptr new: TLS offset for g pointer missing for amd64");
    return NULL;
#endif
  }

  // No g from a register: load it from thread-specific data instead.
  if (g_addr == 0) {
    if (bpf_probe_read_user(&g_addr, sizeof(void *), (void *)((s64)tls_base + offs->tls_offset))) {
      DEBUG_PRINT("cl mptr new: failed to read g_addr, tls_base(%lx)", (unsigned long)tls_base);
      return NULL;
    }
  }

  // A zero g would turn the read below into a read from the bare field offset;
  // bail out explicitly instead of relying on that read faulting.
  if (g_addr == 0) {
    DEBUG_PRINT("cl mptr new: g_addr is NULL; can't read m_ptr");
    return NULL;
  }

  DEBUG_PRINT(
    "cl mptr new: reading m_ptr_addr at 0x%lx + 0x%x", (unsigned long)g_addr, offs->m_offset);
  void *m_ptr_addr = NULL;
  if (bpf_probe_read_user(&m_ptr_addr, sizeof(void *), (void *)(g_addr + offs->m_offset))) {
    DEBUG_PRINT("cl mptr new: failed m_ptr_addr");
    return NULL;
  }
  DEBUG_PRINT("cl mptr new: returning 0x%lx", (unsigned long)m_ptr_addr);
  return m_ptr_addr;
}
46+
47+
#endif

support/ebpf/interpreter_dispatcher.ebpf.c

Lines changed: 7 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// perf event and will call the appropriate tracer for a given process
44

55
#include "bpfdefs.h"
6+
#include "go_support.h"
67
#include "kernel.h"
78
#include "tracemgmt.h"
89
#include "tsd.h"
@@ -151,65 +152,28 @@ get_m_ptr_legacy(struct GoCustomLabelsOffsets *offs, UNUSED UnwindState *state)
151152
void *tls_base = NULL;
152153
res = tsd_get_base(&tls_base);
153154
if (res < 0) {
154-
DEBUG_PRINT("cl: failed to get tsd base; can't read m_ptr");
155+
DEBUG_PRINT("cl mptr legacy: failed to get tsd base; can't read m_ptr");
155156
return NULL;
156157
}
157158

158159
res = bpf_probe_read_user(&g_addr, sizeof(void *), (void *)((u64)tls_base + g_addr_offset));
159160
if (res < 0) {
160-
DEBUG_PRINT("cl: failed to read g_addr, tls_base(%lx)", (unsigned long)tls_base);
161+
DEBUG_PRINT("cl mptr legacy: failed to read g_addr, tls_base(%lx)", (unsigned long)tls_base);
161162
return NULL;
162163
}
163164
#elif defined(__aarch64__)
164165
g_addr = state->r28;
165166
#endif
166167

167-
DEBUG_PRINT("cl: reading m_ptr_addr at 0x%lx + 0x%x", g_addr, offs->m_offset);
168+
DEBUG_PRINT("cl mptr legacy: reading m_ptr_addr at 0x%lx + 0x%x", g_addr, offs->m_offset);
168169
void *m_ptr_addr;
169170
res = bpf_probe_read_user(&m_ptr_addr, sizeof(void *), (void *)(g_addr + offs->m_offset));
170171
if (res < 0) {
171-
DEBUG_PRINT("cl: failed m_ptr_addr");
172+
DEBUG_PRINT("cl mptr legacy: failed m_ptr_addr");
172173
return NULL;
173174
}
174175

175-
return m_ptr_addr;
176-
}
177-
178-
static EBPF_INLINE void *get_m_ptr(struct GoLabelsOffsets *offs, UNUSED UnwindState *state)
179-
{
180-
u64 g_addr = 0;
181-
void *tls_base = NULL;
182-
if (tsd_get_base(&tls_base) < 0) {
183-
DEBUG_PRINT("cl: failed to get tsd base; can't read m_ptr");
184-
return NULL;
185-
}
186-
DEBUG_PRINT(
187-
"cl: read tsd_base at 0x%lx, g offset: %d", (unsigned long)tls_base, offs->tls_offset);
188-
189-
if (offs->tls_offset == 0) {
190-
#if defined(__aarch64__)
191-
// On aarch64 for !iscgo programs the g is only stored in r28 register.
192-
g_addr = state->r28;
193-
#elif defined(__x86_64__)
194-
DEBUG_PRINT("cl: TLS offset for g pointer missing for amd64");
195-
return NULL;
196-
#endif
197-
}
198-
199-
if (g_addr == 0) {
200-
if (bpf_probe_read_user(&g_addr, sizeof(void *), (void *)((s64)tls_base + offs->tls_offset))) {
201-
DEBUG_PRINT("cl: failed to read g_addr, tls_base(%lx)", (unsigned long)tls_base);
202-
return NULL;
203-
}
204-
}
205-
206-
DEBUG_PRINT("cl: reading m_ptr_addr at 0x%lx + 0x%x", (unsigned long)g_addr, offs->m_offset);
207-
void *m_ptr_addr;
208-
if (bpf_probe_read_user(&m_ptr_addr, sizeof(void *), (void *)(g_addr + offs->m_offset))) {
209-
DEBUG_PRINT("cl: failed m_ptr_addr");
210-
return NULL;
211-
}
212-
DEBUG_PRINT("cl: m_ptr_addr 0x%lx", (unsigned long)m_ptr_addr);
176+
DEBUG_PRINT("cl mptr legacy: returning 0x%llx", (u64)m_ptr_addr);
213177
return m_ptr_addr;
214178
}
215179

@@ -244,7 +208,7 @@ static EBPF_INLINE void maybe_add_go_custom_labels(struct pt_regs *ctx, PerCPURe
244208
return;
245209
}
246210

247-
void *m_ptr_addr = get_m_ptr(offsets, &record->state);
211+
void *m_ptr_addr = get_go_m_ptr(offsets, &record->state);
248212
if (!m_ptr_addr) {
249213
return;
250214
}

support/ebpf/native_stack_trace.ebpf.c

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -357,9 +357,10 @@ static EBPF_INLINE u64 unwind_register_address(UnwindState *state, u64 cfa, u8 o
357357
// is marked with UNWIND_COMMAND_STOP which marks entry points (main function,
358358
// thread spawn function, signal handlers, ...).
359359
#if defined(__x86_64__)
360-
static EBPF_INLINE ErrorCode unwind_one_frame(UnwindState *state, bool *stop)
360+
static EBPF_INLINE ErrorCode unwind_one_frame(PerCPURecord *record, bool *stop)
361361
{
362-
*stop = false;
362+
UnwindState *state = &record->state;
363+
*stop = false;
363364

364365
u32 unwindInfo = 0;
365366
u64 rt_regs[18];
@@ -408,6 +409,11 @@ static EBPF_INLINE ErrorCode unwind_one_frame(UnwindState *state, bool *stop)
408409
goto err_native_pc_read;
409410
}
410411
goto frame_ok;
412+
case UNWIND_COMMAND_GO_MORESTACK:
413+
if (!unwinder_unwind_go_morestack(record)) {
414+
goto err_native_pc_read;
415+
}
416+
goto frame_ok;
411417
default: return ERR_UNREACHABLE;
412418
}
413419
} else {
@@ -451,9 +457,10 @@ static EBPF_INLINE ErrorCode unwind_one_frame(UnwindState *state, bool *stop)
451457
return ERR_OK;
452458
}
453459
#elif defined(__aarch64__)
454-
static EBPF_INLINE ErrorCode unwind_one_frame(struct UnwindState *state, bool *stop)
460+
static EBPF_INLINE ErrorCode unwind_one_frame(struct PerCPURecord *record, bool *stop)
455461
{
456-
*stop = false;
462+
UnwindState *state = &record->state;
463+
*stop = false;
457464

458465
u32 unwindInfo = 0;
459466
int addrDiff = 0;
@@ -497,6 +504,11 @@ static EBPF_INLINE ErrorCode unwind_one_frame(struct UnwindState *state, bool *s
497504
goto err_native_pc_read;
498505
}
499506
goto frame_ok;
507+
case UNWIND_COMMAND_GO_MORESTACK:
508+
if (!unwinder_unwind_go_morestack(record)) {
509+
goto err_native_pc_read;
510+
}
511+
goto frame_ok;
500512
default: return ERR_UNREACHABLE;
501513
}
502514
}
@@ -618,7 +630,7 @@ static EBPF_INLINE int unwind_native(struct pt_regs *ctx)
618630

619631
// Unwind the native frame using stack deltas. Stop if no next frame.
620632
bool stop;
621-
error = unwind_one_frame(&record->state, &stop);
633+
error = unwind_one_frame(record, &stop);
622634
if (error || stop) {
623635
break;
624636
}

support/ebpf/stackdeltatypes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#define UNWIND_COMMAND_SIGNAL 3
2727
// Unwind using standard frame pointer
2828
#define UNWIND_COMMAND_FRAME_POINTER 4
29+
// Unwind past the Go runtime.morestack function
30+
#define UNWIND_COMMAND_GO_MORESTACK 5
2931

3032
// If opcode has UNWIND_OPCODEF_DEREF set, the lowest bits of 'param' are used
3133
// as second adder as post-deref operation. This contains the mask for that.

support/ebpf/tracemgmt.h

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "errors.h"
88
#include "extmaps.h"
99
#include "frametypes.h"
10+
#include "go_support.h"
1011
#include "types.h"
1112

1213
#if defined(TESTING_COREDUMP)
@@ -32,17 +33,6 @@
3233

3334
#endif // TESTING_COREDUMP
3435

35-
// increment_metric increments the value of the given metricID by 1
36-
static inline EBPF_INLINE void increment_metric(u32 metricID)
37-
{
38-
u64 *count = bpf_map_lookup_elem(&metrics, &metricID);
39-
if (count) {
40-
++*count;
41-
} else {
42-
DEBUG_PRINT("Failed to lookup metrics map for metricID %d", metricID);
43-
}
44-
}
45-
4636
// Send immediate notifications for event triggers to Go.
4737
// Notifications for GENERIC_PID and TRACES_FOR_SYMBOLIZATION will be
4838
// automatically inhibited until HA resets the type.
@@ -316,6 +306,44 @@ static inline EBPF_INLINE bool unwinder_unwind_frame_pointer(UnwindState *state)
316306
return true;
317307
}
318308

309+
// unwinder_unwind_go_morestack unwinds past the Go "runtime.morestack" frame.
//
// It looks up the Go offsets registered for the traced process, resolves the
// m pointer via get_go_m_ptr(), reads m->curg (at offs->curg) and then loads
// the register block saved inside that g. The saved stack pointer, program
// counter and frame pointer are written to record->state so that unwinding
// can continue in the caller.
//
// record: per-CPU record; trace.pid is used as the map key, state is read by
//         get_go_m_ptr() and updated (sp, pc, fp) on success.
//
// Returns true on success. Returns false, leaving record->state untouched, if
// the offsets are not registered or any pointer/read along the chain fails.
static inline EBPF_INLINE bool unwinder_unwind_go_morestack(PerCPURecord *record)
{
  // Layout of the saved register block inside the g structure.
  //
  // Valid since go 1.25:
  // https://github.com/golang/go/blob/7b60d06739/src/runtime/runtime2.go#L303-L322
  // On previous versions, there was an extra "ret" value, so "bp" is one spot later.
  // TODO - make this work on earlier versions.
  // NOTE(review): 56 is assumed to be the offset of the saved register block
  // (g.sched) on 64-bit targets - confirm against runtime2.go and move it into
  // the per-process offsets.
  enum {
    GO_G_SAVED_REGS_OFFSET = 56,
    GO_SAVED_REGS_COUNT    = 6,
    GO_SAVED_REG_SP        = 0,
    GO_SAVED_REG_PC        = 1,
    GO_SAVED_REG_BP        = 5,
  };

  GoLabelsOffsets *offs = bpf_map_lookup_elem(&go_labels_procs, &record->trace.pid);
  if (!offs) {
    DEBUG_PRINT("morestack: failed to read go labels offsets");
    return false;
  }
  void *mptr = get_go_m_ptr(offs, &record->state);
  DEBUG_PRINT("morestack: curg offset: %d, mptr: %llx\n", offs->curg, (u64)mptr);
  if (!mptr) {
    // get_go_m_ptr() reports failure as NULL; don't use it as a base address.
    DEBUG_PRINT("morestack: failed to get m_ptr");
    return false;
  }

  size_t curg_ptr_addr = 0;
  if (bpf_probe_read_user(
        &curg_ptr_addr, sizeof(curg_ptr_addr), (void *)((u64)mptr + offs->curg))) {
    DEBUG_PRINT("morestack: failed to read value for m_ptr->curg");
    return false;
  }

  DEBUG_PRINT("morestack: curg is %lx\n", curg_ptr_addr);
  if (curg_ptr_addr == 0) {
    // No current goroutine on this m: there are no saved registers to restore.
    DEBUG_PRINT("morestack: m_ptr->curg is NULL");
    return false;
  }

  unsigned long regs[GO_SAVED_REGS_COUNT];
  if (bpf_probe_read_user(regs, sizeof(regs), (void *)(curg_ptr_addr + GO_G_SAVED_REGS_OFFSET))) {
    DEBUG_PRINT("morestack: failed to read regs");
    return false;
  }
  record->state.sp = regs[GO_SAVED_REG_SP];
  record->state.pc = regs[GO_SAVED_REG_PC];
  record->state.fp = regs[GO_SAVED_REG_BP];
  DEBUG_PRINT(
    "morestack: success, sp is %llx, pc is %llx, fp is %llx",
    record->state.sp,
    record->state.pc,
    record->state.fp);
  return true;
}
346+
319347
// Push the file ID, line number and frame type into FrameList with a user-defined
320348
// maximum stack size.
321349
//

support/ebpf/tracer.ebpf.amd64

112 KB
Binary file not shown.

support/ebpf/tracer.ebpf.arm64

111 KB
Binary file not shown.

support/ebpf/tsd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#define OPTI_TSD_H
33

44
#include "bpfdefs.h"
5+
#include "types.h"
6+
#include "util.h"
57

68
// tsd_read reads from the Thread Specific Data location associated with the provided key.
79
static inline EBPF_INLINE int

0 commit comments

Comments
 (0)