Skip to content

Pyroscope: Report method is hung in spin lock #252

@REASY

Description

@REASY

Hello, team,

We upgraded pyroscope that got a fix with jemalloc and we started experience our app becoming unresponsive.

Investigation with rust-gdb points that the suspect is memory allocation during Linux signal handling in pprof-rs which is fixed by framehop: remove the allocation in signal handle of framehop #282.

Dependencies

pyroscope = { git = "https://github.com/grafana/pyroscope-rs.git", rev = "b80063acb02765323ebdaf699884be5716d5544b" }
pyroscope_pprofrs = { git = "https://github.com/grafana/pyroscope-rs.git", rev = "b80063acb02765323ebdaf699884be5716d5544b" }

pyroscope_pprofrs at b80063acb02765323ebdaf699884be5716d5544b points to pprof 0.15

Thread dumps

The threads below are waiting for a mutex at the address 0x7fb366e146a8.

(gdb) p *(pthread_mutex_t *)0x7fb366e146a8
$1 = {__data = {__lock = 2, __count = 0, __owner = 26, __nusers = 1, __kind = 512, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0x0}}, __size = "\002\000\000\000\000\000\000\000\032\000\000\000\001\000\000\000\000\002", '\000' <repeats 21 times>, __align = 2}

The thread 170 that owns the lock (LWP 26) is itself in a futex_wait state.

(gdb) thread 170
[Switching to thread 170 (Thread 0x7fb367dfe6c0 (LWP 26))]
#0  futex_wait (private=0, expected=2, futex_word=0x7fb366e146a8) at ../sysdeps/nptl/futex-internal.h:146
146	in ../sysdeps/nptl/futex-internal.h
(gdb) bt
#0  futex_wait (private=0, expected=2, futex_word=0x7fb366e146a8) at ../sysdeps/nptl/futex-internal.h:146
#1  __GI___lll_lock_wait (futex=futex@entry=0x7fb366e146a8, private=0) at ./nptl/lowlevellock.c:49
#2  0x00007fb3d3f26101 in lll_mutex_lock_optimized (mutex=0x7fb366e146a8) at ./nptl/pthread_mutex_lock.c:48
#3  ___pthread_mutex_lock (mutex=mutex@entry=0x7fb366e146a8) at ./nptl/pthread_mutex_lock.c:93
#4  0x00005648ca696ac2 in malloc_mutex_lock_final (mutex=0x7fb366e14668) at include/jemalloc/internal/mutex.h:151
#5  _rjem_je_malloc_mutex_lock_slow (mutex=mutex@entry=0x7fb366e14668) at src/mutex.c:90
#6  0x00005648ca63d74f in malloc_mutex_lock (mutex=<optimized out>, tsdn=<optimized out>) at include/jemalloc/internal/mutex.h:217
#7  _rjem_je_arena_cache_bin_fill_small (tsdn=<optimized out>, arena=<optimized out>, cache_bin=<optimized out>, cache_bin_info=<optimized out>, binind=<optimized out>, nfill=<optimized out>) at src/arena.c:972
#8  0x00005648ca69f28e in _rjem_je_tcache_alloc_small_hard (tsdn=tsdn@entry=0x7fb367dfd7f0, arena=arena@entry=0x7fb366e01040, tcache=tcache@entry=0x7fb367dfdb48, cache_bin=cache_bin@entry=0x7fb367dfdb80, binind=binind@entry=2, tcache_success=tcache_success@entry=0x7fb367df4890) at src/tcache.c:238
#9  0x00005648ca633f8d in tcache_alloc_small (slow_path=<optimized out>, zero=false, binind=2, size=<optimized out>, tcache=<optimized out>, arena=<optimized out>, tsd=0x7fb367dfd7f0) at include/jemalloc/internal/tcache_inlines.h:68
#10 arena_malloc (slow_path=<optimized out>, tcache=<optimized out>, zero=false, ind=2, size=<optimized out>, arena=0x0, tsdn=0x7fb367dfd7f0) at include/jemalloc/internal/arena_inlines_b.h:151
#11 iallocztm (slow_path=<optimized out>, arena=0x0, is_internal=false, tcache=<optimized out>, zero=false, ind=2, size=<optimized out>, tsdn=0x7fb367dfd7f0) at include/jemalloc/internal/jemalloc_internal_inlines_c.h:55
#12 imalloc_no_sample (ind=2, usize=32, size=<optimized out>, tsd=0x7fb367dfd7f0, dopts=<synthetic pointer>, sopts=<synthetic pointer>) at src/jemalloc.c:2398
#13 imalloc_body (tsd=0x7fb367dfd7f0, dopts=<synthetic pointer>, sopts=<synthetic pointer>) at src/jemalloc.c:2573
#14 imalloc (dopts=<optimized out>, sopts=<optimized out>) at src/jemalloc.c:2687
#15 _rjem_je_malloc_default (size=<optimized out>) at src/jemalloc.c:2722

Thread 184 that is busy-waiting on a spin-lock

(gdb) thread 184
[Switching to thread 184 (Thread 0x7fb3c83ff6c0 (LWP 12))]
#0  core::sync::atomic::atomic_compare_exchange_weak<usize> (dst=0x5648cbfc0e10 <pprof::profiler::PROFILER+8>, old=0, new=1, success=core::sync::atomic::Ordering::Acquire, failure=core::sync::atomic::Ordering::Relaxed) at /rustc/29483883eed69d5fb4db01964cdf2af4d86e9cb2/library/core/src/sync/atomic.rs:4097
warning: 4097	/rustc/29483883eed69d5fb4db01964cdf2af4d86e9cb2/library/core/src/sync/atomic.rs: No such file or directory
(gdb) bt
#0  core::sync::atomic::atomic_compare_exchange_weak<usize> (dst=0x5648cbfc0e10 <pprof::profiler::PROFILER+8>, old=0, new=1, success=core::sync::atomic::Ordering::Acquire, failure=core::sync::atomic::Ordering::Relaxed) at /rustc/29483883eed69d5fb4db01964cdf2af4d86e9cb2/library/core/src/sync/atomic.rs:4097
#1  core::sync::atomic::AtomicUsize::compare_exchange_weak (self=0x5648cbfc0e10 <pprof::profiler::PROFILER+8>) at /rustc/29483883eed69d5fb4db01964cdf2af4d86e9cb2/library/core/src/sync/atomic.rs:3133
#2  spin::rwlock::compare_exchange (atomic=0x5648cbfc0e10 <pprof::profiler::PROFILER+8>, strong=false) at /usr/local/cargo/registry/src/index.crates.io-1949cf8c6b5b557f/spin-0.10.0/src/rwlock.rs:816
#3  spin::rwlock::RwLock<core::result::Result<pprof::profiler::Profiler, pprof::error::Error>, spin::relax::Spin>::try_write_internal<core::result::Result<pprof::profiler::Profiler, pprof::error::Error>, spin::relax::Spin> (self=0x5648cbfc0e10 <pprof::profiler::PROFILER+8>, strong=false)
    at /usr/local/cargo/registry/src/index.crates.io-1949cf8c6b5b557f/spin-0.10.0/src/rwlock.rs:368
#4  spin::rwlock::RwLock<core::result::Result<pprof::profiler::Profiler, pprof::error::Error>, spin::relax::Spin>::write<core::result::Result<pprof::profiler::Profiler, pprof::error::Error>, spin::relax::Spin> (self=0x5648cbfc0e10 <pprof::profiler::PROFILER+8>)
    at /usr/local/cargo/registry/src/index.crates.io-1949cf8c6b5b557f/spin-0.10.0/src/rwlock.rs:239
#5  pprof::report::ReportBuilder::build (self=0x7fb3c83fd210) at src/report.rs:104
#6  0x00005648ca574a1f in pyroscope_pprofrs::Pprof::dump_report (self=0x7fb3d3825030) at src/lib.rs:147
#7  0x00005648ca57410c in pyroscope_pprofrs::{impl#3}::report (self=0xc9d876de125dd695) at src/lib.rs:104
#8  0x00005648ca602f47 in pyroscope::pyroscope::{impl#10}::start::{closure#0} () at src/pyroscope.rs:637

threads_waiting_futex.txt

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions