Skip to content

Commit 278b961

Browse files
committed
MEDIUM: debug: on panic, make the target thread automatically allocate its buf
One main problem with panic dumps is that they're filling the dumping thread's trash, and that the global thread_dump_buffer is too small to catch enough of them. Here we're proceeding differently. When dumping threads for a panic, we're passing the magic value 0x2 as the buffer, and it will instruct the target thread to allocate its own buffer using get_trash_chunk() (which is signal safe), so that each thread dumps into its own buffer. Then the thread will wait for the buffer to be consumed, and will assign its own thread_dump_buffer to it. This way we can simply dump all threads' buffers from gdb like this:

  (gdb) set $t=0
        while ($t < global.nbthread)
          printf "%s\n", ha_thread_ctx[$t].thread_dump_buffer.area
          set $t=$t+1
        end

For now we make it wait forever since it's only called on panic and we want to make sure the thread doesn't leave and continue to use that trash buffer or do other nasty stuff. That way the dumping thread will make all of them die.

This would be useful to backport to the most recent branches to help troubleshooting. It backports well to 2.9, except for some trivial context in tinfo-t.h for an updated comment. 2.8 and older would also require TAINTED_PANIC. The following previous patches are required:

  MINOR: debug: make mark_tainted() return the previous value
  MINOR: chunk: drop the global thread_dump_buffer
  MINOR: debug: split ha_thread_dump() in two parts
  MINOR: debug: slightly change the thread_dump_pointer signification
  MINOR: debug: make ha_thread_dump_done() take the pointer to be used
  MINOR: debug: replace ha_thread_dump() with its two components
1 parent afeac4b commit 278b961

File tree

2 files changed

+35
-11
lines changed

2 files changed

+35
-11
lines changed

include/haproxy/tinfo-t.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ struct thread_ctx {
179179

180180
unsigned long long out_bytes; /* total #of bytes emitted */
181181
unsigned long long spliced_out_bytes; /* total #of bytes emitted though a kernel pipe */
182-
struct buffer *thread_dump_buffer; /* NULL out of dump, valid during a dump, 0x01 once done */
182+
struct buffer *thread_dump_buffer; /* NULL out of dump, 0x02=to alloc, valid during a dump, |0x01 once done */
183183
unsigned long long total_streams; /* Total number of streams created on this thread */
184184
unsigned int stream_cnt; /* Number of streams attached to this thread */
185185

src/debug.c

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -672,18 +672,22 @@ void ha_panic()
672672
return;
673673
}
674674

675-
buf = get_trash_chunk();
676-
677-
chunk_reset(&trash);
678-
chunk_appendf(&trash, "Thread %u is about to kill the process.\n", tid + 1);
675+
chunk_printf(&trash, "Thread %u is about to kill the process.\n", tid + 1);
676+
DISGUISE(write(2, trash.area, trash.data));
679677

680678
for (thr = 0; thr < global.nbthread; thr++) {
681-
if (!ha_thread_dump_fill(&trash, thr))
679+
if (thr == tid)
680+
buf = get_trash_chunk();
681+
else
682+
buf = (void *)0x2UL; // let the target thread allocate it
683+
684+
buf = ha_thread_dump_fill(buf, thr);
685+
if (!buf)
682686
continue;
683-
DISGUISE(write(2, trash.area, trash.data));
684-
ha_thread_dump_done(NULL, thr);
685-
b_force_xfer(buf, &trash, b_room(buf));
686-
chunk_reset(&trash);
687+
688+
DISGUISE(write(2, buf->area, buf->data));
689+
/* restore the thread's dump pointer for easier post-mortem analysis */
690+
ha_thread_dump_done(buf, thr);
687691
}
688692

689693
#ifdef USE_LUA
@@ -2103,19 +2107,33 @@ static void debug_release_memstats(struct appctx *appctx)
21032107

21042108
/* handles DEBUGSIG to dump the state of the thread it's working on. This is
21052109
* appended at the end of thread_dump_buffer which must be protected against
2106-
* reentrance from different threads (a thread-local buffer works fine).
2110+
* reentrance from different threads (a thread-local buffer works fine). If
2111+
* the buffer pointer is equal to 0x2, then it's a panic. The thread allocates
2112+
* the buffer from its own trash chunks so that the contents remain visible in
2113+
* the core, and it never returns.
21072114
*/
21082115
void debug_handler(int sig, siginfo_t *si, void *arg)
21092116
{
21102117
struct buffer *buf = HA_ATOMIC_LOAD(&th_ctx->thread_dump_buffer);
21112118
int harmless = is_thread_harmless();
2119+
int no_return = 0;
21122120

21132121
/* first, let's check it's really for us and that we didn't just get
21142122
* a spurious DEBUGSIG.
21152123
*/
21162124
if (!buf || (ulong)buf & 0x1UL)
21172125
return;
21182126

2127+
/* Special value 0x2 is used during panics and requires that the thread
2128+
* allocates its own dump buffer among its own trash buffers. The goal
2129+
* is that all threads keep a copy of their own dump.
2130+
*/
2131+
if ((ulong)buf == 0x2UL) {
2132+
no_return = 1;
2133+
buf = get_trash_chunk();
2134+
HA_ATOMIC_STORE(&th_ctx->thread_dump_buffer, buf);
2135+
}
2136+
21192137
/* now dump the current state into the designated buffer, and indicate
21202138
* we come from a sig handler.
21212139
*/
@@ -2127,6 +2145,12 @@ void debug_handler(int sig, siginfo_t *si, void *arg)
21272145
if (!harmless &&
21282146
!(_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_SLEEPING))
21292147
_HA_ATOMIC_OR(&th_ctx->flags, TH_FL_STUCK);
2148+
2149+
/* in case of panic, no return is planned so that we don't destroy
2150+
* the buffer's contents and we make sure not to trigger in loops.
2151+
*/
2152+
while (no_return)
2153+
wait(NULL);
21302154
}
21312155

21322156
static int init_debug_per_thread()

0 commit comments

Comments
 (0)