Skip to content

Commit 0a838b8

Browse files
committed
gyuheon0h/report-unhandled-exceptions
1 parent 99181d9 commit 0a838b8

File tree

10 files changed

+293
-14
lines changed

10 files changed

+293
-14
lines changed

bin_tests/src/bin/crashtracker_bin_test.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ mod unix {
2525
use libdd_common::{tag, Endpoint};
2626
use libdd_crashtracker::{
2727
self as crashtracker, CrashtrackerConfiguration, CrashtrackerReceiverConfig, Metadata,
28+
StackFrame, StackTrace,
2829
};
2930

3031
const TEST_COLLECTOR_TIMEOUT: Duration = Duration::from_secs(15);
@@ -154,6 +155,31 @@ mod unix {
154155
"raise_sigill" => raise(Signal::SIGILL)?,
155156
"raise_sigbus" => raise(Signal::SIGBUS)?,
156157
"raise_sigsegv" => raise(Signal::SIGSEGV)?,
158+
"unhandled_exception" => {
159+
let mut stacktrace = StackTrace::new_incomplete();
160+
let mut stackframe1 = StackFrame::new();
161+
stackframe1.with_ip(1234);
162+
stackframe1.with_function("test_function1".to_string());
163+
stackframe1.with_file("test_file1".to_string());
164+
165+
let mut stackframe2 = StackFrame::new();
166+
stackframe2.with_ip(5678);
167+
stackframe2.with_function("test_function2".to_string());
168+
stackframe2.with_file("test_file2".to_string());
169+
170+
stacktrace.push_frame(stackframe1, true).unwrap();
171+
stacktrace.push_frame(stackframe2, true).unwrap();
172+
173+
stacktrace.set_complete().unwrap();
174+
175+
crashtracker::report_unhandled_exception(
176+
Some("RuntimeException"),
177+
Some("an exception occured"),
178+
stacktrace,
179+
)?;
180+
181+
process::exit(0);
182+
}
157183
_ => anyhow::bail!("Unexpected crash_typ: {crash_typ}"),
158184
}
159185
crashtracker::end_op(crashtracker::OpTypes::ProfilerCollectingSample)?;

bin_tests/src/test_types.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ pub enum CrashType {
114114
RaiseSigBus,
115115
/// Raise SIGSEGV
116116
RaiseSigSegv,
117+
/// Unhandled Exception
118+
UnhandledException,
117119
}
118120

119121
impl CrashType {
@@ -129,6 +131,7 @@ impl CrashType {
129131
Self::RaiseSigIll => "raise_sigill",
130132
Self::RaiseSigBus => "raise_sigbus",
131133
Self::RaiseSigSegv => "raise_sigsegv",
134+
Self::UnhandledException => "unhandled_exception",
132135
}
133136
}
134137

@@ -138,7 +141,11 @@ impl CrashType {
138141
pub const fn expects_success(self) -> bool {
139142
matches!(
140143
self,
141-
Self::KillSigBus | Self::KillSigSegv | Self::RaiseSigBus | Self::RaiseSigSegv
144+
Self::KillSigBus
145+
| Self::KillSigSegv
146+
| Self::RaiseSigBus
147+
| Self::RaiseSigSegv
148+
| Self::UnhandledException
142149
)
143150
}
144151

@@ -150,6 +157,7 @@ impl CrashType {
150157
Self::KillSigAbrt | Self::RaiseSigAbrt => 6, // SIGABRT
151158
Self::KillSigIll | Self::RaiseSigIll => 4, // SIGILL
152159
Self::KillSigBus | Self::RaiseSigBus => 7, // SIGBUS
160+
Self::UnhandledException => 0, // no signal
153161
}
154162
}
155163

@@ -160,6 +168,7 @@ impl CrashType {
160168
Self::KillSigAbrt | Self::RaiseSigAbrt => "SIGABRT",
161169
Self::KillSigIll | Self::RaiseSigIll => "SIGILL",
162170
Self::KillSigBus | Self::RaiseSigBus => "SIGBUS",
171+
Self::UnhandledException => "Unhandled Exception",
163172
}
164173
}
165174
}
@@ -184,6 +193,7 @@ impl std::str::FromStr for CrashType {
184193
"raise_sigill" => Ok(Self::RaiseSigIll),
185194
"raise_sigbus" => Ok(Self::RaiseSigBus),
186195
"raise_sigsegv" => Ok(Self::RaiseSigSegv),
196+
"unhandled_exception" => Ok(Self::UnhandledException),
187197
_ => Err(format!("Unknown crash type: {}", s)),
188198
}
189199
}
@@ -220,5 +230,6 @@ mod tests {
220230
assert!(!CrashType::KillSigAbrt.expects_success());
221231
assert!(CrashType::KillSigBus.expects_success());
222232
assert!(CrashType::KillSigSegv.expects_success());
233+
assert!(CrashType::UnhandledException.expects_success());
223234
}
224235
}

bin_tests/tests/crashtracker_bin_test.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,39 @@ fn run_standard_crash_test_refactored(
9696
// These tests below use the new infrastructure but require custom validation logic
9797
// that doesn't fit the simple macro-generated pattern.
9898

99+
/// End-to-end check of the `unhandled_exception` crash type: runs the crash
/// test binary in release mode and validates the payload it produces (counters,
/// error kind, error message, the two reported frames, and the absence of
/// signal information).
#[test]
#[cfg_attr(miri, ignore)]
fn test_crash_tracking_bin_unhandled_exception() {
    let config = CrashTestConfig::new(
        BuildProfile::Release,
        TestMode::DoNothing,
        CrashType::UnhandledException,
    );
    let artifacts = StandardArtifacts::new(config.profile);
    let artifacts_map = build_artifacts(&artifacts.as_slice()).unwrap();

    let validator: ValidatorFn = Box::new(|payload, _fixtures| {
        // Message the payload is expected to carry for the exception type and
        // text passed by the test binary.
        let expected_message = "Process was terminated due to an unhandled exception of type 'RuntimeException'. Message: \"an exception occured\"";

        PayloadValidator::new(payload)
            .validate_counters()?
            .validate_error_kind("UnixSignal")?
            .validate_error_message_contains(expected_message)?
            // The two frames emitted in the bin: test_function1 and test_function2
            .validate_callstack_functions(&["test_function1", "test_function2"])?;

        // Unhandled exceptions have no signal info: the field must be either
        // null or an empty object.
        let sig_info = &payload["sig_info"];
        let sig_info_absent =
            sig_info.is_null() || sig_info.as_object().is_some_and(|fields| fields.is_empty());
        assert!(
            sig_info_absent,
            "Expected no sig_info for unhandled exception, got: {sig_info:?}"
        );

        Ok(())
    });

    run_crash_test_with_artifacts(&config, &artifacts_map, &artifacts, validator).unwrap();
}
131+
99132
#[test]
100133
#[cfg_attr(miri, ignore)]
101134
fn test_crash_tracking_bin_runtime_callback_frame() {
@@ -1027,6 +1060,12 @@ fn assert_siginfo_message(sig_info: &Value, crash_typ: &str) {
10271060
assert_eq!(sig_info["si_signo"], libc::SIGILL);
10281061
assert_eq!(sig_info["si_signo_human_readable"], "SIGILL");
10291062
}
1063+
"unhandled_exception" => {
1064+
assert!(
1065+
sig_info.is_null()
1066+
|| sig_info.is_object() && sig_info.as_object().is_none_or(|m| m.is_empty())
1067+
);
1068+
}
10301069
_ => panic!("unexpected crash_typ {crash_typ}"),
10311070
}
10321071
}

libdd-crashtracker/src/collector/collector_manager.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,9 @@ pub(crate) fn run_collector_child(
118118
config_str,
119119
metadata_str,
120120
message_ptr,
121-
sig_info,
122-
ucontext,
121+
Some(sig_info),
122+
None, // stacktrace is none; this is collected in the signal handler
123+
Some(ucontext),
123124
ppid,
124125
crashing_tid,
125126
);

libdd-crashtracker/src/collector/crash_handler.rs

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@ use super::receiver_manager::Receiver;
88
use super::signal_handler_manager::chain_signal_handler;
99
use crate::crash_info::Metadata;
1010
use crate::shared::configuration::CrashtrackerConfiguration;
11+
use crate::StackTrace;
1112
use libc::{c_void, siginfo_t, ucontext_t};
1213
use libdd_common::timeout::TimeoutManager;
14+
use std::os::unix::{io::FromRawFd, net::UnixStream};
1315
use std::panic;
1416
use std::panic::PanicHookInfo;
1517
use std::ptr;
@@ -301,6 +303,121 @@ fn handle_posix_signal_impl(
301303
Ok(())
302304
}
303305

306+
/// Gets a clone of the current metadata, if set.
307+
/// Unlike the signal handler path, this reads without consuming the stored value.
308+
///
309+
/// SAFETY:
310+
/// This function must not be called concurrently with `update_metadata`.
311+
fn get_metadata() -> Option<(crate::crash_info::Metadata, String)> {
312+
let ptr = METADATA.load(SeqCst);
313+
if ptr.is_null() {
314+
None
315+
} else {
316+
// Safety: ptr was created by Box::into_raw in update_metadata
317+
let (metadata, metadata_string) = unsafe { &*ptr };
318+
Some((metadata.clone(), metadata_string.clone()))
319+
}
320+
}
321+
322+
/// Gets a clone of the current config, if set.
323+
/// Unlike the signal handler path, this reads without consuming the stored value.
324+
///
325+
/// SAFETY:
326+
/// This function must not be called concurrently with `update_config`.
327+
fn get_config() -> Option<(
328+
crate::shared::configuration::CrashtrackerConfiguration,
329+
String,
330+
)> {
331+
let ptr = CONFIG.load(SeqCst);
332+
if ptr.is_null() {
333+
None
334+
} else {
335+
// Safety: ptr was created by Box::into_raw in update_config
336+
let (config, config_string) = unsafe { &*ptr };
337+
Some((config.clone(), config_string.clone()))
338+
}
339+
}
340+
341+
/// This function is designed to be when a program is at a terminal state
342+
/// and the application wants to report an unhandled exception to the crashtracker
343+
///
344+
/// Preconditions:
345+
/// - The crashtracker must be started
346+
/// - The stacktrace must be valid
347+
///
348+
/// This function will spawn the receiver process and call an emit function to pipe over
349+
/// the crash data. We don't use the collector process because we are not in a signal handler
350+
/// Rather, we call emit_crashreport directly and pipe over data to the receiver
351+
pub fn report_unhandled_exception(
352+
exception_type: Option<&str>,
353+
exception_message: Option<&str>,
354+
stacktrace: StackTrace,
355+
) -> Result<(), CrashHandlerError> {
356+
let Some((config, config_str)) = get_config() else {
357+
return Err(CrashHandlerError::NoConfig);
358+
};
359+
let Some((_metadata, metadata_str)) = get_metadata() else {
360+
return Err(CrashHandlerError::NoMetadata);
361+
};
362+
363+
// Turn crashtracker off to prevent a recursive crash report emission
364+
// We do not turn it back on because this function is not intended to be used as
365+
// a recurring mechanism to report exceptions. We expect the application to exit
366+
// after
367+
disable();
368+
369+
let unix_socket_path = config.unix_socket_path().as_deref().unwrap_or_default();
370+
371+
let receiver = if unix_socket_path.is_empty() {
372+
Receiver::spawn_from_stored_config()?
373+
} else {
374+
Receiver::from_socket(unix_socket_path)?
375+
};
376+
377+
let timeout_manager = TimeoutManager::new(config.timeout());
378+
379+
let pid = unsafe { libc::getpid() };
380+
let tid = libdd_common::threading::get_current_thread_id() as libc::pid_t;
381+
382+
let error_type_str = exception_type.unwrap_or("<unknown>");
383+
let error_message_str = exception_message.unwrap_or("<no message>");
384+
let message = format!(
385+
"Process was terminated due to an unhandled exception of type '{error_type_str}'. \
386+
Message: \"{error_message_str}\""
387+
);
388+
389+
let message_ptr = Box::into_raw(Box::new(message));
390+
391+
// Duplicate the socket fd so we can poll for receiver completion after we close the write end.
392+
// UnixStream::from_raw_fd takes ownership of uds_fd, so we need a separate fd to poll.
393+
let poll_fd = unsafe { libc::dup(receiver.handle.uds_fd) };
394+
let receiver_pid = receiver.handle.pid;
395+
396+
{
397+
let mut unix_stream = unsafe { UnixStream::from_raw_fd(receiver.handle.uds_fd) };
398+
let _ = super::emitters::emit_crashreport(
399+
&mut unix_stream,
400+
&config,
401+
&config_str,
402+
&metadata_str,
403+
message_ptr,
404+
None,
405+
Some(stacktrace),
406+
None,
407+
pid,
408+
tid,
409+
);
410+
// unix_stream is dropped here, closing the write end of the socket.
411+
// This signals EOF to the receiver so it can finish writing the crash report.
412+
}
413+
414+
// Wait for the receiver to signal it is done (POLLHUP on the dup'd fd), then reap it.
415+
let finish_handle = super::process_handle::ProcessHandle::new(poll_fd, receiver_pid);
416+
finish_handle.finish(&timeout_manager);
417+
unsafe { libc::close(poll_fd) };
418+
419+
Ok(())
420+
}
304421
#[cfg(test)]
305422
mod tests {
306423
use super::*;

0 commit comments

Comments
 (0)