diff --git a/bin_tests/src/bin/crashtracker_bin_test.rs b/bin_tests/src/bin/crashtracker_bin_test.rs index 6ab95b6512..a9c3f379db 100644 --- a/bin_tests/src/bin/crashtracker_bin_test.rs +++ b/bin_tests/src/bin/crashtracker_bin_test.rs @@ -25,6 +25,7 @@ mod unix { use libdd_common::{tag, Endpoint}; use libdd_crashtracker::{ self as crashtracker, CrashtrackerConfiguration, CrashtrackerReceiverConfig, Metadata, + StackFrame, StackTrace, }; const TEST_COLLECTOR_TIMEOUT: Duration = Duration::from_secs(15); @@ -154,6 +155,31 @@ mod unix { "raise_sigill" => raise(Signal::SIGILL)?, "raise_sigbus" => raise(Signal::SIGBUS)?, "raise_sigsegv" => raise(Signal::SIGSEGV)?, + "unhandled_exception" => { + let mut stacktrace = StackTrace::new_incomplete(); + let mut stackframe1 = StackFrame::new(); + stackframe1.with_ip(1234); + stackframe1.with_function("test_function1".to_string()); + stackframe1.with_file("test_file1".to_string()); + + let mut stackframe2 = StackFrame::new(); + stackframe2.with_ip(5678); + stackframe2.with_function("test_function2".to_string()); + stackframe2.with_file("test_file2".to_string()); + + stacktrace.push_frame(stackframe1, true).unwrap(); + stacktrace.push_frame(stackframe2, true).unwrap(); + + stacktrace.set_complete().unwrap(); + + crashtracker::report_unhandled_exception( + Some("RuntimeException"), + Some("an exception occured"), + stacktrace, + )?; + + process::exit(0); + } _ => anyhow::bail!("Unexpected crash_typ: {crash_typ}"), } crashtracker::end_op(crashtracker::OpTypes::ProfilerCollectingSample)?; diff --git a/bin_tests/src/test_types.rs b/bin_tests/src/test_types.rs index 1666264f77..ec3faf235c 100644 --- a/bin_tests/src/test_types.rs +++ b/bin_tests/src/test_types.rs @@ -114,6 +114,8 @@ pub enum CrashType { RaiseSigBus, /// Raise SIGSEGV RaiseSigSegv, + /// Unhandled Exception + UnhandledException, } impl CrashType { @@ -129,6 +131,7 @@ impl CrashType { Self::RaiseSigIll => "raise_sigill", Self::RaiseSigBus => "raise_sigbus", 
Self::RaiseSigSegv => "raise_sigsegv", + Self::UnhandledException => "unhandled_exception", } } @@ -138,7 +141,11 @@ impl CrashType { pub const fn expects_success(self) -> bool { matches!( self, - Self::KillSigBus | Self::KillSigSegv | Self::RaiseSigBus | Self::RaiseSigSegv + Self::KillSigBus + | Self::KillSigSegv + | Self::RaiseSigBus + | Self::RaiseSigSegv + | Self::UnhandledException ) } @@ -150,6 +157,7 @@ impl CrashType { Self::KillSigAbrt | Self::RaiseSigAbrt => 6, // SIGABRT Self::KillSigIll | Self::RaiseSigIll => 4, // SIGILL Self::KillSigBus | Self::RaiseSigBus => 7, // SIGBUS + Self::UnhandledException => 0, // no signal } } @@ -160,6 +168,7 @@ impl CrashType { Self::KillSigAbrt | Self::RaiseSigAbrt => "SIGABRT", Self::KillSigIll | Self::RaiseSigIll => "SIGILL", Self::KillSigBus | Self::RaiseSigBus => "SIGBUS", + Self::UnhandledException => "Unhandled Exception", } } } @@ -184,6 +193,7 @@ impl std::str::FromStr for CrashType { "raise_sigill" => Ok(Self::RaiseSigIll), "raise_sigbus" => Ok(Self::RaiseSigBus), "raise_sigsegv" => Ok(Self::RaiseSigSegv), + "unhandled_exception" => Ok(Self::UnhandledException), _ => Err(format!("Unknown crash type: {}", s)), } } @@ -220,5 +230,6 @@ mod tests { assert!(!CrashType::KillSigAbrt.expects_success()); assert!(CrashType::KillSigBus.expects_success()); assert!(CrashType::KillSigSegv.expects_success()); + assert!(CrashType::UnhandledException.expects_success()); } } diff --git a/bin_tests/tests/crashtracker_bin_test.rs b/bin_tests/tests/crashtracker_bin_test.rs index 5e0683a03b..5262229477 100644 --- a/bin_tests/tests/crashtracker_bin_test.rs +++ b/bin_tests/tests/crashtracker_bin_test.rs @@ -96,6 +96,39 @@ fn run_standard_crash_test_refactored( // These tests below use the new infrastructure but require custom validation logic // that doesn't fit the simple macro-generated pattern. 
+#[test] +#[cfg_attr(miri, ignore)] +fn test_crash_tracking_bin_unhandled_exception() { + let config = CrashTestConfig::new( + BuildProfile::Release, + TestMode::DoNothing, + CrashType::UnhandledException, + ); + let artifacts = StandardArtifacts::new(config.profile); + let artifacts_map = build_artifacts(&artifacts.as_slice()).unwrap(); + + let validator: ValidatorFn = Box::new(|payload, _fixtures| { + PayloadValidator::new(payload) + .validate_counters()? + .validate_error_kind("UnhandledException")? + .validate_error_message_contains("Process was terminated due to an unhandled exception of type 'RuntimeException'. Message: \"an exception occured\"")? + // The two frames emitted in the bin: test_function1 and test_function2 + .validate_callstack_functions(&["test_function1", "test_function2"])?; + + // Unhandled exceptions have no signal info + let sig_info = &payload["sig_info"]; + assert!( + sig_info.is_null() + || sig_info.is_object() && sig_info.as_object().is_none_or(|m| m.is_empty()), + "Expected no sig_info for unhandled exception, got: {sig_info:?}" + ); + + Ok(()) + }); + + run_crash_test_with_artifacts(&config, &artifacts_map, &artifacts, validator).unwrap(); +} + #[test] #[cfg_attr(miri, ignore)] fn test_crash_tracking_bin_runtime_callback_frame() { @@ -1027,6 +1060,12 @@ fn assert_siginfo_message(sig_info: &Value, crash_typ: &str) { assert_eq!(sig_info["si_signo"], libc::SIGILL); assert_eq!(sig_info["si_signo_human_readable"], "SIGILL"); } + "unhandled_exception" => { + assert!( + sig_info.is_null() + || sig_info.is_object() && sig_info.as_object().is_none_or(|m| m.is_empty()) + ); + } _ => panic!("unexpected crash_typ {crash_typ}"), } } diff --git a/libdd-crashtracker/src/collector/collector_manager.rs b/libdd-crashtracker/src/collector/collector_manager.rs index 9abbe9d652..be8383aff6 100644 --- a/libdd-crashtracker/src/collector/collector_manager.rs +++ b/libdd-crashtracker/src/collector/collector_manager.rs @@ -7,6 +7,7 @@ use 
libdd_common::timeout::TimeoutManager; use super::emitters::emit_crashreport; use crate::shared::configuration::CrashtrackerConfiguration; +use crate::ErrorKind; use libc::{siginfo_t, ucontext_t}; use libdd_common::unix_utils::{alt_fork, terminate}; use nix::sys::signal::{self, SaFlags, SigAction, SigHandler, SigSet}; @@ -118,10 +119,12 @@ pub(crate) fn run_collector_child( config_str, metadata_str, message_ptr, - sig_info, - ucontext, + Some(sig_info), + None, // stacktrace is none; this is collected in the signal handler + Some(ucontext), ppid, crashing_tid, + ErrorKind::UnixSignal, ); if let Err(e) = report { eprintln!("Failed to flush crash report: {e}"); diff --git a/libdd-crashtracker/src/collector/crash_handler.rs b/libdd-crashtracker/src/collector/crash_handler.rs index 917329295a..47e55ac73f 100644 --- a/libdd-crashtracker/src/collector/crash_handler.rs +++ b/libdd-crashtracker/src/collector/crash_handler.rs @@ -8,8 +8,10 @@ use super::receiver_manager::Receiver; use super::signal_handler_manager::chain_signal_handler; use crate::crash_info::Metadata; use crate::shared::configuration::CrashtrackerConfiguration; +use crate::{ErrorKind, StackTrace}; use libc::{c_void, siginfo_t, ucontext_t}; use libdd_common::timeout::TimeoutManager; +use std::os::unix::{io::FromRawFd, net::UnixStream}; use std::panic; use std::panic::PanicHookInfo; use std::ptr; @@ -301,6 +303,122 @@ fn handle_posix_signal_impl( Ok(()) } +/// Gets a clone of the current metadata, if set. +/// Unlike the signal handler path, this reads without consuming the stored value. +/// +/// SAFETY: +/// This function must not be called concurrently with `update_metadata`. 
+fn get_metadata() -> Option<(crate::crash_info::Metadata, String)> { + let ptr = METADATA.load(SeqCst); + if ptr.is_null() { + None + } else { + // Safety: ptr was created by Box::into_raw in update_metadata + let (metadata, metadata_string) = unsafe { &*ptr }; + Some((metadata.clone(), metadata_string.clone())) + } +} + +/// Gets a clone of the current config, if set. +/// Unlike the signal handler path, this reads without consuming the stored value. +/// +/// SAFETY: +/// This function must not be called concurrently with `update_config`. +fn get_config() -> Option<( + crate::shared::configuration::CrashtrackerConfiguration, + String, +)> { + let ptr = CONFIG.load(SeqCst); + if ptr.is_null() { + None + } else { + // Safety: ptr was created by Box::into_raw in update_config + let (config, config_string) = unsafe { &*ptr }; + Some((config.clone(), config_string.clone())) + } +} + +/// This function is designed to be called when a program is at a terminal state +/// and the application wants to report an unhandled exception to the crashtracker. +/// +/// Preconditions: +/// - The crashtracker must be started +/// - The stacktrace must be valid +/// +/// This function will spawn the receiver process and call an emit function to pipe over +/// the crash data. 
We don't use the collector process because we are not in a signal handler. +/// Rather, we call emit_crashreport directly and pipe over data to the receiver. +pub fn report_unhandled_exception( + exception_type: Option<&str>, + exception_message: Option<&str>, + stacktrace: StackTrace, +) -> Result<(), CrashHandlerError> { + let Some((config, config_str)) = get_config() else { + return Err(CrashHandlerError::NoConfig); + }; + let Some((_metadata, metadata_str)) = get_metadata() else { + return Err(CrashHandlerError::NoMetadata); + }; + + // Turn crashtracker off to prevent a recursive crash report emission. + // We do not turn it back on because this function is not intended to be used as + // a recurring mechanism to report exceptions. We expect the application to exit + // afterwards. + disable(); + + let unix_socket_path = config.unix_socket_path().as_deref().unwrap_or_default(); + + let receiver = if unix_socket_path.is_empty() { + Receiver::spawn_from_stored_config()? + } else { + Receiver::from_socket(unix_socket_path)? + }; + + let timeout_manager = TimeoutManager::new(config.timeout()); + + let pid = unsafe { libc::getpid() }; + let tid = libdd_common::threading::get_current_thread_id() as libc::pid_t; + + let error_type_str = exception_type.unwrap_or(""); + let error_message_str = exception_message.unwrap_or(""); + let message = format!( + "Process was terminated due to an unhandled exception of type '{error_type_str}'. \ + Message: \"{error_message_str}\"" + ); + + let message_ptr = Box::into_raw(Box::new(message)); + + // Duplicate the socket fd so we can poll for receiver completion after we close the write end. + // UnixStream::from_raw_fd takes ownership of uds_fd, so we need a separate fd to poll. 
+ let poll_fd = unsafe { libc::dup(receiver.handle.uds_fd) }; + let receiver_pid = receiver.handle.pid; + + { + let mut unix_stream = unsafe { UnixStream::from_raw_fd(receiver.handle.uds_fd) }; + let _ = super::emitters::emit_crashreport( + &mut unix_stream, + &config, + &config_str, + &metadata_str, + message_ptr, + None, + Some(stacktrace), + None, + pid, + tid, + ErrorKind::UnhandledException, + ); + // unix_stream is dropped here, closing the write end of the socket. + // This signals EOF to the receiver so it can finish writing the crash report. + } + + // Wait for the receiver to signal it is done (POLLHUP on the dup'd fd), then reap it. + let finish_handle = super::process_handle::ProcessHandle::new(poll_fd, receiver_pid); + finish_handle.finish(&timeout_manager); + unsafe { libc::close(poll_fd) }; + + Ok(()) +} #[cfg(test)] mod tests { use super::*; diff --git a/libdd-crashtracker/src/collector/emitters.rs b/libdd-crashtracker/src/collector/emitters.rs index 33de2e8ce7..e30b2b6d7a 100644 --- a/libdd-crashtracker/src/collector/emitters.rs +++ b/libdd-crashtracker/src/collector/emitters.rs @@ -10,7 +10,8 @@ use crate::runtime_callback::{ }; use crate::shared::constants::*; use crate::{ - translate_si_code, CrashtrackerConfiguration, ErrorKind, SignalNames, StacktraceCollection, + translate_si_code, CrashtrackerConfiguration, ErrorKind, SignalNames, StackTrace, + StacktraceCollection, }; use backtrace::Frame; use libc::{siginfo_t, ucontext_t}; @@ -34,6 +35,8 @@ pub enum EmitterError { CounterError(#[from] crate::collector::counters::CounterError), #[error("Atomic set error: {0}")] AtomicSetError(#[from] crate::collector::atomic_set::AtomicSetError), + #[error("Serialization error: {0}")] + SerializationError(#[from] serde_json::Error), } /// Emit a stacktrace onto the given handle as formatted json. 
@@ -141,24 +144,34 @@ pub(crate) fn emit_crashreport( config_str: &str, metadata_string: &str, message_ptr: *mut String, - sig_info: *const siginfo_t, - ucontext: *const ucontext_t, + sig_info: Option<*const siginfo_t>, + stacktrace: Option<StackTrace>, + ucontext: Option<*const ucontext_t>, ppid: i32, crashing_tid: libc::pid_t, + kind: ErrorKind, ) -> Result<(), EmitterError> { + // Crash ping data: // The following order is important in order to emit the crash ping: // - receiver expects the config because the endpoint to emit to is there + // We enhance the crash ping message by emitting: // - then message if any // - then siginfo if any - // - then the kind if any + // - then kind // - then metadata emit_config(pipe, config_str)?; emit_message(pipe, message_ptr)?; - emit_siginfo(pipe, sig_info)?; - emit_kind(pipe, &ErrorKind::UnixSignal)?; + if let Some(sig_info) = sig_info { + emit_siginfo(pipe, sig_info)?; + } + emit_kind(pipe, &kind)?; emit_metadata(pipe, metadata_string)?; // after the metadata the ping should have been sent - emit_ucontext(pipe, ucontext)?; + + if let Some(ucontext) = ucontext { + emit_ucontext(pipe, ucontext)?; + } + emit_procinfo(pipe, ppid, crashing_tid)?; emit_counters(pipe)?; emit_spans(pipe)?; @@ -168,15 +181,23 @@ pub(crate) fn emit_crashreport( #[cfg(target_os = "linux")] emit_proc_self_maps(pipe)?; + if let Some(stacktrace) = stacktrace { + emit_complete_stacktrace(pipe, stacktrace)?; + } + // Getting a backtrace on rust is not guaranteed to be signal safe // https://github.com/rust-lang/backtrace-rs/issues/414 // let current_backtrace = backtrace::Backtrace::new(); // In fact, if we look into the code here, we see mallocs. // https://doc.rust-lang.org/src/std/backtrace.rs.html#332 // Do this last, so even if it crashes, we still get the other info. 
- if config.resolve_frames() != StacktraceCollection::Disabled { + + // If we have a ucontext, we can emit stacktrace + if let Some(ucontext) = ucontext { let fault_ip = extract_ip(ucontext); - unsafe { emit_backtrace_by_frames(pipe, config.resolve_frames(), fault_ip)? }; + if config.resolve_frames() != StacktraceCollection::Disabled { + unsafe { emit_backtrace_by_frames(pipe, config.resolve_frames(), fault_ip)? }; + } } if is_runtime_callback_registered() { @@ -189,6 +210,17 @@ pub(crate) fn emit_crashreport( Ok(()) } +pub fn emit_complete_stacktrace( + w: &mut impl Write, + stacktrace: StackTrace, +) -> Result<(), EmitterError> { + writeln!(w, "{DD_CRASHTRACK_BEGIN_COMPLETE_STACKTRACE}")?; + writeln!(w, "{}", serde_json::to_string(&stacktrace)?)?; + writeln!(w, "{DD_CRASHTRACK_END_COMPLETE_STACKTRACE}")?; + w.flush()?; + Ok(()) +} + fn emit_config(w: &mut impl Write, config_str: &str) -> Result<(), EmitterError> { writeln!(w, "{DD_CRASHTRACK_BEGIN_CONFIG}")?; writeln!(w, "{config_str}")?; @@ -425,6 +457,8 @@ fn extract_ip(ucontext: *const ucontext_t) -> usize { #[cfg(test)] mod tests { + use crate::StackFrame; + use super::*; use std::str; @@ -446,6 +480,38 @@ mod tests { buf } + #[test] + #[cfg_attr(miri, ignore)] + fn test_emit_complete_stacktrace() { + // new_incomplete() starts with incomplete: true, which push_frame requires + let mut stacktrace = StackTrace::new_incomplete(); + let mut stackframe1 = StackFrame::new(); + stackframe1.with_ip(1234); + stackframe1.with_function("test_function1".to_string()); + stackframe1.with_file("test_file1".to_string()); + + let mut stackframe2 = StackFrame::new(); + stackframe2.with_ip(5678); + stackframe2.with_function("test_function2".to_string()); + stackframe2.with_file("test_file2".to_string()); + + stacktrace.push_frame(stackframe1, true).unwrap(); + stacktrace.push_frame(stackframe2, true).unwrap(); + + stacktrace.set_complete().unwrap(); + + let mut buf = Vec::new(); + emit_complete_stacktrace(&mut buf, 
stacktrace).expect("to work ;-)"); + let out = str::from_utf8(&buf).expect("to be valid UTF8"); + + assert!(out.contains("\"ip\":\"0x4d2\"")); + assert!(out.contains("\"function\":\"test_function1\"")); + assert!(out.contains("\"file\":\"test_file1\"")); + assert!(out.contains("\"ip\":\"0x162e\"")); + assert!(out.contains("\"function\":\"test_function2\"")); + assert!(out.contains("\"file\":\"test_file2\"")); + } + #[test] #[cfg_attr(miri, ignore)] fn test_emit_backtrace_disabled() { diff --git a/libdd-crashtracker/src/collector/mod.rs b/libdd-crashtracker/src/collector/mod.rs index 9fe75d1c86..5a67d90978 100644 --- a/libdd-crashtracker/src/collector/mod.rs +++ b/libdd-crashtracker/src/collector/mod.rs @@ -19,5 +19,7 @@ pub use additional_tags::{ }; pub use api::*; pub use counters::{begin_op, end_op, reset_counters, OpTypes}; -pub use crash_handler::{disable, enable, update_config, update_metadata}; +pub use crash_handler::{ + disable, enable, report_unhandled_exception, update_config, update_metadata, +}; pub use spans::{clear_spans, clear_traces, insert_span, insert_trace, remove_span, remove_trace}; diff --git a/libdd-crashtracker/src/lib.rs b/libdd-crashtracker/src/lib.rs index 4d0fd5626b..28a43109ff 100644 --- a/libdd-crashtracker/src/lib.rs +++ b/libdd-crashtracker/src/lib.rs @@ -76,7 +76,8 @@ pub use collector::{ begin_op, clear_additional_tags, clear_spans, clear_traces, consume_and_emit_additional_tags, default_signals, disable, enable, end_op, init, insert_additional_tag, insert_span, insert_trace, on_fork, reconfigure, remove_additional_tag, remove_span, remove_trace, - reset_counters, update_config, update_metadata, OpTypes, DEFAULT_SYMBOLS, + report_unhandled_exception, reset_counters, update_config, update_metadata, OpTypes, + DEFAULT_SYMBOLS, }; #[cfg(all(windows, feature = "collector_windows"))] diff --git a/libdd-crashtracker/src/receiver/receive_report.rs b/libdd-crashtracker/src/receiver/receive_report.rs index 9dc6ea6544..8ef28535ce 100644 --- 
a/libdd-crashtracker/src/receiver/receive_report.rs +++ b/libdd-crashtracker/src/receiver/receive_report.rs @@ -7,7 +7,7 @@ use crate::{ }, runtime_callback::RuntimeStack, shared::constants::*, - CrashtrackerConfiguration, + CrashtrackerConfiguration, StackTrace, }; use anyhow::Context; @@ -105,6 +105,7 @@ impl From for StackFrame { pub(crate) enum StdinState { AdditionalTags, Config, + CompleteStackTrace, Counters, Done, File(String, Vec), @@ -156,6 +157,17 @@ fn process_line( StdinState::Config } + StdinState::CompleteStackTrace + if line.starts_with(DD_CRASHTRACK_END_COMPLETE_STACKTRACE) => + { + StdinState::Waiting + } + StdinState::CompleteStackTrace => { + let stacktrace: StackTrace = serde_json::from_str(line)?; + builder.with_stack(stacktrace)?; + StdinState::CompleteStackTrace + } + StdinState::Counters if line.starts_with(DD_CRASHTRACK_END_COUNTERS) => StdinState::Waiting, StdinState::Counters => { let v: serde_json::Value = serde_json::from_str(line)?; @@ -312,6 +324,9 @@ fn process_line( StdinState::AdditionalTags } StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_CONFIG) => StdinState::Config, + StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_COMPLETE_STACKTRACE) => { + StdinState::CompleteStackTrace + } StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_COUNTERS) => { StdinState::Counters } diff --git a/libdd-crashtracker/src/shared/constants.rs b/libdd-crashtracker/src/shared/constants.rs index 207bf533b9..f0f3470ed2 100644 --- a/libdd-crashtracker/src/shared/constants.rs +++ b/libdd-crashtracker/src/shared/constants.rs @@ -5,6 +5,7 @@ use std::time::Duration; pub const DD_CRASHTRACK_BEGIN_ADDITIONAL_TAGS: &str = "DD_CRASHTRACK_BEGIN_ADDITIONAL_TAGS"; pub const DD_CRASHTRACK_BEGIN_CONFIG: &str = "DD_CRASHTRACK_BEGIN_CONFIG"; +pub const DD_CRASHTRACK_BEGIN_COMPLETE_STACKTRACE: &str = "DD_CRASHTRACK_BEGIN_COMPLETE_STACKTRACE"; pub const DD_CRASHTRACK_BEGIN_COUNTERS: &str = "DD_CRASHTRACK_BEGIN_COUNTERS"; pub const 
DD_CRASHTRACK_BEGIN_FILE: &str = "DD_CRASHTRACK_BEGIN_FILE"; pub const DD_CRASHTRACK_BEGIN_KIND: &str = "DD_CRASHTRACK_BEGIN_KIND"; @@ -23,6 +24,7 @@ pub const DD_CRASHTRACK_BEGIN_MESSAGE: &str = "DD_CRASHTRACK_BEGIN_MESSAGE"; pub const DD_CRASHTRACK_DONE: &str = "DD_CRASHTRACK_DONE"; pub const DD_CRASHTRACK_END_ADDITIONAL_TAGS: &str = "DD_CRASHTRACK_END_ADDITIONAL_TAGS"; pub const DD_CRASHTRACK_END_CONFIG: &str = "DD_CRASHTRACK_END_CONFIG"; +pub const DD_CRASHTRACK_END_COMPLETE_STACKTRACE: &str = "DD_CRASHTRACK_END_COMPLETE_STACKTRACE"; pub const DD_CRASHTRACK_END_COUNTERS: &str = "DD_CRASHTRACK_END_COUNTERS"; pub const DD_CRASHTRACK_END_FILE: &str = "DD_CRASHTRACK_END_FILE"; pub const DD_CRASHTRACK_END_KIND: &str = "DD_CRASHTRACK_END_KIND";