diff --git a/datadog-crashtracker-ffi/src/crash_info/api.rs b/datadog-crashtracker-ffi/src/crash_info/api.rs index fca027aa8a..838f4e432f 100644 --- a/datadog-crashtracker-ffi/src/crash_info/api.rs +++ b/datadog-crashtracker-ffi/src/crash_info/api.rs @@ -64,6 +64,31 @@ pub unsafe extern "C" fn ddog_crasht_CrashInfo_resolve_names( }) } +/// # Safety +/// The `crash_info` can be null, but if non-null it must point to a Builder made by this module, +/// which has not previously been dropped. +/// This function will: +// - resolve frame names +// - normalize IPs +// Difference between this function and the two functions above is that this function +// is tailored for performance. It makes sure that required resources between normalize_ips and +// resolve_names are shared and reused, which is not the case when calling the two functions +// separately. +// While this is more efficient, it means that you are forced to do both operations, and you are not +// able to do only one of them. +#[no_mangle] +#[must_use] +#[named] +#[cfg(unix)] +pub unsafe extern "C" fn ddog_crasht_CrashInfo_enrich_callstacks( + mut crash_info: *mut Handle, + pid: u32, +) -> VoidResult { + wrap_with_void_ffi_result!({ + crash_info.to_inner_mut()?.enrich_callstacks(pid)?; + }) +} + /// # Safety /// The `crash_info` can be null, but if non-null it must point to a Builder made by this module, /// which has not previously been dropped. diff --git a/datadog-crashtracker/src/crash_info/error_data.rs b/datadog-crashtracker/src/crash_info/error_data.rs index f7124b0b96..574fcff308 100644 --- a/datadog-crashtracker/src/crash_info/error_data.rs +++ b/datadog-crashtracker/src/crash_info/error_data.rs @@ -9,6 +9,8 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; #[cfg(unix)] use std::path::PathBuf; +#[cfg(unix)] +use std::rc::Rc; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] pub struct ErrorData { @@ -23,49 +25,81 @@ pub struct ErrorData { } #[cfg(unix)] -#[derive(Default)] -pub struct CachedElfResolvers { - elf_resolvers: HashMap, +pub struct CachedElfResolvers<'a> { + symbolizer: &'a mut blazesym::symbolize::Symbolizer, + elf_resolvers: HashMap>, } #[cfg(unix)] -impl CachedElfResolvers { - pub fn get(&mut self, file_path: &PathBuf) -> anyhow::Result<&ElfResolver> { +impl<'a> CachedElfResolvers<'a> { + pub fn new(symbolizer: &'a mut blazesym::symbolize::Symbolizer) -> Self { + Self { + symbolizer, + elf_resolvers: HashMap::new(), + } + } + + pub fn get_or_insert(&mut self, file_path: &PathBuf) -> anyhow::Result> { use anyhow::Context; - if !self.elf_resolvers.contains_key(file_path.as_path()) { - let resolver = ElfResolver::open(file_path).with_context(|| { - format!( - "ElfResolver::open failed for '{}'", - file_path.to_string_lossy() - ) - })?; - self.elf_resolvers.insert(file_path.clone(), resolver); + let entry = self.elf_resolvers.entry(file_path.clone()); + + match entry { + std::collections::hash_map::Entry::Occupied(o) => Ok(o.get().clone()), + std::collections::hash_map::Entry::Vacant(v) => { + let resolver = ElfResolver::open(file_path).with_context(|| { + format!( + "ElfResolver::open failed for '{}'", + file_path.to_string_lossy() + ) + }); + + match resolver { + Ok(resolver) => { + let resolver = Rc::new(resolver); + // even if the symbolizer failed at registering the elf resolver, we still + // cache it to avoid trying to open it again + let _ = self + .symbolizer + .register_elf_resolver(file_path.as_path(), Rc::clone(&resolver)); + v.insert(Rc::clone(&resolver)); + Ok(resolver) + } + Err(e) => Err(e), + } + } } - self.elf_resolvers - .get(file_path.as_path()) - .with_context(|| "key '{}' not found in ElfResolver cache") } } #[cfg(unix)] impl ErrorData { pub fn normalize_ips(&mut self, pid: u32) -> anyhow::Result<()> { - let mut errors = 0; - let mut elf_resolvers = CachedElfResolvers::default(); + let mut symbolizer = blazesym::symbolize::Symbolizer::new(); + let mut elf_resolvers = CachedElfResolvers::new(&mut symbolizer); let normalizer = blazesym::normalize::Normalizer::builder() .enable_vma_caching(true) .enable_build_ids(true) .enable_build_id_caching(true) .build(); + self.normalize_ips_impl(pid, &normalizer, &mut elf_resolvers) + } + + pub(crate) fn normalize_ips_impl( + &mut self, + pid: u32, + normalizer: &blazesym::normalize::Normalizer, + elf_resolvers: &mut CachedElfResolvers, + ) -> anyhow::Result<()> { + let mut errors = 0; let pid = pid.into(); self.stack - .normalize_ips(&normalizer, pid, &mut elf_resolvers) + .normalize_ips(normalizer, pid, elf_resolvers) .unwrap_or_else(|_| errors += 1); for thread in &mut self.threads { thread .stack - .normalize_ips(&normalizer, pid, &mut elf_resolvers) + .normalize_ips(normalizer, pid, elf_resolvers) .unwrap_or_else(|_| errors += 1); } anyhow::ensure!( @@ -75,21 +109,43 @@ impl ErrorData { Ok(()) } - pub fn resolve_names(&mut self, pid: u32) -> anyhow::Result<()> { - let mut errors = 0; + pub(crate) fn create_symbolizer_source<'a>( + pid: u32, + ) -> blazesym::symbolize::source::Source<'a> { let mut process = blazesym::symbolize::source::Process::new(pid.into()); // https://github.com/libbpf/blazesym/issues/518 process.map_files = false; - let src = blazesym::symbolize::source::Source::Process(process); + blazesym::symbolize::source::Source::Process(process) + } + + pub(crate) fn create_normalizer() -> blazesym::normalize::Normalizer { + blazesym::normalize::Normalizer::builder() + .enable_vma_caching(true) + .enable_build_ids(true) + .enable_build_id_caching(true) + .build() + } + + pub fn resolve_names(&mut self, pid: u32) -> anyhow::Result<()> { + let src = Self::create_symbolizer_source(pid); let symbolizer = blazesym::symbolize::Symbolizer::new(); + self.resolve_names_impl(&symbolizer, &src) + } + + pub(crate) fn resolve_names_impl( + &mut self, + symbolizer: &blazesym::symbolize::Symbolizer, + src: &blazesym::symbolize::source::Source, + ) -> anyhow::Result<()> { + let mut errors = 0; self.stack - .resolve_names(&src, &symbolizer) + .resolve_names(src, symbolizer) .unwrap_or_else(|_| errors += 1); for thread in &mut self.threads { thread .stack - .resolve_names(&src, &symbolizer) + .resolve_names(src, symbolizer) .unwrap_or_else(|_| errors += 1); } anyhow::ensure!( diff --git a/datadog-crashtracker/src/crash_info/mod.rs b/datadog-crashtracker/src/crash_info/mod.rs index 9c6e1d5480..41bf1a8e0c 100644 --- a/datadog-crashtracker/src/crash_info/mod.rs +++ b/datadog-crashtracker/src/crash_info/mod.rs @@ -79,6 +79,25 @@ impl CrashInfo { pub fn resolve_names(&mut self, pid: u32) -> anyhow::Result<()> { self.error.resolve_names(pid) } + + pub fn enrich_callstacks(&mut self, pid: u32) -> anyhow::Result<()> { + let src = ErrorData::create_symbolizer_source(pid); + let normalizer = ErrorData::create_normalizer(); + let mut symbolizer = blazesym::symbolize::Symbolizer::new(); + let mut elf_resolvers = CachedElfResolvers::new(&mut symbolizer); + + // We must call ips normalization first. + // This will allow us to feed the symbolizer with the ELF Resolvers + let rval1 = self + .error + .normalize_ips_impl(pid, &normalizer, &mut elf_resolvers); + let rval2 = self.error.resolve_names_impl(&symbolizer, &src); + anyhow::ensure!( + rval1.is_ok() && rval2.is_ok(), + "normalize_ips: {rval1:?}\tresolve_names: {rval2:?}" + ); + Ok(()) + } } impl CrashInfo { diff --git a/datadog-crashtracker/src/crash_info/stacktrace.rs b/datadog-crashtracker/src/crash_info/stacktrace.rs index 1327d35d0a..7d02479a1b 100644 --- a/datadog-crashtracker/src/crash_info/stacktrace.rs +++ b/datadog-crashtracker/src/crash_info/stacktrace.rs @@ -195,7 +195,7 @@ impl StackFrame { let (file_offset, meta_idx) = normed.outputs[0]; let meta = &normed.meta[meta_idx]; let elf = meta.as_elf().context("Not elf")?; - let resolver = elf_resolvers.get(&elf.path)?; + let resolver = elf_resolvers.get_or_insert(&elf.path)?; let virt_address = resolver .file_offset_to_virt_offset(file_offset)? .context("No matching segment found")?; @@ -412,12 +412,13 @@ mod unix_test { let mut frame = StackFrame::new(); frame.ip = Some(address); + let mut symbolizer = Symbolizer::new(); let normalizer = Normalizer::new(); frame .normalize_ip( &normalizer, Pid::from(std::process::id()), - &mut CachedElfResolvers::default(), + &mut CachedElfResolvers::new(&mut symbolizer), ) .unwrap(); @@ -446,12 +447,13 @@ mod unix_test { let mut frame = StackFrame::new(); frame.ip = Some(address); + let mut symbolizer = blazesym::symbolize::Symbolizer::new(); let normalizer = Normalizer::new(); frame .normalize_ip( &normalizer, Pid::from(std::process::id()), - &mut CachedElfResolvers::default(), + &mut CachedElfResolvers::new(&mut symbolizer), ) .unwrap(); diff --git a/datadog-crashtracker/src/receiver/entry_points.rs b/datadog-crashtracker/src/receiver/entry_points.rs index 4aabcddd81..0147d81443 100644 --- a/datadog-crashtracker/src/receiver/entry_points.rs +++ b/datadog-crashtracker/src/receiver/entry_points.rs @@ -139,12 +139,7 @@ fn resolve_frames( .as_ref() .context("Unable to resolve frames: No PID specified")? .pid; - let rval1 = crash_info.resolve_names(pid); - let rval2 = crash_info.normalize_ips(pid); - anyhow::ensure!( - rval1.is_ok() && rval2.is_ok(), - "resolve_names: {rval1:?}\tnormalize_ips: {rval2:?}" - ); + crash_info.enrich_callstacks(pid)?; } Ok(()) }