diff --git a/Cargo.lock b/Cargo.lock index ea20e4c7ce..5f5787fa0f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2878,6 +2878,7 @@ dependencies = [ name = "libdd-profiling-ffi" version = "1.0.0" dependencies = [ + "allocator-api2", "anyhow", "build_common", "datadog-ffe-ffi", diff --git a/examples/ffi/build-examples.sh b/examples/ffi/build-examples.sh index 62cc12b53c..7f96df9d28 100755 --- a/examples/ffi/build-examples.sh +++ b/examples/ffi/build-examples.sh @@ -34,6 +34,6 @@ echo "Configuring example build..." cmake -S examples/ffi -B examples/ffi/build -D Datadog_ROOT=./release echo "Building examples..." -cmake --build ./examples/ffi/build +cmake --build ./examples/ffi/build --target profiles echo "Done! Example executables are available in examples/ffi/build/" diff --git a/examples/ffi/profiles.c b/examples/ffi/profiles.c index 8e8bc05668..0359166acb 100644 --- a/examples/ffi/profiles.c +++ b/examples/ffi/profiles.c @@ -6,6 +6,9 @@ #include #include +// Number of samples to add with each API +#define NUM_SAMPLES 5000000 + int main(void) { const ddog_prof_ValueType wall_time = { .type_ = DDOG_CHARSLICE_C("wall-time"), @@ -14,16 +17,27 @@ int main(void) { const ddog_prof_Slice_ValueType sample_types = {&wall_time, 1}; const ddog_prof_Period period = {wall_time, 60}; - ddog_prof_Profile_NewResult new_result = ddog_prof_Profile_new(sample_types, &period); - if (new_result.tag != DDOG_PROF_PROFILE_NEW_RESULT_OK) { - ddog_CharSlice message = ddog_Error_message(&new_result.err); - fprintf(stderr, "%.*s", (int)message.len, message.ptr); - ddog_Error_drop(&new_result.err); + // Create a ProfilesDictionary for the new API + ddog_prof_ProfilesDictionaryHandle dict = {0}; + ddog_prof_Status dict_status = ddog_prof_ProfilesDictionary_new(&dict); + if (dict_status.flags != 0) { + fprintf(stderr, "Failed to create dictionary: %s\n", dict_status.err); + ddog_prof_Status_drop(&dict_status); exit(EXIT_FAILURE); } - ddog_prof_Profile *profile = &new_result.ok; + // Create profile 
using the dictionary + ddog_prof_Profile profile = {0}; + ddog_prof_Status profile_status = + ddog_prof_Profile_with_dictionary(&profile, &dict, sample_types, &period); + if (profile_status.flags != 0) { + fprintf(stderr, "Failed to create profile: %s\n", profile_status.err); + ddog_prof_Status_drop(&profile_status); + ddog_prof_ProfilesDictionary_drop(&dict); + exit(EXIT_FAILURE); + } + // Original API sample ddog_prof_Location root_location = { // yes, a zero-initialized mapping is valid .mapping = (ddog_prof_Mapping){0}, @@ -44,10 +58,10 @@ int main(void) { .labels = {&label, 1}, }; - for (int i = 0; i < 10000000; i++) { + for (int i = 0; i < NUM_SAMPLES; i++) { label.num = i; - ddog_prof_Profile_Result add_result = ddog_prof_Profile_add(profile, sample, 0); + ddog_prof_Profile_Result add_result = ddog_prof_Profile_add(&profile, sample, 0); if (add_result.tag != DDOG_PROF_PROFILE_RESULT_OK) { ddog_CharSlice message = ddog_Error_message(&add_result.err); fprintf(stderr, "%.*s", (int)message.len, message.ptr); @@ -55,17 +69,96 @@ int main(void) { } } + // New API sample using the dictionary + // Insert strings into the dictionary + ddog_prof_StringId2 function_name_id, filename_id, label_key_id; + + dict_status = ddog_prof_ProfilesDictionary_insert_str( + &function_name_id, dict, DDOG_CHARSLICE_C("{main}"), DDOG_PROF_UTF8_OPTION_ASSUME); + if (dict_status.flags != 0) { + fprintf(stderr, "Failed to insert function name: %s\n", dict_status.err); + ddog_prof_Status_drop(&dict_status); + goto cleanup; + } + + dict_status = ddog_prof_ProfilesDictionary_insert_str(&filename_id, dict, + DDOG_CHARSLICE_C("/srv/example/index.php"), + DDOG_PROF_UTF8_OPTION_ASSUME); + if (dict_status.flags != 0) { + fprintf(stderr, "Failed to insert filename: %s\n", dict_status.err); + ddog_prof_Status_drop(&dict_status); + goto cleanup; + } + + dict_status = ddog_prof_ProfilesDictionary_insert_str( + &label_key_id, dict, DDOG_CHARSLICE_C("unique_counter"), DDOG_PROF_UTF8_OPTION_ASSUME); + 
if (dict_status.flags != 0) { + fprintf(stderr, "Failed to insert label key: %s\n", dict_status.err); + ddog_prof_Status_drop(&dict_status); + goto cleanup; + } + + // Create a function using the dictionary IDs + ddog_prof_FunctionId2 function_id; + ddog_prof_Function2 function2 = { + .name = function_name_id, + .system_name = DDOG_PROF_STRINGID2_EMPTY, + .file_name = filename_id, + }; + + dict_status = ddog_prof_ProfilesDictionary_insert_function(&function_id, dict, &function2); + if (dict_status.flags != 0) { + fprintf(stderr, "Failed to insert function: %s\n", dict_status.err); + ddog_prof_Status_drop(&dict_status); + goto cleanup; + } + + // Create a location using the dictionary IDs + ddog_prof_Location2 location2 = { + .mapping = (ddog_prof_MappingId2){0}, // null mapping is valid + .function = function_id, + .address = 0, + .line = 0, + }; + + // New API sample using dictionary IDs + ddog_prof_Label2 label2 = { + .key = label_key_id, + .str = DDOG_CHARSLICE_C(""), + .num = 0, + .num_unit = DDOG_CHARSLICE_C(""), + }; + const ddog_prof_Sample2 sample2 = { + .locations = {&location2, 1}, + .values = {&value, 1}, + .labels = {&label2, 1}, + }; + + for (int i = 0; i < NUM_SAMPLES; i++) { + label2.num = i; + + ddog_prof_Status add2_status = ddog_prof_Profile_add2(&profile, sample2, 0); + if (add2_status.flags != 0) { + fprintf(stderr, "add2 error: %s\n", add2_status.err); + ddog_prof_Status_drop(&add2_status); + } + } + // printf("Press any key to reset and drop..."); // getchar(); - ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(profile); +cleanup: + ; // Can't have a declaration after a label pre-C23, so use an empty statement. 
+ ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(&profile); if (reset_result.tag != DDOG_PROF_PROFILE_RESULT_OK) { ddog_CharSlice message = ddog_Error_message(&reset_result.err); fprintf(stderr, "%.*s", (int)message.len, message.ptr); ddog_Error_drop(&reset_result.err); } - ddog_prof_Profile_drop(profile); + ddog_prof_Profile_drop(&profile); + // Drop the dictionary + ddog_prof_ProfilesDictionary_drop(&dict); return 0; -} \ No newline at end of file +} diff --git a/libdd-common/src/error.rs b/libdd-common/src/error.rs index b91ea2ed2e..5a1d695190 100644 --- a/libdd-common/src/error.rs +++ b/libdd-common/src/error.rs @@ -8,7 +8,7 @@ /// 2. It must be valid UTF-8. /// 3. It must not allocate to achieve the static bounds. /// -/// Using a c-str literal in Rust achieves all these requirements: +/// Using a c-str literal in Rust generally achieves all these requirements: /// /// ``` /// c"this string is compatible with FfiSafeErrorMessage"; diff --git a/libdd-profiling-ffi/Cargo.toml b/libdd-profiling-ffi/Cargo.toml index 41e01d1631..e63daeae4d 100644 --- a/libdd-profiling-ffi/Cargo.toml +++ b/libdd-profiling-ffi/Cargo.toml @@ -38,6 +38,7 @@ datadog-ffe-ffi = ["dep:datadog-ffe-ffi"] build_common = { path = "../build-common" } [dependencies] +allocator-api2 = { version = "0.2.21", default-features = false, features = ["alloc"] } anyhow = "1.0" libdd-data-pipeline-ffi = { path = "../libdd-data-pipeline-ffi", default-features = false, optional = true } libdd-crashtracker-ffi = { path = "../libdd-crashtracker-ffi", default-features = false, optional = true} diff --git a/libdd-profiling-ffi/cbindgen.toml b/libdd-profiling-ffi/cbindgen.toml index cc94c47795..187d1b8c8d 100644 --- a/libdd-profiling-ffi/cbindgen.toml +++ b/libdd-profiling-ffi/cbindgen.toml @@ -106,6 +106,9 @@ renaming_overrides_prefixing = true "CancellationToken" = "struct ddog_OpaqueCancellationToken" "Handle_TokioCancellationToken" = "ddog_CancellationToken" +"ArcHandle_ProfilesDictionary" = 
"ddog_prof_ProfilesDictionaryHandle" +"ProfileStatus" = "ddog_prof_Status" + [export.mangle] rename_types = "PascalCase" diff --git a/libdd-profiling-ffi/src/arc_handle.rs b/libdd-profiling-ffi/src/arc_handle.rs new file mode 100644 index 0000000000..44bf718685 --- /dev/null +++ b/libdd-profiling-ffi/src/arc_handle.rs @@ -0,0 +1,78 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::profile_error::ProfileError; +use crate::EmptyHandleError; +use libdd_profiling::profiles::collections::Arc; +use std::ptr::{null_mut, NonNull}; + +/// Opaque FFI handle to an `Arc`'s inner `T`. +/// +/// Safety rules for implementors/callers: +/// - Do not create multiple owning `Arc`s from the same raw pointer. +/// - Always restore the original `Arc` with `into_raw` after any `from_raw`. +/// - Use `as_inner()` to validate non-null before performing raw round-trips. +/// +/// From Rust, use [`ArcHandle::try_clone`] to make a reference-counted copy. +/// From the C FFI, the handle should probably be renamed to avoid generics +/// bloat garbage, and a *_try_clone API should be provided. +/// +/// Use [`ArcHandle::drop_resource`] to drop the resource and move this handle +/// into the empty handle state, which is the default state. +#[repr(transparent)] +#[derive(Debug)] +pub struct ArcHandle(*mut T); + +impl Default for ArcHandle { + fn default() -> Self { + Self(null_mut()) + } +} + +impl ArcHandle { + /// Constructs a new handle by allocating an `ArcHandle` and returning + /// its inner pointer as a handle. + /// + /// Returns OutOfMemory on allocation failure. + pub fn new(value: T) -> Result { + let arc = Arc::try_new(value)?; + let ptr = Arc::into_raw(arc).as_ptr(); + Ok(Self(ptr)) + } + + pub fn try_clone_into_arc(&self) -> Result, ProfileError> { + let clone = self.try_clone()?; + // SAFETY: try_clone succeeded so it must not be null. 
+ let nn = unsafe { NonNull::new_unchecked(clone.0) }; + // SAFETY: validated that it isn't null, should otherwise be an Arc. + Ok(unsafe { Arc::from_raw(nn) }) + } + + #[inline] + pub fn as_inner(&self) -> Result<&T, EmptyHandleError> { + unsafe { self.0.as_ref() }.ok_or(EmptyHandleError) + } + + /// Tries to clone the resource this handle points to, and returns a new + /// handle to it. + pub fn try_clone(&self) -> Result { + let nn = NonNull::new(self.0).ok_or(EmptyHandleError)?; + // SAFETY: ArcHandle uses a pointer to T as its repr, and as long as + // callers have upheld safety requirements elsewhere, including the + // FFI, then there will be a valid object with refcount > 0. + unsafe { Arc::try_increment_count(nn.as_ptr())? }; + Ok(Self(self.0)) + } + + /// Drops the resource that this handle refers to. It will remain alive if + /// there are other handles to the resource which were created by + /// successful calls to try_clone. This handle will now be empty and + /// operations on it will fail. + pub fn drop_resource(&mut self) { + // pointers aren't default until Rust 1.88. 
+ let ptr = core::mem::replace(&mut self.0, null_mut()); + if let Some(nn) = NonNull::new(ptr) { + drop(unsafe { Arc::from_raw(nn) }); + } + } +} diff --git a/libdd-profiling-ffi/src/lib.rs b/libdd-profiling-ffi/src/lib.rs index 067de9dbae..aac70dab2c 100644 --- a/libdd-profiling-ffi/src/lib.rs +++ b/libdd-profiling-ffi/src/lib.rs @@ -7,10 +7,17 @@ #![cfg_attr(not(test), deny(clippy::todo))] #![cfg_attr(not(test), deny(clippy::unimplemented))] +mod arc_handle; mod exporter; +mod profile_error; +mod profile_status; mod profiles; mod string_storage; +pub use arc_handle::*; +pub use profile_error::*; +pub use profile_status::*; + #[cfg(all(feature = "symbolizer", not(target_os = "windows")))] pub use symbolizer_ffi::*; diff --git a/libdd-profiling-ffi/src/profile_error.rs b/libdd-profiling-ffi/src/profile_error.rs new file mode 100644 index 0000000000..fd4ea417ce --- /dev/null +++ b/libdd-profiling-ffi/src/profile_error.rs @@ -0,0 +1,157 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::profile_status::{string_try_shrink_to_fit, ProfileStatus}; +use libdd_common::error::FfiSafeErrorMessage; +use libdd_common_ffi::slice::SliceConversionError; +use libdd_profiling::profiles::collections::{ArcOverflow, SetError}; +use libdd_profiling::profiles::FallibleStringWriter; +use std::borrow::Cow; +use std::ffi::{CStr, CString}; +use std::fmt; +use std::io::ErrorKind; + +/// Represents errors which can occur in the profiling FFI. Its main purpose +/// is to hold a more Rust-friendly version of [`ProfileStatus`]. +#[derive(Debug)] +pub enum ProfileError { + AllocError, + CapacityOverflow, + ReferenceCountOverflow, + + Other(Cow<'static, CStr>), +} + +/// Represents an error that means the handle is empty, meaning it doesn't +/// point to a resource. 
+#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct EmptyHandleError; + +impl From<&'static CStr> for ProfileError { + fn from(s: &'static CStr) -> ProfileError { + Self::Other(Cow::Borrowed(s)) + } +} + +impl From for ProfileError { + fn from(s: CString) -> ProfileError { + Self::Other(Cow::Owned(s)) + } +} + +impl From for Cow<'static, CStr> { + fn from(err: ProfileError) -> Cow<'static, CStr> { + match err { + ProfileError::AllocError => Cow::Borrowed(c"memory allocation failed because the memory allocator returned an error"), + ProfileError::CapacityOverflow => Cow::Borrowed(c"memory allocation failed because the computed capacity exceeded the collection's maximum"), + ProfileError::ReferenceCountOverflow => Cow::Borrowed(c"reference count overflow"), + ProfileError::Other(msg) => msg, + } + } +} + +impl From for ProfileStatus { + fn from(err: ProfileError) -> ProfileStatus { + let cow = >::from(err); + match cow { + Cow::Borrowed(borrowed) => ProfileStatus::from(borrowed), + Cow::Owned(owned) => ProfileStatus::from(owned), + } + } +} + +impl From for ProfileError { + fn from(_: ArcOverflow) -> ProfileError { + ProfileError::ReferenceCountOverflow + } +} + +impl From for ProfileError { + fn from(err: allocator_api2::collections::TryReserveError) -> ProfileError { + match err.kind() { + allocator_api2::collections::TryReserveErrorKind::CapacityOverflow => { + ProfileError::CapacityOverflow + } + allocator_api2::collections::TryReserveErrorKind::AllocError { .. } => { + ProfileError::AllocError + } + } + } +} + +impl From for ProfileError { + fn from(_: allocator_api2::alloc::AllocError) -> ProfileError { + ProfileError::AllocError + } +} + +impl From for ProfileError { + fn from(_: std::collections::TryReserveError) -> ProfileError { + // We just assume it's out of memory since kind isn't stable. 
+ ProfileError::AllocError + } +} + +impl From for ProfileError { + fn from(err: SetError) -> ProfileError { + ProfileError::Other(Cow::Borrowed(err.as_ffi_str())) + } +} + +impl From for ProfileError { + fn from(err: EmptyHandleError) -> ProfileError { + ProfileError::from(err.as_ffi_str()) + } +} + +impl From for ProfileError { + fn from(err: SliceConversionError) -> ProfileError { + ProfileError::from(err.as_ffi_str()) + } +} + +/// # Safety +/// +/// Uses c-str literal to ensure valid UTF-8 and null termination. +unsafe impl FfiSafeErrorMessage for EmptyHandleError { + fn as_ffi_str(&self) -> &'static CStr { + c"handle used with an interior null pointer" + } +} + +impl fmt::Display for EmptyHandleError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_rust_str().fmt(f) + } +} + +impl core::error::Error for EmptyHandleError {} + +impl From for ProfileError { + fn from(err: std::io::Error) -> ProfileError { + match err.kind() { + ErrorKind::StorageFull => ProfileError::CapacityOverflow, + ErrorKind::WriteZero | ErrorKind::OutOfMemory => ProfileError::AllocError, + e => { + let mut writer = FallibleStringWriter::new(); + use core::fmt::Write; + // Add null terminator that from_vec_with_nul expects. + if write!(&mut writer, "{e}\0").is_ok() { + return ProfileError::Other(Cow::Borrowed( + c"memory allocation failed while trying to create an error message", + )); + } + let mut string = String::from(writer); + // We do this to avoid the potential panic case of failed + // allocation in CString::from_vec_with_nul. 
+ if string_try_shrink_to_fit(&mut string).is_err() { + return ProfileError::Other(Cow::Borrowed(c"memory allocation failed while trying to shrink a vec to create an error message")); + } + match CString::from_vec_with_nul(string.into_bytes()) { + Ok(cstring) => ProfileError::Other(Cow::Owned(cstring)), + Err(_) => ProfileError::Other(Cow::Borrowed(c"encountered an interior null byte while converting a std::io::Error into a ProfileError")) + } + } + } + } +} diff --git a/libdd-profiling-ffi/src/profile_status.rs b/libdd-profiling-ffi/src/profile_status.rs new file mode 100644 index 0000000000..9c4d8c59da --- /dev/null +++ b/libdd-profiling-ffi/src/profile_status.rs @@ -0,0 +1,275 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use allocator_api2::alloc::{AllocError, Allocator, Global, Layout}; +use libdd_profiling::profiles::FallibleStringWriter; +use std::borrow::Cow; +use std::ffi::{c_char, CStr, CString}; +use std::fmt::Display; +use std::hint::unreachable_unchecked; +use std::mem::ManuallyDrop; +use std::ptr::{null, NonNull}; + +const FLAG_OK: usize = 0b00; +const FLAG_STATIC: usize = 0b01; +const FLAG_ALLOCATED: usize = 0b11; + +const MASK_IS_ERROR: usize = 0b01; +const MASK_IS_ALLOCATED: usize = 0b10; +const MASK_UNUSED: usize = !(MASK_IS_ERROR | MASK_IS_ALLOCATED); + +/// Represents the result of an operation that either succeeds with no value, +/// or fails with an error message. This is like `Result<(), Cow` except +/// its representation is smaller, and is FFI-stable. +/// +/// The OK status is guaranteed to have a representation of `{ 0, null }`. +#[repr(C)] +#[derive(Debug)] +pub struct ProfileStatus { + /// 0 means okay, everything else is opaque in C. + /// In Rust, the bits help us know whether it is heap allocated or not. + pub flags: libc::size_t, + /// If not null, this is a pointer to a valid null-terminated string in + /// UTF-8 encoding. + /// This is null if `flags` == 0. 
+ pub err: *const c_char, +} + +impl Default for ProfileStatus { + fn default() -> Self { + Self { + flags: 0, + err: null(), + } + } +} + +unsafe impl Send for ProfileStatus {} +unsafe impl Sync for ProfileStatus {} + +impl From> for ProfileStatus +where + ProfileStatus: From, +{ + fn from(result: Result<(), E>) -> Self { + match result { + Ok(_) => ProfileStatus::OK, + Err(err) => ProfileStatus::from(err), + } + } +} + +impl From for ProfileStatus { + fn from(err: anyhow::Error) -> ProfileStatus { + ProfileStatus::from_error(err) + } +} + +impl From<&'static CStr> for ProfileStatus { + fn from(value: &'static CStr) -> Self { + Self { + flags: FLAG_STATIC, + err: value.as_ptr(), + } + } +} + +impl From for ProfileStatus { + fn from(cstring: CString) -> Self { + Self { + flags: FLAG_ALLOCATED, + err: cstring.into_raw(), + } + } +} + +impl TryFrom for CString { + type Error = usize; + + fn try_from(status: ProfileStatus) -> Result { + if status.flags == FLAG_ALLOCATED { + Ok(unsafe { CString::from_raw(status.err.cast_mut()) }) + } else { + Err(status.flags) + } + } +} + +impl TryFrom<&ProfileStatus> for &CStr { + type Error = usize; + + fn try_from(status: &ProfileStatus) -> Result { + if status.flags != FLAG_OK { + Ok(unsafe { CStr::from_ptr(status.err.cast_mut()) }) + } else { + Err(status.flags) + } + } +} + +impl From for Result<(), Cow<'static, CStr>> { + fn from(status: ProfileStatus) -> Self { + let flags = status.flags; + let is_error = (flags & MASK_IS_ERROR) != 0; + let is_allocated = (flags & MASK_IS_ALLOCATED) != 0; + #[allow(clippy::panic)] + if cfg!(debug_assertions) && (status.flags & MASK_UNUSED) != 0 { + panic!("invalid bit pattern: {flags:b}"); + } + match (is_allocated, is_error) { + (false, false) => Ok(()), + (false, true) => Err(Cow::Borrowed(unsafe { CStr::from_ptr(status.err) })), + (true, true) => Err(Cow::Owned(unsafe { + CString::from_raw(status.err.cast_mut()) + })), + (true, false) => { + #[allow(clippy::panic)] + if 
cfg!(debug_assertions) { + panic!("invalid bit pattern: {flags:b}"); + } + unsafe { unreachable_unchecked() } + } + } + } +} + +impl From<()> for ProfileStatus { + fn from(_: ()) -> Self { + Self::OK + } +} + +/// Tries to shrink a vec to exactly fit its length. +/// On success, the vector's capacity equals its length. +/// Returns an allocation error if the allocator cannot shrink. +fn vec_try_shrink_to_fit(vec: &mut Vec) -> Result<(), AllocError> { + let len = vec.len(); + if vec.capacity() == len || core::mem::size_of::() == 0 { + return Ok(()); + } + + // Take ownership temporarily to manipulate raw parts; put an empty vec + // in its place. + let mut md = ManuallyDrop::new(core::mem::take(vec)); + + // Avoid len=0 case for allocators by dropping the allocation and replacing + // it with a new empty vec. + if len == 0 { + // SAFETY: we have exclusive access, and we're not exposing the zombie + // bits to safe code since we're just returning (original vec was + // replaced by an empty vec). + unsafe { ManuallyDrop::drop(&mut md) }; + return Ok(()); + } + + let ptr = md.as_mut_ptr(); + let cap = md.capacity(); + + // SAFETY: Vec invariants ensure `cap >= len`, and capacity/len fit isize. + let old_layout = unsafe { Layout::array::(cap).unwrap_unchecked() }; + let new_layout = unsafe { Layout::array::(len).unwrap_unchecked() }; + + // SAFETY: `ptr` is non-null and properly aligned for T (Vec invariant). + let old_ptr_u8 = unsafe { NonNull::new_unchecked(ptr.cast::()) }; + + match unsafe { Global.shrink(old_ptr_u8, old_layout, new_layout) } { + Ok(new_ptr_u8) => { + let new_ptr = new_ptr_u8.as_ptr().cast::(); + // SAFETY: new allocation valid for len Ts; capacity == len. + let new_vec = unsafe { Vec::from_raw_parts(new_ptr, len, len) }; + *vec = new_vec; + Ok(()) + } + Err(_) => { + // Reconstruct original and put it back; report OOM. 
+ let orig = unsafe { Vec::from_raw_parts(ptr, len, cap) }; + *vec = orig; + Err(AllocError) + } + } +} + +pub(crate) fn string_try_shrink_to_fit(string: &mut String) -> Result<(), AllocError> { + // Take ownership to get access to the backing Vec. + let mut bytes = core::mem::take(string).into_bytes(); + let res = vec_try_shrink_to_fit(&mut bytes); + // SAFETY: bytes came from a valid UTF-8 String and were not mutated. + *string = unsafe { String::from_utf8_unchecked(bytes) }; + res +} + +impl ProfileStatus { + pub const OK: ProfileStatus = ProfileStatus { + flags: FLAG_OK, + err: null(), + }; + + const OUT_OF_MEMORY: ProfileStatus = ProfileStatus { + flags: FLAG_STATIC, + err: c"out of memory while trying to display error".as_ptr(), + }; + const NULL_BYTE_IN_ERROR_MESSAGE: ProfileStatus = ProfileStatus { + flags: FLAG_STATIC, + err: c"another error occured, but cannot be displayed because it has interior null bytes" + .as_ptr(), + }; + + pub fn from_ffi_safe_error_message( + err: E, + ) -> Self { + ProfileStatus::from(err.as_ffi_str()) + } + + pub fn from_error(err: E) -> Self { + use core::fmt::Write; + let mut writer = FallibleStringWriter::new(); + if write!(writer, "{err}").is_err() { + return ProfileStatus::OUT_OF_MEMORY; + } + + let mut str = String::from(writer); + + // std doesn't expose memchr even though it has it, but fortunately + // libc has it, and we use the libc crate already in FFI. + let pos = unsafe { libc::memchr(str.as_ptr().cast(), 0, str.len()) }; + if !pos.is_null() { + return ProfileStatus::NULL_BYTE_IN_ERROR_MESSAGE; + } + + // Reserve memory exactly. We have to shrink later in order to turn + // it into a box, so we don't want any excess capacity. + if str.try_reserve_exact(1).is_err() { + return ProfileStatus::OUT_OF_MEMORY; + } + str.push('\0'); + + if string_try_shrink_to_fit(&mut str).is_err() { + return ProfileStatus::OUT_OF_MEMORY; + } + + // Pop the null off because CString::from_vec_unchecked adds one. 
+ _ = str.pop(); + + // And finally, this is why we went through the pain of + // string_try_shrink_to_fit: this method will call shrink_to_fit, so + // to avoid an allocation failure here, we had to make a String with + // no excess capacity. + let cstring = unsafe { CString::from_vec_unchecked(str.into_bytes()) }; + ProfileStatus::from(cstring) + } +} + +/// Frees any error associated with the status, and replaces it with an OK. +/// +/// # Safety +/// +/// The pointer should point at a valid Status object, if it's not null. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_Status_drop(status: *mut ProfileStatus) { + if status.is_null() { + return; + } + // SAFETY: safe when the user respects ddog_prof_Status_drop's conditions. + let status = unsafe { core::ptr::replace(status, ProfileStatus::OK) }; + drop(Result::from(status)); +} diff --git a/libdd-profiling-ffi/src/profiles/datatypes.rs b/libdd-profiling-ffi/src/profiles/datatypes.rs index 51f4142e5a..cc118a102f 100644 --- a/libdd-profiling-ffi/src/profiles/datatypes.rs +++ b/libdd-profiling-ffi/src/profiles/datatypes.rs @@ -2,12 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 use crate::string_storage::{get_inner_string_storage, ManagedStringStorage}; +use crate::{ensure_non_null_out_parameter, ArcHandle, ProfileError, ProfileStatus}; use anyhow::Context; use function_name::named; use libdd_common_ffi::slice::{AsBytes, ByteSlice, CharSlice, Slice}; use libdd_common_ffi::{wrap_with_ffi_result, Error, Handle, Timespec, ToInner}; use libdd_profiling::api::{self, ManagedStringId}; -use libdd_profiling::internal; +use libdd_profiling::profiles::datatypes::{ProfilesDictionary, StringId2}; +use libdd_profiling::{api2, internal}; use std::num::NonZeroI64; use std::str::Utf8Error; use std::time::SystemTime; @@ -215,6 +217,44 @@ pub struct Sample<'a> { pub labels: Slice<'a, Label<'a>>, } +#[derive(Copy, Clone, Debug, Default)] +#[repr(C)] +pub struct Label2<'a> { + pub key: StringId2, + + /// At most one of `.str` and 
/// A borrowed, `#[repr(C)]` sample made of three slices: locations,
/// values and labels. All three borrow for the lifetime `'a`.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct Sample2<'a> {
    /// The call stack for this sample. The leaf is at locations[0].
    pub locations: Slice<'a, api2::Location2>,

    /// The type and unit of each value is defined by the corresponding
    /// entry in Profile.sample_type. All samples must have the same
    /// number of values, the same as the length of Profile.sample_type.
    /// When aggregating multiple samples into a single sample, the
    /// result has a list of values that is the element-wise sum of the
    /// lists of the originals.
    pub values: Slice<'a, i64>,

    /// Labels carry additional context for this sample, such as a
    /// thread id or an allocation size.
    pub labels: Slice<'a, Label2<'a>>,
}
+/// +/// # Safety +/// All slices must have pointers that are suitably aligned for their type +/// and must have the correct number of elements for the slice. +/// +/// The `dict` reference must be to a valid ProfilesDictionary handle. It may +/// be an empty handle, but it must be a valid handle. +/// +/// The `out` pointer must be non-null and suitable for pointer writes, +/// including that it has the correct size and alignment. +#[no_mangle] +#[must_use] +pub unsafe extern "C" fn ddog_prof_Profile_with_dictionary( + out: *mut Profile, + dict: &ArcHandle, + sample_types: Slice, + period: Option<&Period>, +) -> ProfileStatus { + ensure_non_null_out_parameter!(out); + match profile_with_dictionary(dict, sample_types, period) { + // SAFETY: checked that it isn't null above, the rest comes from this + // function's own safety conditions. Technically, our safety conditions + // don't require a null check, but we're being safe there. + Ok(profile) => unsafe { + out.write(profile); + ProfileStatus::OK + }, + Err(e) => ProfileStatus::from(e), + } +} + +unsafe fn profile_with_dictionary( + dict: &ArcHandle, + sample_types: Slice, + period: Option<&Period>, +) -> Result { + let sample_types = sample_types.try_as_slice()?; + let dict = dict.try_clone_into_arc()?; + + let mut types = Vec::new(); + types.try_reserve_exact(sample_types.len())?; + types.extend(sample_types.iter().map(api::ValueType::from)); + let period = period.map(Into::into); + + match internal::Profile::try_new_with_dictionary(&types, period, dict) { + Ok(ok) => Ok(Profile::new(ok)), + Err(err) => Err(ProfileError::from(err)), + } +} + /// Same as `ddog_profile_new` but also configures a `string_storage` for the profile. #[no_mangle] #[must_use] @@ -508,6 +606,42 @@ pub unsafe extern "C" fn ddog_prof_Profile_add( .context("ddog_prof_Profile_add failed") .into() } +/// # Safety +/// The `profile` ptr must point to a valid Profile object created by this +/// module. 
All pointers inside the `sample` need to be valid for the duration +/// of this call. +/// +/// If successful, it returns the Ok variant. +/// On error, it holds an error message in the error variant. +/// +/// This call is _NOT_ thread-safe. +#[must_use] +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_Profile_add2( + profile: *mut Profile, + sample: Sample2, + timestamp: Option, +) -> ProfileStatus { + ProfileStatus::from((|| { + let profile = profile_ptr_to_inner(profile)?; + + let locations = sample.locations.try_as_slice()?; + let values = sample.values.try_as_slice()?; + let labels = sample.labels.try_as_slice()?; + + let labels_iter = labels.iter().map(|label| -> anyhow::Result { + Ok(api2::Label { + key: label.key, + str: core::str::from_utf8(label.str.try_as_bytes()?)?, + num: label.num, + num_unit: core::str::from_utf8(label.str.try_as_bytes()?)?, + }) + }); + profile + .try_add_sample2(locations, values, labels_iter, timestamp) + .context("ddog_prof_Profile_add failed") + })()) +} pub(crate) unsafe fn profile_ptr_to_inner<'a>( profile_ptr: *mut Profile, diff --git a/libdd-profiling-ffi/src/profiles/mod.rs b/libdd-profiling-ffi/src/profiles/mod.rs index 86136a6fbf..9b2fd8b45f 100644 --- a/libdd-profiling-ffi/src/profiles/mod.rs +++ b/libdd-profiling-ffi/src/profiles/mod.rs @@ -3,3 +3,40 @@ mod datatypes; mod interning_api; +mod profiles_dictionary; +mod utf8; + +use std::ffi::CStr; + +// Shared error message helpers and null-check macros reused by FFI modules. +pub const fn null_out_param_err() -> &'static CStr { + c"null pointer used as out parameter" +} + +pub const fn null_insert_err() -> &'static CStr { + c"tried to insert a null pointer" +} + +pub const fn null_profiles_dictionary() -> &'static CStr { + c"passed a null pointer for a ProfilesDictionary" +} + +#[macro_export] +macro_rules! 
/// Returns early from the enclosing function with a `ProfileStatus` built
/// from `null_insert_err()` when the given pointer expression is null.
#[macro_export]
macro_rules! ensure_non_null_insert {
    ($ptr:expr) => {
        if $ptr.is_null() {
            return $crate::ProfileStatus::from($crate::profiles::null_insert_err());
        }
    };
}
+/// This is always available in every string set and can be used without +/// needing to insert it into a string set. +#[no_mangle] +pub static DDOG_PROF_STRINGID2_LOCAL_ROOT_SPAN_ID: StringId2 = + unsafe { core::mem::transmute(StringRef::LOCAL_ROOT_SPAN_ID) }; + +/// A StringId that represents the string "trace endpoint". +/// This is always available in every string set and can be used without +/// needing to insert it into a string set. +#[no_mangle] +pub static DDOG_PROF_STRINGID2_TRACE_ENDPOINT: StringId2 = + unsafe { core::mem::transmute(StringRef::TRACE_ENDPOINT) }; + +/// A StringId that represents the string "span id". +/// This is always available in every string set and can be used without +/// needing to insert it into a string set. +#[no_mangle] +pub static DDOG_PROF_STRINGID2_SPAN_ID: StringId2 = + unsafe { core::mem::transmute(StringRef::SPAN_ID) }; + +/// Allocates a new `ProfilesDictionary` and writes a handle to it in `handle`. +/// +/// # Safety +/// +/// - `handle` must be non-null and valid for writes of `ProfilesDictionaryHandle`. +/// - The returned handle must eventually drop the resource; see +/// [`ddog_prof_ProfilesDictionary_drop`] for more details. +/// - If you need a copy, use [`ddog_prof_ProfilesDictionary_try_clone`]; don't just memcpy a new +/// handle. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_new( + handle: *mut ArcHandle, +) -> ProfileStatus { + ensure_non_null_out_parameter!(handle); + ProfileStatus::from(|| -> Result<(), ProfileError> { + let dict = ProfilesDictionary::try_new()?; + let h = ArcHandle::new(dict)?; + // SAFETY: `handle` was null-checked above; the caller guarantees it is valid for writes. + unsafe { handle.write(h) }; + Ok(()) + }()) +} + +/// Creates a new handle to the same `ProfilesDictionary` by incrementing the +/// internal reference count. +/// +/// # Safety +/// +/// - `out` must be non-null and valid for writes of `ProfilesDictionaryHandle`. +/// - `handle` must point to a live dictionary resource. 
+/// - Do not duplicate handles via memcpy; always use this API to create new handles so the +/// reference count is maintained correctly. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_try_clone( + out: *mut ArcHandle, + handle: ArcHandle, +) -> ProfileStatus { + ensure_non_null_out_parameter!(out); + ProfileStatus::from(|| -> Result<(), ProfileError> { + let cloned = handle.try_clone()?; + // SAFETY: `out` was null-checked above; the caller guarantees it is valid for writes. + unsafe { out.write(cloned) }; + Ok(()) + }()) +} + +/// Inserts a `Function` into the dictionary and writes its id to `function_id`. +/// +/// # Safety +/// +/// - `function_id` must be non-null and valid for writes of `FunctionId`. +/// - `dict` must refer to a live dictionary. +/// - `function` must be non-null and point to a valid `Function` for the duration of the call. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_insert_function( + function_id: *mut FunctionId2, + dict: Option<&ProfilesDictionary>, + function: *const Function2, +) -> ProfileStatus { + ensure_non_null_out_parameter!(function_id); + ensure_non_null_insert!(function); + ProfileStatus::from(|| -> Result<(), ProfileError> { + let dict = dict.ok_or(null_profiles_dictionary())?; + // SAFETY: `function` was null-checked above; the caller guarantees it points to a valid value. + let f2: Function2 = unsafe { *function }; + let id = dict.try_insert_function2(f2)?; + // SAFETY: `function_id` was null-checked above; the caller guarantees it is valid for writes. + unsafe { function_id.write(id) }; + Ok(()) + }()) +} + +/// Inserts a `Mapping` into the dictionary and writes its id to `mapping_id`. +/// +/// # Safety +/// +/// - `mapping_id` must be non-null and valid for writes of `MappingId`. +/// - `dict` must refer to a live dictionary. +/// - `mapping` must be non-null and point to a valid `Mapping` for the duration of the call. 
+#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_insert_mapping( + mapping_id: *mut MappingId2, + dict: Option<&ProfilesDictionary>, + mapping: *const Mapping2, +) -> ProfileStatus { + ensure_non_null_out_parameter!(mapping_id); + ensure_non_null_insert!(mapping); + ProfileStatus::from(|| -> Result<(), ProfileError> { + let dict = dict.ok_or(null_profiles_dictionary())?; + // SAFETY: `mapping` was null-checked above; the caller guarantees it points to a valid value. + let m2 = unsafe { *mapping }; + let id = dict.try_insert_mapping2(m2)?; + // SAFETY: `mapping_id` was null-checked above; the caller guarantees it is valid for writes. + unsafe { mapping_id.write(id) }; + Ok(()) + }()) +} + +/// Inserts a UTF-8 string into the dictionary string table and writes its id to `string_id`. +/// +/// # Safety +/// +/// - `string_id` must be non-null and valid for writes of `StringId`. +/// - `dict` must refer to a live dictionary. +/// - The UTF-8 policy indicated by `utf8_option` must be respected by the caller for the provided +/// `byte_slice`. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_insert_str( + string_id: *mut StringId2, + dict: Option<&ProfilesDictionary>, + byte_slice: CharSlice, + utf8_option: Utf8Option, +) -> ProfileStatus { + ensure_non_null_out_parameter!(string_id); + ProfileStatus::from(|| -> Result<(), ProfileError> { + let dict = dict.ok_or(null_profiles_dictionary())?; + crate::profiles::utf8::insert_str(dict.strings(), byte_slice, utf8_option) + .map(|id| unsafe { string_id.write(id.into()) }) + }()) +} + +/// Tries to get the string value associated with the string id. Fails if `dict` is +/// null (e.g. its handle has been taken from), or the result param is null. +/// +/// # Safety +/// +/// 1. The lifetime of the returned slice is tied to the underlying storage of the string set; make +/// sure the string set is still alive when using the returned slice. +/// 2. The string id should belong to the string set in this dictionary. Well-known strings are an +/// exception, as they exist in every set. +/// 3. The handle must represent a live profiles dictionary. 
Remember handles can be copied, and if +/// _any_ handle drops the resource, then all handles pointing to the resource are now invalid, +/// even though they are unaware of it. +/// 4. The result pointer must be valid for [`core::ptr::write`]. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_get_str( + result: *mut CharSlice<'static>, + dict: Option<&ProfilesDictionary>, + string_id: StringId2, +) -> ProfileStatus { + ensure_non_null_out_parameter!(result); + let Some(dict) = dict else { + return ProfileStatus::from(null_profiles_dictionary()); + }; + let string_ref = StringRef::from(string_id); + // SAFETY: It's not actually safe--as indicated in the docs + // for this function, the caller needs to be sure the string + // set in the dictionary outlives the slice. + result.write(unsafe { + std::mem::transmute::, CharSlice<'static>>(CharSlice::from( + dict.strings().get(string_ref), + )) + }); + ProfileStatus::OK +} + +/// Drops the `ProfilesDictionary` that the handle owns, leaving a valid but +/// useless handle (all operations on it will error). This takes a pointer to +/// the handle to be able to modify it to leave behind an empty handle. +/// +/// # Safety +/// +/// - If non-null, `handle` must point to a valid `ProfilesDictionaryHandle`. +/// - The underlying resource must be dropped exactly once across all copies of the handle. After +/// dropping, all other copies become invalid and must not be used; they should be discarded +/// without dropping. 
+#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_drop( + handle: *mut ArcHandle, +) { + // `as_mut` yields `None` for a null `handle`, so a null pointer is ignored. + if let Some(h) = handle.as_mut() { + h.drop_resource(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::profiles::utf8::Utf8Option; + + #[test] + fn test_basics_including_drop() { + let mut handle = ArcHandle::default(); + unsafe { + Result::from(ddog_prof_ProfilesDictionary_new(&mut handle)).unwrap(); + + let mut string_id = StringId2::default(); + Result::from(ddog_prof_ProfilesDictionary_insert_str( + &mut string_id, + handle.as_inner().ok(), + CharSlice::from("void main(int, char *[])"), + Utf8Option::Assume, + )) + .unwrap(); + + let mut function_id = FunctionId2::default(); + let function = Function2 { + name: string_id, + system_name: Default::default(), + file_name: Default::default(), + }; + Result::from(ddog_prof_ProfilesDictionary_insert_function( + &mut function_id, + handle.as_inner().ok(), + &function, + )) + .unwrap(); + + // NOTE(review): `found` is only fetched; its contents are never compared with the + // inserted string. + let mut found = CharSlice::empty(); + let status = + ddog_prof_ProfilesDictionary_get_str(&mut found, handle.as_inner().ok(), string_id); + Result::from(status).unwrap(); + + ddog_prof_ProfilesDictionary_drop(&mut handle); + } + } +} diff --git a/libdd-profiling-ffi/src/profiles/utf8.rs b/libdd-profiling-ffi/src/profiles/utf8.rs new file mode 100644 index 0000000000..045105bfef --- /dev/null +++ b/libdd-profiling-ffi/src/profiles/utf8.rs @@ -0,0 +1,147 @@ +// Copyright 2025-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::ProfileError; +use libdd_common::error::FfiSafeErrorMessage; +use libdd_common_ffi::slice::{AsBytes, CharSlice, SliceConversionError}; +use libdd_profiling::profiles::collections::{ParallelStringSet, StringRef}; +use std::borrow::Cow; +use std::collections::TryReserveError; +use std::ffi::CStr; +use std::str::Utf8Error; + +/// Selects how bytes received over FFI are turned into a UTF-8 string. +#[repr(C)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +#[allow(dead_code)] // these are made through ffi +pub enum Utf8Option { + /// The string is assumed to be valid UTF-8. If it's not, the behavior + /// is undefined. + Assume, + /// The string is converted to UTF-8 using lossy conversion. + ConvertLossy, + /// The string is validated to be UTF-8. If it's not, an error is + /// returned. + Validate, +} + +/// Why a UTF-8 conversion failed: a fallible allocation, the slice conversion, or UTF-8 +/// validation. +#[allow(dead_code)] +pub enum Utf8ConversionError { + OutOfMemory(TryReserveError), + SliceConversionError(SliceConversionError), + Utf8Error(Utf8Error), +} + +impl From for Utf8ConversionError { + fn from(e: TryReserveError) -> Self { + Self::OutOfMemory(e) + } +} + +impl From for Utf8ConversionError { + fn from(e: SliceConversionError) -> Self { + Self::SliceConversionError(e) + } +} + +impl From for Utf8ConversionError { + fn from(e: Utf8Error) -> Self { + Self::Utf8Error(e) + } +} + +// SAFETY: all cases are c-str literals, or delegate to the same trait. +unsafe impl FfiSafeErrorMessage for Utf8ConversionError { + fn as_ffi_str(&self) -> &'static CStr { + match self { + Utf8ConversionError::OutOfMemory(_) => c"out of memory: utf8 conversion failed", + Utf8ConversionError::SliceConversionError(err) => err.as_ffi_str(), + Utf8ConversionError::Utf8Error(_) => c"invalid input: string was not utf-8", + } + } +} + +impl Utf8Option { + /// Converts a byte slice to a UTF-8 string according to the option. 
+ /// - Assume: Borrow without validation (caller guarantees UTF-8) + /// - ConvertLossy: Lossy conversion with fallible allocation + /// - Validate: Validate and borrow on success + /// + /// # Safety + /// + /// When [`Utf8Option::Assume`] is passed, `bytes` must be valid UTF-8. + pub unsafe fn convert(self, bytes: &[u8]) -> Result, Utf8ConversionError> { + // SAFETY: caller asserts validity under Assume + Ok(match self { + Utf8Option::Assume => Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(bytes) }), + Utf8Option::ConvertLossy => try_from_utf8_lossy(bytes)?, + Utf8Option::Validate => Cow::Borrowed(std::str::from_utf8(bytes)?), + }) + } + + /// Reads the bytes out of `t` and converts them with [`Utf8Option::convert`]. + /// + /// # Safety + /// See the safety conditions on [`AsBytes::try_as_bytes`] and also + /// [`Utf8Option::convert`]; both must be upheld. + pub unsafe fn try_as_bytes_convert<'a, T: AsBytes<'a>>( + self, + t: T, + ) -> Result, Utf8ConversionError> { + let bytes = t.try_as_bytes()?; + self.convert(bytes) + } +} + +/// Tries to convert a slice of bytes to a string. The input may have invalid +/// characters. +/// +/// This is the same implementation as [`String::from_utf8_lossy`] except that +/// this uses fallible allocations. 
+pub fn try_from_utf8_lossy(v: &[u8]) -> Result, TryReserveError> { + let mut iter = v.utf8_chunks(); + + let first_valid = if let Some(chunk) = iter.next() { + let valid = chunk.valid(); + // No invalid bytes follow the first chunk's valid prefix: the whole input is valid + // (see the debug assertion), so it is borrowed as-is. + if chunk.invalid().is_empty() { + debug_assert_eq!(valid.len(), v.len()); + return Ok(Cow::Borrowed(valid)); + } + valid + } else { + // The iterator yielded no chunk at all, so the result is the empty string. + return Ok(Cow::Borrowed("")); + }; + + const REPLACEMENT: &str = "\u{FFFD}"; + const REPLACEMENT_LEN: usize = REPLACEMENT.len(); + + let mut res = String::new(); + res.try_reserve(v.len())?; + res.push_str(first_valid); + // Reaching here means the first chunk had invalid bytes, which become one replacement char. + res.try_reserve(REPLACEMENT_LEN)?; + res.push_str(REPLACEMENT); + + for chunk in iter { + let valid = chunk.valid(); + res.try_reserve(valid.len())?; + res.push_str(valid); + if !chunk.invalid().is_empty() { + res.try_reserve(REPLACEMENT_LEN)?; + res.push_str(REPLACEMENT); + } + } + + Ok(Cow::Owned(res)) +} + +/// Converts `str` to UTF-8 as directed by `utf8_options` and inserts it into `set`, mapping +/// conversion failures to a `ProfileError`. +/// +/// NOTE(review): this is a safe `fn` that calls the unsafe conversion; with +/// [`Utf8Option::Assume`] nothing validates the bytes, so callers must only pass valid UTF-8. +pub fn insert_str( + set: &ParallelStringSet, + str: CharSlice<'_>, + utf8_options: Utf8Option, +) -> Result { + let string = unsafe { utf8_options.try_as_bytes_convert(str) }.map_err(|err| match err { + Utf8ConversionError::OutOfMemory(err) => ProfileError::from(err), + Utf8ConversionError::SliceConversionError(err) => ProfileError::from(err.as_ffi_str()), + Utf8ConversionError::Utf8Error(_) => { + ProfileError::from(c"tried to insert a non-UTF8 string into a ProfilesDictionary") + } + })?; + Ok(set.try_insert(string.as_ref())?) +} diff --git a/libdd-profiling/src/profiles/collections/error.rs b/libdd-profiling/src/profiles/collections/error.rs index 57d3da1bef..34ca01fac9 100644 --- a/libdd-profiling/src/profiles/collections/error.rs +++ b/libdd-profiling/src/profiles/collections/error.rs @@ -1,14 +1,15 @@ // Copyright 2025-Present Datadog, Inc. 
https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use libdd_common::error::FfiSafeErrorMessage; +use std::ffi::CStr; +use std::fmt::{Display, Formatter}; + #[repr(C)] -#[derive(Debug, thiserror::Error)] +#[derive(Debug)] pub enum SetError { - #[error("set error: invalid argument")] InvalidArgument, - #[error("set error: out of memory")] OutOfMemory, - #[error("set error: reference count overflow")] ReferenceCountOverflow, } @@ -29,3 +30,30 @@ impl From for SetError { SetError::OutOfMemory } } + +// SAFETY: every arm of `as_ffi_str` is a c-string literal, and `as_rust_str` repeats the same +// text as a `str` literal. +unsafe impl FfiSafeErrorMessage for SetError { + fn as_ffi_str(&self) -> &'static CStr { + match self { + SetError::InvalidArgument => c"set error: invalid argument", + SetError::OutOfMemory => c"set error: out of memory", + SetError::ReferenceCountOverflow => c"set error: reference count overflow", + } + } + + fn as_rust_str(&self) -> &'static str { + // todo: MSRV 1.87: use str::from_utf8_unchecked + // Until then, keep these literals in sync with the ones in `as_ffi_str`. + match self { + SetError::InvalidArgument => "set error: invalid argument", + SetError::OutOfMemory => "set error: out of memory", + SetError::ReferenceCountOverflow => "set error: reference count overflow", + } + } +} + +impl Display for SetError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.as_rust_str().fmt(f) + } +} + +impl core::error::Error for SetError {} diff --git a/libdd-profiling/src/profiles/fallible_string_writer.rs b/libdd-profiling/src/profiles/fallible_string_writer.rs new file mode 100644 index 0000000000..e08fb657a1 --- /dev/null +++ b/libdd-profiling/src/profiles/fallible_string_writer.rs @@ -0,0 +1,75 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use core::fmt::{self, Write}; +use std::collections::TryReserveError; + +/// A `fmt::Write` adapter that grows a `String` using `try_reserve` before +/// each write, returning `fmt::Error` on allocation failure. 
+#[derive(Debug)] +pub struct FallibleStringWriter { + buf: String, +} + +impl Default for FallibleStringWriter { + fn default() -> FallibleStringWriter { + FallibleStringWriter::new() + } +} + +impl FallibleStringWriter { + /// Creates a new empty string writer. + pub const fn new() -> Self { + Self { buf: String::new() } + } + + /// Creates a new fallible string writer with a previously existing string + /// as the start of the buffer. New writes will append to the end of this. + pub const fn new_from_existing(buf: String) -> FallibleStringWriter { + FallibleStringWriter { buf } + } + + /// Tries to reserve capacity for at least `len` bytes more than the + /// current length. The allocator may reserve more space to speculatively + /// avoid frequent allocations. + pub fn try_reserve(&mut self, len: usize) -> Result<(), TryReserveError> { + self.buf.try_reserve(len) + } + + /// Tries to reserve the minimum capacity for at least `len` bytes + /// more than the current length. Unlike [`Self::try_reserve`], this will not + /// deliberately over-allocate to speculatively avoid frequent allocations. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`Self::try_reserve`] if future insertions are expected. 
+ pub fn try_reserve_exact(&mut self, len: usize) -> Result<(), TryReserveError> { + self.buf.try_reserve_exact(len) + } + + /// Appends `str` to the buffer after fallibly reserving room for it; if the reservation + /// fails, the error is returned and nothing is appended. + pub fn try_push_str(&mut self, str: &str) -> Result<(), TryReserveError> { + self.try_reserve(str.len())?; + self.buf.push_str(str); + Ok(()) + } +} + +impl From for String { + fn from(w: FallibleStringWriter) -> String { + w.buf + } +} + +impl From for FallibleStringWriter { + fn from(buf: String) -> FallibleStringWriter { + FallibleStringWriter { buf } + } +} + +impl Write for FallibleStringWriter { + fn write_str(&mut self, s: &str) -> fmt::Result { + // Same reserve-then-push steps as `try_push_str`, with the error mapped to `fmt::Error`. + self.buf.try_reserve(s.len()).map_err(|_| fmt::Error)?; + self.buf.push_str(s); + Ok(()) + } +} diff --git a/libdd-profiling/src/profiles/mod.rs b/libdd-profiling/src/profiles/mod.rs index 2ed0ebe8ea..8e74b22781 100644 --- a/libdd-profiling/src/profiles/mod.rs +++ b/libdd-profiling/src/profiles/mod.rs @@ -4,5 +4,7 @@ pub mod collections; mod compressor; pub mod datatypes; +mod fallible_string_writer; pub use compressor::*; +pub use fallible_string_writer::*;