diff --git a/Cargo.lock b/Cargo.lock index 272b22311c..09a0c45cdd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2901,6 +2901,7 @@ dependencies = [ "libdd-telemetry-ffi", "serde_json", "symbolizer-ffi", + "thiserror 2.0.17", "tokio-util", ] diff --git a/examples/ffi/profiles.c b/examples/ffi/profiles.c index 8e8bc05668..0359166acb 100644 --- a/examples/ffi/profiles.c +++ b/examples/ffi/profiles.c @@ -6,6 +6,9 @@ #include #include +// Number of samples to add with each API +#define NUM_SAMPLES 5000000 + int main(void) { const ddog_prof_ValueType wall_time = { .type_ = DDOG_CHARSLICE_C("wall-time"), @@ -14,16 +17,27 @@ int main(void) { const ddog_prof_Slice_ValueType sample_types = {&wall_time, 1}; const ddog_prof_Period period = {wall_time, 60}; - ddog_prof_Profile_NewResult new_result = ddog_prof_Profile_new(sample_types, &period); - if (new_result.tag != DDOG_PROF_PROFILE_NEW_RESULT_OK) { - ddog_CharSlice message = ddog_Error_message(&new_result.err); - fprintf(stderr, "%.*s", (int)message.len, message.ptr); - ddog_Error_drop(&new_result.err); + // Create a ProfilesDictionary for the new API + ddog_prof_ProfilesDictionaryHandle dict = {0}; + ddog_prof_Status dict_status = ddog_prof_ProfilesDictionary_new(&dict); + if (dict_status.flags != 0) { + fprintf(stderr, "Failed to create dictionary: %s\n", dict_status.err); + ddog_prof_Status_drop(&dict_status); exit(EXIT_FAILURE); } - ddog_prof_Profile *profile = &new_result.ok; + // Create profile using the dictionary + ddog_prof_Profile profile = {0}; + ddog_prof_Status profile_status = + ddog_prof_Profile_with_dictionary(&profile, &dict, sample_types, &period); + if (profile_status.flags != 0) { + fprintf(stderr, "Failed to create profile: %s\n", profile_status.err); + ddog_prof_Status_drop(&profile_status); + ddog_prof_ProfilesDictionary_drop(&dict); + exit(EXIT_FAILURE); + } + // Original API sample ddog_prof_Location root_location = { // yes, a zero-initialized mapping is valid .mapping = (ddog_prof_Mapping){0}, @@ 
-44,10 +58,10 @@ int main(void) { .labels = {&label, 1}, }; - for (int i = 0; i < 10000000; i++) { + for (int i = 0; i < NUM_SAMPLES; i++) { label.num = i; - ddog_prof_Profile_Result add_result = ddog_prof_Profile_add(profile, sample, 0); + ddog_prof_Profile_Result add_result = ddog_prof_Profile_add(&profile, sample, 0); if (add_result.tag != DDOG_PROF_PROFILE_RESULT_OK) { ddog_CharSlice message = ddog_Error_message(&add_result.err); fprintf(stderr, "%.*s", (int)message.len, message.ptr); @@ -55,17 +69,96 @@ int main(void) { } } + // New API sample using the dictionary + // Insert strings into the dictionary + ddog_prof_StringId2 function_name_id, filename_id, label_key_id; + + dict_status = ddog_prof_ProfilesDictionary_insert_str( + &function_name_id, dict, DDOG_CHARSLICE_C("{main}"), DDOG_PROF_UTF8_OPTION_ASSUME); + if (dict_status.flags != 0) { + fprintf(stderr, "Failed to insert function name: %s\n", dict_status.err); + ddog_prof_Status_drop(&dict_status); + goto cleanup; + } + + dict_status = ddog_prof_ProfilesDictionary_insert_str(&filename_id, dict, + DDOG_CHARSLICE_C("/srv/example/index.php"), + DDOG_PROF_UTF8_OPTION_ASSUME); + if (dict_status.flags != 0) { + fprintf(stderr, "Failed to insert filename: %s\n", dict_status.err); + ddog_prof_Status_drop(&dict_status); + goto cleanup; + } + + dict_status = ddog_prof_ProfilesDictionary_insert_str( + &label_key_id, dict, DDOG_CHARSLICE_C("unique_counter"), DDOG_PROF_UTF8_OPTION_ASSUME); + if (dict_status.flags != 0) { + fprintf(stderr, "Failed to insert label key: %s\n", dict_status.err); + ddog_prof_Status_drop(&dict_status); + goto cleanup; + } + + // Create a function using the dictionary IDs + ddog_prof_FunctionId2 function_id; + ddog_prof_Function2 function2 = { + .name = function_name_id, + .system_name = DDOG_PROF_STRINGID2_EMPTY, + .file_name = filename_id, + }; + + dict_status = ddog_prof_ProfilesDictionary_insert_function(&function_id, dict, &function2); + if (dict_status.flags != 0) { + fprintf(stderr, 
"Failed to insert function: %s\n", dict_status.err); + ddog_prof_Status_drop(&dict_status); + goto cleanup; + } + + // Create a location using the dictionary IDs + ddog_prof_Location2 location2 = { + .mapping = (ddog_prof_MappingId2){0}, // null mapping is valid + .function = function_id, + .address = 0, + .line = 0, + }; + + // New API sample using dictionary IDs + ddog_prof_Label2 label2 = { + .key = label_key_id, + .str = DDOG_CHARSLICE_C(""), + .num = 0, + .num_unit = DDOG_CHARSLICE_C(""), + }; + const ddog_prof_Sample2 sample2 = { + .locations = {&location2, 1}, + .values = {&value, 1}, + .labels = {&label2, 1}, + }; + + for (int i = 0; i < NUM_SAMPLES; i++) { + label2.num = i; + + ddog_prof_Status add2_status = ddog_prof_Profile_add2(&profile, sample2, 0); + if (add2_status.flags != 0) { + fprintf(stderr, "add2 error: %s\n", add2_status.err); + ddog_prof_Status_drop(&add2_status); + } + } + // printf("Press any key to reset and drop..."); // getchar(); - ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(profile); +cleanup: + ; // Can't have a declaration after a label pre-C23, so use an empty statement. 
+ ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(&profile); if (reset_result.tag != DDOG_PROF_PROFILE_RESULT_OK) { ddog_CharSlice message = ddog_Error_message(&reset_result.err); fprintf(stderr, "%.*s", (int)message.len, message.ptr); ddog_Error_drop(&reset_result.err); } - ddog_prof_Profile_drop(profile); + ddog_prof_Profile_drop(&profile); + // Drop the dictionary + ddog_prof_ProfilesDictionary_drop(&dict); return 0; -} \ No newline at end of file +} diff --git a/libdd-profiling-ffi/Cargo.toml b/libdd-profiling-ffi/Cargo.toml index e63daeae4d..cfa1031ebe 100644 --- a/libdd-profiling-ffi/Cargo.toml +++ b/libdd-profiling-ffi/Cargo.toml @@ -56,5 +56,6 @@ hyper = { workspace = true} libc = "0.2" serde_json = { version = "1.0" } symbolizer-ffi = { path = "../symbolizer-ffi", optional = true, default-features = false } +thiserror = "2" tokio-util = "0.7.1" datadog-ffe-ffi = { path = "../datadog-ffe-ffi", default-features = false, optional = true } diff --git a/libdd-profiling-ffi/cbindgen.toml b/libdd-profiling-ffi/cbindgen.toml index cc94c47795..187d1b8c8d 100644 --- a/libdd-profiling-ffi/cbindgen.toml +++ b/libdd-profiling-ffi/cbindgen.toml @@ -106,6 +106,9 @@ renaming_overrides_prefixing = true "CancellationToken" = "struct ddog_OpaqueCancellationToken" "Handle_TokioCancellationToken" = "ddog_CancellationToken" +"ArcHandle_ProfilesDictionary" = "ddog_prof_ProfilesDictionaryHandle" +"ProfileStatus" = "ddog_prof_Status" + [export.mangle] rename_types = "PascalCase" diff --git a/libdd-profiling-ffi/src/arc_handle.rs b/libdd-profiling-ffi/src/arc_handle.rs new file mode 100644 index 0000000000..e8d8ea5b75 --- /dev/null +++ b/libdd-profiling-ffi/src/arc_handle.rs @@ -0,0 +1,80 @@ +// Copyright 2025-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::profile_error::ProfileError; +use crate::EmptyHandleError; +use libdd_profiling::profiles::collections::Arc; +use std::ptr::{null_mut, NonNull}; + +/// Opaque FFI handle to an `Arc`'s inner `T`. +/// +/// Safety rules for implementors/callers: +/// - Do not create multiple owning `Arc`s from the same raw pointer. +/// - Always restore the original `Arc` with `into_raw` after any `from_raw`. +/// - Use `as_inner()` to validate non-null before performing raw round-trips. +/// +/// From Rust, use [`ArcHandle::try_clone`] to make a reference-counted copy. +/// From the C FFI, the handle should probably be renamed to avoid generics +/// bloat garbage, and a *_try_clone API should be provided. +/// +/// Use [`ArcHandle::drop_resource`] to drop the resource and move this handle +/// into the empty handle state, which is the default state. +#[repr(transparent)] +#[derive(Debug)] +pub struct ArcHandle(*mut T); + +impl Default for ArcHandle { + fn default() -> Self { + Self(null_mut()) + } +} + +impl ArcHandle { + /// Constructs a new handle by allocating an `Arc` and returning + /// its inner pointer as a handle. + /// + /// Returns OutOfMemory on allocation failure. + pub fn new(value: T) -> Result { + let arc = Arc::try_new(value)?; + let ptr = Arc::into_raw(arc).as_ptr(); + Ok(Self(ptr)) + } + + pub fn try_clone_into_arc(&self) -> Result, ProfileError> { + let clone = self.try_clone()?; + // SAFETY: try_clone succeeded so it must not be null. + let nn = unsafe { NonNull::new_unchecked(clone.0) }; + // SAFETY: validated that it isn't null, should otherwise be an Arc. + Ok(unsafe { Arc::from_raw(nn) }) + } + + #[inline] + pub fn as_inner(&self) -> Result<&T, EmptyHandleError> { + // SAFETY: If non-null, self.0 was created from Arc and remains valid, + // at least as long as we can trust the C side to not do insane things.
+ unsafe { self.0.as_ref() }.ok_or(EmptyHandleError) + } + + /// Tries to clone the resource this handle points to, and returns a new + /// handle to it. + pub fn try_clone(&self) -> Result { + let nn = NonNull::new(self.0).ok_or(EmptyHandleError)?; + // SAFETY: ArcHandle uses a pointer to T as its repr, and as long as + // callers have upheld safety requirements elsewhere, including the + // FFI, then there will be a valid object with refcount > 0. + unsafe { Arc::try_increment_count(nn.as_ptr())? }; + Ok(Self(self.0)) + } + + /// Drops the resource that this handle refers to. It will remain alive if + /// there are other handles to the resource which were created by + /// successful calls to try_clone. This handle will now be empty and + /// operations on it will fail. + pub fn drop_resource(&mut self) { + // pointers aren't default until Rust 1.88. + let ptr = core::mem::replace(&mut self.0, null_mut()); + if let Some(nn) = NonNull::new(ptr) { + drop(unsafe { Arc::from_raw(nn) }); + } + } +} diff --git a/libdd-profiling-ffi/src/lib.rs b/libdd-profiling-ffi/src/lib.rs index fa7e424f1e..aac70dab2c 100644 --- a/libdd-profiling-ffi/src/lib.rs +++ b/libdd-profiling-ffi/src/lib.rs @@ -7,12 +7,14 @@ #![cfg_attr(not(test), deny(clippy::todo))] #![cfg_attr(not(test), deny(clippy::unimplemented))] +mod arc_handle; mod exporter; mod profile_error; mod profile_status; mod profiles; mod string_storage; +pub use arc_handle::*; pub use profile_error::*; pub use profile_status::*; diff --git a/libdd-profiling-ffi/src/profiles/datatypes.rs b/libdd-profiling-ffi/src/profiles/datatypes.rs index 51f4142e5a..8cc3f72f1b 100644 --- a/libdd-profiling-ffi/src/profiles/datatypes.rs +++ b/libdd-profiling-ffi/src/profiles/datatypes.rs @@ -2,12 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 use crate::string_storage::{get_inner_string_storage, ManagedStringStorage}; +use crate::{ensure_non_null_out_parameter, ArcHandle, ProfileError, ProfileStatus}; use anyhow::Context; use 
function_name::named; use libdd_common_ffi::slice::{AsBytes, ByteSlice, CharSlice, Slice}; use libdd_common_ffi::{wrap_with_ffi_result, Error, Handle, Timespec, ToInner}; use libdd_profiling::api::{self, ManagedStringId}; -use libdd_profiling::internal; +use libdd_profiling::profiles::datatypes::{ProfilesDictionary, StringId2}; +use libdd_profiling::{api2, internal}; use std::num::NonZeroI64; use std::str::Utf8Error; use std::time::SystemTime; @@ -215,6 +217,44 @@ pub struct Sample<'a> { pub labels: Slice<'a, Label<'a>>, } +#[derive(Copy, Clone, Debug, Default)] +#[repr(C)] +pub struct Label2<'a> { + pub key: StringId2, + + /// At most one of `.str` and `.num` should not be empty. + pub str: CharSlice<'a>, + pub num: i64, + + /// Should only be present when num is present. + /// Specifies the units of num. + /// Use arbitrary string (for example, "requests") as a custom count unit. + /// If no unit is specified, consumer may apply heuristic to deduce the unit. + /// Consumers may also interpret units like "bytes" and "kilobytes" as memory + /// units and units like "seconds" and "nanoseconds" as time units, + /// and apply appropriate unit conversions to these. + pub num_unit: CharSlice<'a>, +} + +#[repr(C)] +#[derive(Copy, Clone)] +pub struct Sample2<'a> { + /// The leaf is at locations[0]. + pub locations: Slice<'a, api2::Location2>, + + /// The type and unit of each value is defined by the corresponding + /// entry in Profile.sample_type. All samples must have the same + /// number of values, the same as the length of Profile.sample_type. + /// When aggregating multiple samples into a single sample, the + /// result has a list of values that is the element-wise sum of the + /// lists of the originals. + pub values: Slice<'a, i64>, + + /// label includes additional context for this sample. 
It can include + /// things like a thread id, allocation size, etc + pub labels: Slice<'a, Label2<'a>>, +} + impl<'a> TryFrom<&'a Mapping<'a>> for api::Mapping<'a> { type Error = Utf8Error; @@ -400,6 +440,64 @@ pub unsafe extern "C" fn ddog_prof_Profile_new( profile_new(sample_types, period, None) } +/// Create a new profile with the given sample types. Must call +/// `ddog_prof_Profile_drop` when you are done with the profile. +/// +/// # Arguments +/// * `out` - a non-null pointer to an uninitialized Profile. +/// * `dict`: a valid reference to a ProfilesDictionary handle. +/// * `sample_types` +/// * `period` - Optional period of the profile. Passing None/null translates to zero values. +/// +/// # Safety +/// All slices must have pointers that are suitably aligned for their type +/// and must have the correct number of elements for the slice. +/// +/// The `dict` reference must be to a valid ProfilesDictionary handle. It may +/// be an empty handle, but it must be a valid handle. +/// +/// The `out` pointer must be non-null and suitable for pointer writes, +/// including that it has the correct size and alignment. +#[no_mangle] +#[must_use] +pub unsafe extern "C" fn ddog_prof_Profile_with_dictionary( + out: *mut Profile, + dict: &ArcHandle, + sample_types: Slice, + period: Option<&Period>, +) -> ProfileStatus { + ensure_non_null_out_parameter!(out); + match profile_with_dictionary(dict, sample_types, period) { + // SAFETY: checked that it isn't null above, the rest comes from this + // function's own safety conditions. Technically, our safety conditions + // don't require a null check, but we're being safe there. 
+ Ok(profile) => unsafe { + out.write(profile); + ProfileStatus::OK + }, + Err(e) => ProfileStatus::from(e), + } +} + +unsafe fn profile_with_dictionary( + dict: &ArcHandle, + sample_types: Slice, + period: Option<&Period>, +) -> Result { + let sample_types = sample_types.try_as_slice()?; + let dict = dict.try_clone_into_arc()?; + + let mut types = Vec::new(); + types.try_reserve_exact(sample_types.len())?; + types.extend(sample_types.iter().map(api::ValueType::from)); + let period = period.map(Into::into); + + match internal::Profile::try_new_with_dictionary(&types, period, dict) { + Ok(ok) => Ok(Profile::new(ok)), + Err(err) => Err(ProfileError::from(err)), + } +} + /// Same as `ddog_profile_new` but also configures a `string_storage` for the profile. #[no_mangle] #[must_use] @@ -508,6 +606,42 @@ pub unsafe extern "C" fn ddog_prof_Profile_add( .context("ddog_prof_Profile_add failed") .into() } +/// # Safety +/// The `profile` ptr must point to a valid Profile object created by this +/// module. All pointers inside the `sample` need to be valid for the duration +/// of this call. +/// +/// If successful, it returns the Ok variant. +/// On error, it holds an error message in the error variant. +/// +/// This call is _NOT_ thread-safe. 
+#[must_use] +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_Profile_add2( + profile: *mut Profile, + sample: Sample2, + timestamp: Option, +) -> ProfileStatus { + ProfileStatus::from((|| { + let profile = profile_ptr_to_inner(profile)?; + + let locations = sample.locations.try_as_slice()?; + let values = sample.values.try_as_slice()?; + let labels = sample.labels.try_as_slice()?; + + let labels_iter = labels.iter().map(|label| -> anyhow::Result { + Ok(api2::Label { + key: label.key, + str: core::str::from_utf8(label.str.try_as_bytes()?)?, + num: label.num, + num_unit: core::str::from_utf8(label.num_unit.try_as_bytes()?)?, + }) + }); + profile + .try_add_sample2(locations, values, labels_iter, timestamp) + .context("ddog_prof_Profile_add2 failed") + })()) +} pub(crate) unsafe fn profile_ptr_to_inner<'a>( profile_ptr: *mut Profile, diff --git a/libdd-profiling-ffi/src/profiles/mod.rs b/libdd-profiling-ffi/src/profiles/mod.rs index 86136a6fbf..077500d41a 100644 --- a/libdd-profiling-ffi/src/profiles/mod.rs +++ b/libdd-profiling-ffi/src/profiles/mod.rs @@ -3,3 +3,25 @@ mod datatypes; mod interning_api; +mod profiles_dictionary; +mod utf8; + +#[macro_export] +macro_rules! ensure_non_null_out_parameter { + ($expr:expr) => { + if $expr.is_null() { + return $crate::ProfileStatus::from(c"null pointer used as out parameter"); + } + }; +} + +#[macro_export] +macro_rules! ensure_non_null_insert { + ($expr:expr) => { + if $expr.is_null() { + return $crate::ProfileStatus::from(c"tried to insert a null pointer"); + } + }; +} + +pub(crate) use {ensure_non_null_insert, ensure_non_null_out_parameter}; diff --git a/libdd-profiling-ffi/src/profiles/profiles_dictionary.rs b/libdd-profiling-ffi/src/profiles/profiles_dictionary.rs new file mode 100644 index 0000000000..07f77b2d59 --- /dev/null +++ b/libdd-profiling-ffi/src/profiles/profiles_dictionary.rs @@ -0,0 +1,261 @@ +// Copyright 2025-Present Datadog, Inc.
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::arc_handle::ArcHandle; +use crate::profile_status::ProfileStatus; +use crate::profiles::utf8::Utf8Option; +use crate::profiles::{ensure_non_null_insert, ensure_non_null_out_parameter}; +use crate::ProfileError; +use libdd_common_ffi::slice::CharSlice; +use libdd_profiling::profiles::collections::StringRef; +use libdd_profiling::profiles::datatypes::{ + Function2, FunctionId2, Mapping2, MappingId2, ProfilesDictionary, StringId2, +}; +use std::ffi::CStr; + +/// A StringId that represents the empty string. +/// This is always available in every string set and can be used without +/// needing to insert it into a string set. +#[no_mangle] +pub static DDOG_PROF_STRINGID2_EMPTY: StringId2 = StringId2::EMPTY; + +/// A StringId that represents the string "end_timestamp_ns". +/// This is always available in every string set and can be used without +/// needing to insert it into a string set. +#[no_mangle] +pub static DDOG_PROF_STRINGID2_END_TIMESTAMP_NS: StringId2 = + StringId2::from(StringRef::END_TIMESTAMP_NS); + +/// A StringId that represents the string "local root span id". +/// This is always available in every string set and can be used without +/// needing to insert it into a string set. +#[no_mangle] +pub static DDOG_PROF_STRINGID2_LOCAL_ROOT_SPAN_ID: StringId2 = + StringId2::from(StringRef::LOCAL_ROOT_SPAN_ID); + +/// A StringId that represents the string "trace endpoint". +/// This is always available in every string set and can be used without +/// needing to insert it into a string set. +#[no_mangle] +pub static DDOG_PROF_STRINGID2_TRACE_ENDPOINT: StringId2 = + StringId2::from(StringRef::TRACE_ENDPOINT); + +/// A StringId that represents the string "span id". +/// This is always available in every string set and can be used without +/// needing to insert it into a string set. 
+#[no_mangle] +pub static DDOG_PROF_STRINGID2_SPAN_ID: StringId2 = StringId2::from(StringRef::SPAN_ID); + +const NULL_PROFILES_DICTIONARY: &CStr = c"passed a null pointer for a ProfilesDictionary"; + +/// Allocates a new `ProfilesDictionary` and writes a handle to it in `handle`. +/// +/// # Safety +/// +/// - `handle` must be non-null and valid for writes of `ProfilesDictionaryHandle`. +/// - The returned handle must eventually drop the resource; see +/// [`ddog_prof_ProfilesDictionary_drop`] for more details. +/// - If you need a copy, use [`ddog_prof_ProfilesDictionary_try_clone`]; don't just memcpy a new +/// handle. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_new( + handle: *mut ArcHandle, +) -> ProfileStatus { + ensure_non_null_out_parameter!(handle); + ProfileStatus::from(|| -> Result<(), ProfileError> { + let dict = ProfilesDictionary::try_new()?; + let h = ArcHandle::new(dict)?; + unsafe { handle.write(h) }; + Ok(()) + }()) +} + +/// Creates a new handle to the same `ProfilesDictionary` by incrementing the +/// internal reference count. +/// +/// # Safety +/// +/// - `out` must be non-null and valid for writes of `ProfilesDictionaryHandle`. +/// - `handle` must point to a live dictionary resource. +/// - Do not duplicate handles via memcpy; always use this API to create new handles so the +/// reference count is maintained correctly. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_try_clone( + out: *mut ArcHandle, + handle: ArcHandle, +) -> ProfileStatus { + ensure_non_null_out_parameter!(out); + ProfileStatus::from(|| -> Result<(), ProfileError> { + let cloned = handle.try_clone()?; + unsafe { out.write(cloned) }; + Ok(()) + }()) +} + +/// Inserts a `Function` into the dictionary and returns its id. +/// +/// # Safety +/// +/// - `function_id` must be non-null and valid for writes of `FunctionId`. +/// - `dict` must refer to a live dictionary. 
+/// - `function` must be non-null and point to a valid `Function` for the duration of the call. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_insert_function( + function_id: *mut FunctionId2, + dict: Option<&ProfilesDictionary>, + function: *const Function2, +) -> ProfileStatus { + ensure_non_null_out_parameter!(function_id); + ensure_non_null_insert!(function); + ProfileStatus::from(|| -> Result<(), ProfileError> { + let dict = dict.ok_or(NULL_PROFILES_DICTIONARY)?; + let f2: Function2 = unsafe { *function }; + let id = dict.try_insert_function2(f2)?; + unsafe { function_id.write(id) }; + Ok(()) + }()) +} + +/// Inserts a `Mapping` into the dictionary and returns its id. +/// +/// # Safety +/// +/// - `mapping_id` must be non-null and valid for writes of `MappingId`. +/// - `dict` must be null (reported as an error) or refer to a live dictionary. +/// - `mapping` must be non-null and point to a valid `Mapping` for the duration of the call. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_insert_mapping( + mapping_id: *mut MappingId2, + dict: Option<&ProfilesDictionary>, + mapping: *const Mapping2, +) -> ProfileStatus { + ensure_non_null_out_parameter!(mapping_id); + ensure_non_null_insert!(mapping); + ProfileStatus::from(|| -> Result<(), ProfileError> { + let dict = dict.ok_or(NULL_PROFILES_DICTIONARY)?; + let m2 = unsafe { *mapping }; + let id = dict.try_insert_mapping2(m2)?; + unsafe { mapping_id.write(id) }; + Ok(()) + }()) +} + +/// Inserts a UTF-8 string into the dictionary string table. +/// +/// # Safety +/// +/// - `string_id` must be non-null and valid for writes of `StringId`. +/// - `dict` must be null (reported as an error) or refer to a live dictionary. +/// - The UTF-8 policy indicated by `utf8_option` must be respected by the caller for the provided +/// `byte_slice`.
+#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_insert_str( + string_id: *mut StringId2, + dict: Option<&ProfilesDictionary>, + byte_slice: CharSlice, + utf8_option: Utf8Option, +) -> ProfileStatus { + ensure_non_null_out_parameter!(string_id); + ProfileStatus::from(|| -> Result<(), ProfileError> { + let dict = dict.ok_or(NULL_PROFILES_DICTIONARY)?; + crate::profiles::utf8::insert_str(dict.strings(), byte_slice, utf8_option) + .map(|id| unsafe { string_id.write(id.into()) }) + }()) +} + +/// Tries to get the string value associated with the string id. Fails if the +/// dictionary pointer is null, or the result param is null. +/// +/// # Safety +/// +/// 1. The lifetime of the return slice is tied to the underlying storage of the string set; make +/// sure the string set is still alive when using the returned slice. +/// 2. The string id should belong to the string set in this dictionary. Well-known strings are an +/// exception, as they exist in every set. +/// 3. The handle must represent a live profiles dictionary. Remember handles can be copied, and if +/// _any_ handle drops the resource, then all handles pointing to the resource are now invalid, +/// even though they are unaware of it. +/// 4. The result pointer must be valid for [`core::ptr::write`]. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_get_str( + result: *mut CharSlice<'static>, + dict: Option<&ProfilesDictionary>, + string_id: StringId2, +) -> ProfileStatus { + ensure_non_null_out_parameter!(result); + let Some(dict) = dict else { + return ProfileStatus::from(NULL_PROFILES_DICTIONARY); + }; + let string_ref = StringRef::from(string_id); + // SAFETY: It's not actually safe--as indicated in the docs + // for this function, the caller needs to be sure the string + // set in the dictionary outlives the slice.
+ result.write(unsafe { + std::mem::transmute::, CharSlice<'static>>(CharSlice::from( + dict.strings().get(string_ref), + )) + }); + ProfileStatus::OK +} + +/// Drops the `ProfilesDictionary` that the handle owns, leaving a valid but +/// useless handle (all operations on it will error). This takes a pointer to +/// the handle to be able to modify it to leave behind an empty handle. +/// +/// # Safety +/// +/// - If non-null, `handle` must point to a valid `ProfilesDictionaryHandle`. +/// - The underlying resource must be dropped exactly once across all copies of the handle. After +/// dropping, all other copies become invalid and must not be used; they should be discarded +/// without dropping. +#[no_mangle] +pub unsafe extern "C" fn ddog_prof_ProfilesDictionary_drop( + handle: *mut ArcHandle, +) { + if let Some(h) = handle.as_mut() { + h.drop_resource(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::profiles::utf8::Utf8Option; + + #[test] + fn test_basics_including_drop() { + let mut handle = ArcHandle::default(); + unsafe { + Result::from(ddog_prof_ProfilesDictionary_new(&mut handle)).unwrap(); + + let mut string_id = StringId2::default(); + Result::from(ddog_prof_ProfilesDictionary_insert_str( + &mut string_id, + handle.as_inner().ok(), + CharSlice::from("void main(int, char *[])"), + Utf8Option::Assume, + )) + .unwrap(); + + let mut function_id = FunctionId2::default(); + let function = Function2 { + name: string_id, + system_name: Default::default(), + file_name: Default::default(), + }; + Result::from(ddog_prof_ProfilesDictionary_insert_function( + &mut function_id, + handle.as_inner().ok(), + &function, + )) + .unwrap(); + + let mut found = CharSlice::empty(); + let status = + ddog_prof_ProfilesDictionary_get_str(&mut found, handle.as_inner().ok(), string_id); + Result::from(status).unwrap(); + + ddog_prof_ProfilesDictionary_drop(&mut handle); + } + } +} diff --git a/libdd-profiling-ffi/src/profiles/utf8.rs 
b/libdd-profiling-ffi/src/profiles/utf8.rs new file mode 100644 index 0000000000..1e9167e936 --- /dev/null +++ b/libdd-profiling-ffi/src/profiles/utf8.rs @@ -0,0 +1,252 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::ProfileError; +use libdd_common::error::FfiSafeErrorMessage; +use libdd_common_ffi::slice::{AsBytes, CharSlice, SliceConversionError}; +use libdd_profiling::profiles::collections::{ParallelStringSet, StringRef}; +use std::borrow::Cow; +use std::collections::TryReserveError; +use std::ffi::CStr; +use std::str::Utf8Error; + +#[repr(C)] +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +#[allow(dead_code)] // these are made through ffi +pub enum Utf8Option { + /// The string is assumed to be valid UTF-8. If it's not, the behavior + /// is undefined. + Assume, + /// The string is converted to UTF-8 using lossy conversion. + ConvertLossy, + /// The string is validated to be UTF-8. If it's not, an error is + /// returned. + Validate, +} + +#[allow(dead_code)] +#[derive(thiserror::Error, Debug)] +pub enum Utf8ConversionError { + #[error("out of memory: utf8 conversion failed")] + OutOfMemory(#[from] TryReserveError), + #[error(transparent)] + SliceConversionError(#[from] SliceConversionError), + #[error("invalid input: string was not utf-8")] + Utf8Error(#[from] Utf8Error), +} + +// SAFETY: all cases are c-str literals, or delegate to the same trait. +unsafe impl FfiSafeErrorMessage for Utf8ConversionError { + fn as_ffi_str(&self) -> &'static CStr { + match self { + Utf8ConversionError::OutOfMemory(_) => c"out of memory: utf8 conversion failed", + Utf8ConversionError::SliceConversionError(err) => err.as_ffi_str(), + Utf8ConversionError::Utf8Error(_) => c"invalid input: string was not utf-8", + } + } +} + +impl Utf8Option { + /// Converts a byte slice to a UTF-8 string according to the option. 
+ /// - Assume: Borrow without validation (caller guarantees UTF-8) + /// - ConvertLossy: Lossy conversion with fallible allocation + /// - Validate: Validate and borrow on success + /// + /// # Safety + /// + /// When [`Utf8Option::Assume`] is passed, it must be valid UTF-8. + pub unsafe fn convert(self, bytes: &[u8]) -> Result, Utf8ConversionError> { + // SAFETY: caller asserts validity under Assume + Ok(match self { + Utf8Option::Assume => Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(bytes) }), + Utf8Option::ConvertLossy => try_from_utf8_lossy(bytes)?, + Utf8Option::Validate => Cow::Borrowed(std::str::from_utf8(bytes)?), + }) + } + + /// # Safety + /// See the safety conditions on [`AsBytes::try_as_bytes`] and also + /// [`Utf8Option::convert`]; both must be upheld. + pub unsafe fn try_as_bytes_convert<'a, T: AsBytes<'a>>( + self, + t: T, + ) -> Result, Utf8ConversionError> { + let bytes = t.try_as_bytes()?; + self.convert(bytes) + } +} + +/// Tries to convert a slice of bytes to a string. The input may have invalid +/// characters. +/// +/// This is the same implementation as [`String::from_utf8_lossy`] except that +/// this uses fallible allocations. 
+pub fn try_from_utf8_lossy(v: &[u8]) -> Result, TryReserveError> { + let mut iter = v.utf8_chunks(); + + let first_valid = if let Some(chunk) = iter.next() { + let valid = chunk.valid(); + if chunk.invalid().is_empty() { + debug_assert_eq!(valid.len(), v.len()); + return Ok(Cow::Borrowed(valid)); + } + valid + } else { + return Ok(Cow::Borrowed("")); + }; + + const REPLACEMENT: &str = "\u{FFFD}"; + const REPLACEMENT_LEN: usize = REPLACEMENT.len(); + + let mut res = String::new(); + res.try_reserve(v.len())?; + res.push_str(first_valid); + res.try_reserve(REPLACEMENT_LEN)?; + res.push_str(REPLACEMENT); + + for chunk in iter { + let valid = chunk.valid(); + res.try_reserve(valid.len())?; + res.push_str(valid); + if !chunk.invalid().is_empty() { + res.try_reserve(REPLACEMENT_LEN)?; + res.push_str(REPLACEMENT); + } + } + + Ok(Cow::Owned(res)) +} + +pub fn insert_str( + set: &ParallelStringSet, + str: CharSlice<'_>, + utf8_options: Utf8Option, +) -> Result { + let string = unsafe { utf8_options.try_as_bytes_convert(str) }.map_err(|err| match err { + Utf8ConversionError::OutOfMemory(err) => ProfileError::from(err), + Utf8ConversionError::SliceConversionError(err) => ProfileError::from(err.as_ffi_str()), + Utf8ConversionError::Utf8Error(_) => { + ProfileError::from(c"tried to insert a non-UTF8 string into a ProfilesDictionary") + } + })?; + Ok(set.try_insert(string.as_ref())?) 
+} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_utf8_option_assume_valid() { + let bytes = b"hello world"; + let result = unsafe { Utf8Option::Assume.convert(bytes) }.unwrap(); + assert_eq!(result, "hello world"); + assert!(matches!(result, Cow::Borrowed(_))); + } + + #[test] + fn test_utf8_option_validate_valid() { + let bytes = b"hello world"; + let result = unsafe { Utf8Option::Validate.convert(bytes) }.unwrap(); + assert_eq!(result, "hello world"); + assert!(matches!(result, Cow::Borrowed(_))); + } + + #[test] + fn test_utf8_option_validate_invalid() { + let bytes = b"hello \xFF world"; + let result = unsafe { Utf8Option::Validate.convert(bytes) }; + assert!(result.is_err()); + match result.unwrap_err() { + Utf8ConversionError::Utf8Error(_) => (), + _ => panic!("Expected Utf8Error"), + } + } + + #[test] + fn test_utf8_option_convert_lossy_valid() { + let bytes = b"hello world"; + let result = unsafe { Utf8Option::ConvertLossy.convert(bytes) }.unwrap(); + assert_eq!(result, "hello world"); + assert!(matches!(result, Cow::Borrowed(_))); + } + + #[test] + fn test_utf8_option_convert_lossy_invalid() { + let bytes = b"hello \xFF world"; + let result = unsafe { Utf8Option::ConvertLossy.convert(bytes) }.unwrap(); + assert_eq!(result, "hello \u{FFFD} world"); + assert!(matches!(result, Cow::Owned(_))); + } + + #[test] + fn test_utf8_option_convert_lossy_multiple_invalid() { + let bytes = b"\xFF\xFE valid \x80"; + let result = unsafe { Utf8Option::ConvertLossy.convert(bytes) }.unwrap(); + assert_eq!(result, "\u{FFFD}\u{FFFD} valid \u{FFFD}"); + } + + #[test] + fn test_try_from_utf8_lossy_valid() { + let result = try_from_utf8_lossy(b"valid utf8").unwrap(); + assert_eq!(result, "valid utf8"); + assert!(matches!(result, Cow::Borrowed(_))); + } + + #[test] + fn test_try_from_utf8_lossy_invalid_single() { + let result = try_from_utf8_lossy(b"test\xFFstring").unwrap(); + assert_eq!(result, "test\u{FFFD}string"); + assert!(matches!(result, 
Cow::Owned(_))); + } + + #[test] + fn test_try_from_utf8_lossy_invalid_multiple() { + let result = try_from_utf8_lossy(b"\xC3\x28 \xFF test").unwrap(); + // Invalid sequence at start, then valid, then another invalid + assert!(result.contains("\u{FFFD}")); + assert!(result.contains("test")); + } + + #[test] + fn test_try_from_utf8_lossy_empty() { + let result = try_from_utf8_lossy(b"").unwrap(); + assert_eq!(result, ""); + assert!(matches!(result, Cow::Borrowed(_))); + } + + #[test] + fn test_try_from_utf8_lossy_all_valid_emoji() { + let bytes = "Hello 👋 World 🌍".as_bytes(); + let result = try_from_utf8_lossy(bytes).unwrap(); + assert_eq!(result, "Hello 👋 World 🌍"); + assert!(matches!(result, Cow::Borrowed(_))); + } + + #[test] + #[allow(invalid_from_utf8)] // Checking error conversion + fn test_utf8_conversion_error_display() { + let err = Utf8ConversionError::Utf8Error(std::str::from_utf8(b"\xFF").unwrap_err()); + assert_eq!( + err.as_ffi_str().to_str().unwrap(), + "invalid input: string was not utf-8" + ); + } + + #[test] + fn test_utf8_conversion_error_from_try_reserve() { + let mut v = vec![0u8]; + let reserve_err = v.try_reserve(isize::MAX as usize).unwrap_err(); + let err = Utf8ConversionError::from(reserve_err); + + match err { + Utf8ConversionError::OutOfMemory(_) => (), + _ => panic!("Expected OutOfMemory"), + } + + assert_eq!( + err.as_ffi_str().to_str().unwrap(), + "out of memory: utf8 conversion failed" + ); + } +} diff --git a/libdd-profiling/src/profiles/datatypes/string.rs b/libdd-profiling/src/profiles/datatypes/string.rs index b556bf9083..fba3b2f87b 100644 --- a/libdd-profiling/src/profiles/datatypes/string.rs +++ b/libdd-profiling/src/profiles/datatypes/string.rs @@ -43,16 +43,23 @@ impl StringId2 { pub fn is_empty(&self) -> bool { self.0.is_null() } -} -impl From for StringId2 { - fn from(s: StringRef) -> Self { + /// Creates a [`StringId2`] from the [`StringRef`]. 
This is an associated + /// method so that it can be marked const and used in const contexts such + /// as static initializers. + pub const fn from(s: StringRef) -> Self { // SAFETY: every StringRef is a valid StringId2 (but not the other way // because of null). unsafe { core::mem::transmute::(s) } } } +impl From for StringId2 { + fn from(s: StringRef) -> Self { + StringId2::from(s) + } +} + impl From for StringRef { fn from(id: StringId2) -> Self { if id.0.is_null() {