-
Notifications
You must be signed in to change notification settings - Fork 167
Profiling: fix UB and simplify SystemSettings #3578
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
32db8b9
cfe23dd
1d51ce0
f6261b9
4352c47
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,19 +8,36 @@ use crate::bindings::{ | |
| }; | ||
| use crate::zend::zai_str_from_zstr; | ||
| use core::fmt::{Display, Formatter}; | ||
| use core::mem::{swap, transmute, MaybeUninit}; | ||
| use core::mem::transmute; | ||
| use core::ptr; | ||
| use core::str::FromStr; | ||
| use libc::{c_char, c_int}; | ||
| use libdd_common::tag::{parse_tags, Tag}; | ||
| pub use libdd_profiling::exporter::Uri; | ||
| use log::{warn, LevelFilter}; | ||
| use log::{debug, warn, LevelFilter}; | ||
| use std::borrow::Cow; | ||
| use std::ffi::CString; | ||
| use std::path::{Path, PathBuf}; | ||
|
|
||
| #[derive(Copy, Clone, Debug, Default)] | ||
| pub enum SystemSettingsState { | ||
| /// Indicates the system settings are not aware of the configuration at | ||
| /// the moment. | ||
| #[default] | ||
| ConfigUnaware, | ||
|
|
||
| /// Indicates the system settings _are_ aware of configuration at the | ||
| /// moment. | ||
| ConfigAware, | ||
|
|
||
| /// Expressly disabled, such as a child process post-fork (forks are not | ||
| /// currently profiled, except certain forks made by SAPIs). | ||
| Disabled, | ||
| } | ||
|
|
||
| #[derive(Clone, Debug)] | ||
| pub struct SystemSettings { | ||
| pub state: SystemSettingsState, | ||
| pub profiling_enabled: bool, | ||
| pub profiling_experimental_features_enabled: bool, | ||
| pub profiling_endpoint_collection_enabled: bool, | ||
|
|
@@ -41,16 +58,26 @@ pub struct SystemSettings { | |
| } | ||
|
|
||
| impl SystemSettings { | ||
| pub fn disable_all(&mut self) { | ||
| self.profiling_enabled = false; | ||
| self.profiling_experimental_features_enabled = false; | ||
| self.profiling_endpoint_collection_enabled = false; | ||
| self.profiling_experimental_cpu_time_enabled = false; | ||
| self.profiling_allocation_enabled = false; | ||
| self.profiling_timeline_enabled = false; | ||
| self.profiling_exception_enabled = false; | ||
| self.profiling_exception_message_enabled = false; | ||
| self.profiling_io_enabled = false; | ||
| /// Provides "initial" settings, which are all "off"-like values. | ||
| pub const fn initial() -> SystemSettings { | ||
| SystemSettings { | ||
| state: SystemSettingsState::ConfigUnaware, | ||
| profiling_enabled: false, | ||
| profiling_experimental_features_enabled: false, | ||
| profiling_endpoint_collection_enabled: false, | ||
| profiling_experimental_cpu_time_enabled: false, | ||
| profiling_allocation_enabled: false, | ||
| profiling_allocation_sampling_distance: u32::MAX, | ||
| profiling_timeline_enabled: false, | ||
| profiling_exception_enabled: false, | ||
| profiling_exception_message_enabled: false, | ||
| profiling_wall_time_enabled: false, | ||
| profiling_io_enabled: false, | ||
| output_pprof: None, | ||
| profiling_exception_sampling_distance: u32::MAX, | ||
| profiling_log_level: LevelFilter::Off, | ||
| uri: AgentEndpoint::Socket(Cow::Borrowed(AgentEndpoint::DEFAULT_UNIX_SOCKET_PATH)), | ||
| } | ||
| } | ||
|
|
||
| /// # Safety | ||
|
|
@@ -64,6 +91,7 @@ impl SystemSettings { | |
| let trace_agent_url = trace_agent_url(); | ||
| let uri = detect_uri_from_config(trace_agent_url, agent_host, trace_agent_port); | ||
| Self { | ||
| state: SystemSettingsState::ConfigAware, | ||
| profiling_enabled: profiling_enabled(), | ||
| profiling_experimental_features_enabled: profiling_experimental_features_enabled(), | ||
| profiling_endpoint_collection_enabled: profiling_endpoint_collection_enabled(), | ||
|
|
@@ -83,15 +111,16 @@ impl SystemSettings { | |
| } | ||
| } | ||
|
|
||
| static mut SYSTEM_SETTINGS: MaybeUninit<SystemSettings> = MaybeUninit::uninit(); | ||
| static mut SYSTEM_SETTINGS: SystemSettings = SystemSettings::initial(); | ||
|
|
||
| impl SystemSettings { | ||
| /// # Safety | ||
| /// Must be called after [first_rinit] and before [shutdown]. | ||
| pub unsafe fn get() -> ptr::NonNull<SystemSettings> { | ||
| // SAFETY: required by this function's own safety requirements. | ||
| let addr = unsafe { (*ptr::addr_of_mut!(SYSTEM_SETTINGS)).assume_init_mut() }; | ||
| ptr::NonNull::from(addr) | ||
| /// Returns the "current" system settings, which are always memory-safe | ||
| /// but may point to "initial" values rather than the configured ones, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should there be an "is_initial" field on the settings for debugging purposes?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've pushed a commit with state tracking, I'll play a little bit with it but don't have time for a lot of experimentation right now. |
||
| /// depending on what point in the lifecycle we're at. | ||
| pub const fn get() -> ptr::NonNull<SystemSettings> { | ||
| let addr = ptr::addr_of_mut!(SYSTEM_SETTINGS); | ||
| // SAFETY: it's derived from a static variable, it's not null. | ||
| unsafe { ptr::NonNull::new_unchecked(addr) } | ||
| } | ||
|
|
||
| /// # Safety | ||
|
|
@@ -110,10 +139,6 @@ impl SystemSettings { | |
| } | ||
| } | ||
|
|
||
| // Initialize the lazy lock holding the env var for new origin | ||
| // detection in a safe place. | ||
| _ = std::sync::LazyLock::force(&libdd_common::entity_id::DD_EXTERNAL_ENV); | ||
|
|
||
| // Work around version-specific issues. | ||
| #[cfg(not(php_zend_mm_set_custom_handlers_ex))] | ||
| if allocation::allocation_le83::first_rinit_should_disable_due_to_jit() { | ||
|
|
@@ -123,54 +148,48 @@ impl SystemSettings { | |
| if allocation::allocation_ge84::first_rinit_should_disable_due_to_jit() { | ||
| system_settings.profiling_allocation_enabled = false; | ||
| } | ||
| swap( | ||
| &mut system_settings, | ||
| (*ptr::addr_of_mut!(SYSTEM_SETTINGS)).assume_init_mut(), | ||
|
|
||
| SystemSettings::log_state( | ||
| (*ptr::addr_of!(SYSTEM_SETTINGS)).state, | ||
| system_settings.state, | ||
| "the first request was received", | ||
| ); | ||
| ptr::addr_of_mut!(SYSTEM_SETTINGS).swap(&mut system_settings); | ||
| } | ||
|
|
||
| /// # Safety | ||
| /// Must be called exactly once each startup in either minit or startup, | ||
| /// whether profiling is enabled or not. | ||
| unsafe fn on_startup() { | ||
| (*ptr::addr_of_mut!(SYSTEM_SETTINGS)).write(INITIAL_SYSTEM_SETTINGS.clone()); | ||
| fn log_state(from: SystemSettingsState, to: SystemSettingsState, reason: &str) { | ||
| debug!("SystemSettings state transitioned from {from:?} to {to:?} because {reason}."); | ||
| } | ||
|
|
||
| /// # Safety | ||
| /// Must be called exactly once per shutdown in either mshutdown or | ||
| /// shutdown, before zai config is shutdown. | ||
| unsafe fn on_shutdown() { | ||
| let system_settings = (*ptr::addr_of_mut!(SYSTEM_SETTINGS)).assume_init_mut(); | ||
| let system_settings = &mut *ptr::addr_of_mut!(SYSTEM_SETTINGS); | ||
| let state = SystemSettingsState::ConfigUnaware; | ||
| SystemSettings::log_state( | ||
| system_settings.state, | ||
| state, | ||
| "a shutdown command was received", | ||
| ); | ||
| *system_settings = SystemSettings { | ||
| profiling_enabled: false, | ||
| profiling_experimental_features_enabled: false, | ||
| profiling_endpoint_collection_enabled: false, | ||
| profiling_experimental_cpu_time_enabled: false, | ||
| profiling_allocation_enabled: false, | ||
| profiling_allocation_sampling_distance: 0, | ||
| profiling_timeline_enabled: false, | ||
| profiling_exception_enabled: false, | ||
| profiling_exception_message_enabled: false, | ||
| profiling_wall_time_enabled: false, | ||
| profiling_io_enabled: false, | ||
| output_pprof: None, | ||
| profiling_exception_sampling_distance: 0, | ||
| profiling_log_level: LevelFilter::Off, | ||
| uri: Default::default(), | ||
| state, | ||
| ..SystemSettings::initial() | ||
| }; | ||
| } | ||
|
|
||
| unsafe fn on_fork_in_child() { | ||
| let system_settings = (*ptr::addr_of_mut!(SYSTEM_SETTINGS)).assume_init_mut(); | ||
| system_settings.profiling_enabled = false; | ||
| system_settings.profiling_experimental_features_enabled = false; | ||
| system_settings.profiling_endpoint_collection_enabled = false; | ||
| system_settings.profiling_experimental_cpu_time_enabled = false; | ||
| system_settings.profiling_allocation_enabled = false; | ||
| system_settings.profiling_timeline_enabled = false; | ||
| system_settings.profiling_exception_enabled = false; | ||
| system_settings.profiling_exception_message_enabled = false; | ||
| system_settings.profiling_io_enabled = false; | ||
| let system_settings = &mut *ptr::addr_of_mut!(SYSTEM_SETTINGS); | ||
| let state = SystemSettingsState::Disabled; | ||
| SystemSettings::log_state( | ||
| system_settings.state, | ||
| state, | ||
| "the processed forked, and child processes are not profiled", | ||
| ); | ||
| *system_settings = SystemSettings { | ||
| state, | ||
| ..SystemSettings::initial() | ||
| }; | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -448,55 +467,25 @@ impl ConfigId { | |
| } | ||
| } | ||
|
|
||
| lazy_static::lazy_static! { | ||
| /// In some SAPIs, full configuration is not known until the first request | ||
| /// is served. This is ripe for edge cases. Consider this order of events: | ||
| /// 1. Worker is created. | ||
| /// 2. No requests are served. | ||
| /// 3. As the worker shuts down, the timeline profiler will attempt to | ||
| /// add idle time to timeline. | ||
| /// What state should the configuration be in? | ||
| /// | ||
| /// Since the real configuration was never learned, assume everything is | ||
| /// disabled, which should cause fewer issues for customers than assuming | ||
| /// defaults. | ||
| pub static ref INITIAL_SYSTEM_SETTINGS: SystemSettings = SystemSettings { | ||
| profiling_enabled: false, | ||
| profiling_experimental_features_enabled: false, | ||
| profiling_endpoint_collection_enabled: false, | ||
| profiling_experimental_cpu_time_enabled: false, | ||
| profiling_allocation_enabled: false, | ||
| profiling_allocation_sampling_distance: u32::MAX, | ||
| profiling_timeline_enabled: false, | ||
| profiling_exception_enabled: false, | ||
| profiling_exception_message_enabled: false, | ||
| profiling_wall_time_enabled: false, | ||
| profiling_io_enabled: false, | ||
| output_pprof: None, | ||
| profiling_exception_sampling_distance: u32::MAX, | ||
| profiling_log_level: LevelFilter::Off, | ||
| uri: Default::default(), | ||
| }; | ||
|
|
||
| /// Keep these in sync with the INI defaults. | ||
| static ref DEFAULT_SYSTEM_SETTINGS: SystemSettings = SystemSettings { | ||
| profiling_enabled: true, | ||
| profiling_experimental_features_enabled: false, | ||
| profiling_endpoint_collection_enabled: true, | ||
| profiling_experimental_cpu_time_enabled: true, | ||
| profiling_allocation_enabled: true, | ||
| profiling_allocation_sampling_distance: 1024 * 4096, | ||
| profiling_timeline_enabled: true, | ||
| profiling_exception_enabled: true, | ||
| profiling_exception_message_enabled: false, | ||
| profiling_wall_time_enabled: true, | ||
| profiling_io_enabled: false, | ||
| output_pprof: None, | ||
| profiling_exception_sampling_distance: 100, | ||
| profiling_log_level: LevelFilter::Off, | ||
| uri: Default::default(), | ||
| }; | ||
| } | ||
| /// Keep these in sync with the INI defaults. | ||
| static DEFAULT_SYSTEM_SETTINGS: SystemSettings = SystemSettings { | ||
| state: SystemSettingsState::ConfigUnaware, | ||
| profiling_enabled: true, | ||
| profiling_experimental_features_enabled: false, | ||
| profiling_endpoint_collection_enabled: true, | ||
| profiling_experimental_cpu_time_enabled: true, | ||
| profiling_allocation_enabled: true, | ||
| profiling_allocation_sampling_distance: 1024 * 4096, | ||
| profiling_timeline_enabled: true, | ||
| profiling_exception_enabled: true, | ||
| profiling_exception_message_enabled: false, | ||
| profiling_wall_time_enabled: true, | ||
| profiling_io_enabled: false, | ||
| output_pprof: None, | ||
| profiling_exception_sampling_distance: 100, | ||
| profiling_log_level: LevelFilter::Off, | ||
| uri: AgentEndpoint::Socket(Cow::Borrowed(AgentEndpoint::DEFAULT_UNIX_SOCKET_PATH)), | ||
| }; | ||
|
|
||
| /// # Safety | ||
| /// This function must only be called after config has been initialized in | ||
|
|
@@ -1244,7 +1233,8 @@ pub(crate) fn minit(module_number: libc::c_int) { | |
| ); | ||
| assert!(tmp); // It's literally return true in the source. | ||
|
|
||
| SystemSettings::on_startup(); | ||
| let system_settings = &mut *ptr::addr_of_mut!(SYSTEM_SETTINGS); | ||
| *system_settings = SystemSettings::initial(); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1268,6 +1258,7 @@ pub(crate) unsafe fn on_fork_in_child() { | |
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
| use core::mem::MaybeUninit; | ||
| use libc::memcmp; | ||
|
|
||
| #[test] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,7 @@ mod exception; | |
| mod timeline; | ||
| mod vec_ext; | ||
|
|
||
| use crate::config::{SystemSettings, INITIAL_SYSTEM_SETTINGS}; | ||
| use crate::config::SystemSettings; | ||
| use crate::zend::datadog_sapi_globals_request_info; | ||
| use bindings::{ | ||
| self as zend, ddog_php_prof_php_version, ddog_php_prof_php_version_id, ZendExtension, | ||
|
|
@@ -323,6 +323,9 @@ extern "C" fn minit(_type: c_int, module_number: c_int) -> ZendResult { | |
| let _connector = libdd_common::connector::Connector::default(); | ||
| } | ||
|
|
||
| // Initialize the lazy lock holding the env var for new origin detection. | ||
| _ = std::sync::LazyLock::force(&libdd_common::entity_id::DD_EXTERNAL_ENV); | ||
|
Comment on lines
+326
to
+327
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is unrelated to this PR, but it's a safer place for it. Maybe I should make it its own PR but it's 10:35pm for me right now when I noticed this. |
||
|
|
||
| // Use a hybrid extension hack to load as a module but have the | ||
| // zend_extension hooks available: | ||
| // https://www.phpinternalsbook.com/php7/extensions_design/zend_extensions.html#hybrid-extensions | ||
|
|
@@ -423,7 +426,7 @@ pub struct RequestLocals { | |
| /// conditions such as in mshutdown when there were no requests served, | ||
| /// then the settings are still memory safe, but they may not have the | ||
| /// real configuration. Instead, they have a best-effort values such as | ||
| /// INITIAL_SYSTEM_SETTINGS, or possibly the values which were available | ||
| /// the initial settings, or possibly the values which were available | ||
| /// in MINIT. | ||
| pub system_settings: ptr::NonNull<SystemSettings>, | ||
|
|
||
|
|
@@ -434,8 +437,9 @@ pub struct RequestLocals { | |
| impl RequestLocals { | ||
| #[track_caller] | ||
| pub fn system_settings(&self) -> &SystemSettings { | ||
| // SAFETY: it should always be valid, either set to the | ||
| // INITIAL_SYSTEM_SETTINGS or to the SYSTEM_SETTINGS. | ||
| // SAFETY: it should always be valid, just maybe "stale", such as | ||
| // having only the initial values, or only the ones available in minit, | ||
| // rather than the fully configured values. | ||
| unsafe { self.system_settings.as_ref() } | ||
| } | ||
| } | ||
|
|
@@ -449,7 +453,7 @@ impl Default for RequestLocals { | |
| git_commit_sha: None, | ||
| git_repository_url: None, | ||
| tags: vec![], | ||
| system_settings: ptr::NonNull::from(INITIAL_SYSTEM_SETTINGS.deref()), | ||
| system_settings: SystemSettings::get(), | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's an example of a potential behavior divergence. Before, this would have used Now, this shouldn't really matter in any scenarios, because we really should be changing these to real values ASAP. It's something to specifically scrutinize in the PR. |
||
| interrupt_count: AtomicU32::new(0), | ||
| vm_interrupt_addr: ptr::null_mut(), | ||
| } | ||
|
|
@@ -572,8 +576,9 @@ extern "C" fn rinit(_type: c_int, _module_number: c_int) -> ZendResult { | |
|
|
||
| unsafe { bindings::zai_config_rinit() }; | ||
|
|
||
| // SAFETY: We are after first rinit and before config mshutdown. | ||
| let mut system_settings = unsafe { SystemSettings::get() }; | ||
| // Needs to come after config::first_rinit, because that's what sets the | ||
| // values to the ones in the configuration. | ||
| let system_settings = SystemSettings::get(); | ||
|
|
||
| // initialize the thread local storage and cache some items | ||
| let result = REQUEST_LOCALS.try_with_borrow_mut(|locals| { | ||
|
|
@@ -645,8 +650,10 @@ extern "C" fn rinit(_type: c_int, _module_number: c_int) -> ZendResult { | |
| return ZendResult::Success; | ||
| } | ||
|
|
||
| // SAFETY: still safe to access in rinit after first_rinit. | ||
| let system_settings = unsafe { system_settings.as_mut() }; | ||
| // SAFETY: safe to dereference in rinit after first_rinit. It's important | ||
| // that this is a non-mut reference because in ZTS there's nothing which | ||
| // enforces mutual exclusion. | ||
| let system_settings = unsafe { system_settings.as_ref() }; | ||
|
|
||
| // SAFETY: the once control is not mutable during request. | ||
| let once = unsafe { &*ptr::addr_of!(RINIT_ONCE) }; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I like that 👍