Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
f14deb8
perf(profiling): piggyback time samples onto allocation samples
morrisonlevi Jan 5, 2026
5d9030b
refactor orderings of collect_* and extract helpers
morrisonlevi Jan 7, 2026
989876d
refactor(test): allocation + CPU combined tests
morrisonlevi Jan 7, 2026
c0119e9
refactor: make jq for combined samples less brittle
morrisonlevi Jan 7, 2026
f43a597
refactor: revert Ordering optimizations
morrisonlevi Jan 7, 2026
243d028
style: use SAFETY instead of Safety
morrisonlevi Jan 7, 2026
a9c822e
test: hammer piggybacking
morrisonlevi Jan 7, 2026
5ee032d
refactor: simplify test and hook it up properly
morrisonlevi Jan 7, 2026
5d79ecd
test: fix prof-correctness
morrisonlevi Jan 9, 2026
aa62d66
ci(profiling): run allocation_time_combined on PHP 8.4+
morrisonlevi Jan 12, 2026
b1765c0
test(profiling): fix allocation_time_combined float->int
morrisonlevi Jan 12, 2026
8b15661
Merge branch 'master' into levi/perf-time-alloc-piggyback
morrisonlevi Jan 12, 2026
23c9bc1
ci(profiling): update expected data
morrisonlevi Jan 12, 2026
7e93679
Merge branch 'master' into levi/perf-time-alloc-piggyback
morrisonlevi Jan 13, 2026
f52cf55
ci(profiling): run allocation_time_combined with ALLOCATION_SAMPLING_…
morrisonlevi Jan 13, 2026
3a7430e
ci(profiling): ignore main for wall-time for allocation_time_combined
morrisonlevi Jan 13, 2026
ee910ea
EXPERIMENTAL: check ZEND_OP_DATA too
morrisonlevi Jan 13, 2026
e16e1b1
DEBUG: do we hit frameless icalls?
morrisonlevi Jan 14, 2026
a3e3e4e
DEBUG: more info in panic
morrisonlevi Jan 14, 2026
1d40054
Merge branch 'master' into levi/perf-time-alloc-piggyback
morrisonlevi Jan 16, 2026
eb10ef8
refactor: simplify Profiler::get_timeline_timestamp
morrisonlevi Jan 17, 2026
73c86a0
Revert "DEBUG: more info in panic"
morrisonlevi Jan 17, 2026
520c158
Revert "DEBUG: do we hit frameless icalls?"
morrisonlevi Jan 17, 2026
35ac0b6
undo experimental debugging
morrisonlevi Jan 17, 2026
87a52f4
test: fix expected log message
morrisonlevi Jan 17, 2026
093ce66
test: use more replaces to exaggerate effect
morrisonlevi Jan 17, 2026
d2d11b1
EXPERIMENT: show leaf frame
morrisonlevi Jan 17, 2026
78d9cbc
Merge branch 'master' into levi/perf-time-alloc-piggyback
realFlowControl Jan 20, 2026
b64dd9a
do not fail fast, finish prof correctness
realFlowControl Jan 20, 2026
44e2734
upload artifacts
realFlowControl Jan 20, 2026
d063129
Disable Xdebug for prof-correctness
realFlowControl Jan 20, 2026
ea6bc22
Revert "upload artifacts"
realFlowControl Jan 20, 2026
ddb1dfc
Merge branch 'master' into levi/perf-time-alloc-piggyback
morrisonlevi Jan 20, 2026
5114acd
test: allow any non-zero wall-time for piggy-backed samples
morrisonlevi Jan 20, 2026
9cb6ba1
remove first_frame for performance
morrisonlevi Jan 21, 2026
1e42042
Remove misleading strlen and comment
morrisonlevi Jan 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion profiling/src/allocation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,21 @@ impl AllocationProfilingStats {
#[cold]
pub fn collect_allocation(len: size_t) {
if let Some(profiler) = Profiler::get() {
// Safety: execute_data was provided by the engine, and the profiler doesn't mutate it.
// Check if there's a pending time interrupt that we can handle now
// instead of waiting for an interrupt handler. This is slightly more
// accurate and efficient, win-win.
let interrupt_count = REQUEST_LOCALS
.try_with_borrow(|locals| locals.interrupt_count.swap(0, Ordering::SeqCst))
.unwrap_or(0);

// SAFETY: execute_data was provided by the engine, and the profiler
// doesn't mutate it.
unsafe {
profiler.collect_allocations(
zend::ddog_php_prof_get_current_execute_data(),
1_i64,
len as i64,
(interrupt_count > 0).then_some(interrupt_count),
)
};
}
Expand Down
64 changes: 40 additions & 24 deletions profiling/src/profiling/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -902,16 +902,7 @@ impl Profiler {
let labels = Profiler::common_labels(0);
let n_labels = labels.len();

let mut timestamp = NO_TIMESTAMP;
{
let system_settings = self.system_settings.load(Ordering::SeqCst);
// SAFETY: system settings are stable during a request.
if unsafe { *ptr::addr_of!((*system_settings).profiling_timeline_enabled) } {
if let Ok(now) = SystemTime::now().duration_since(UNIX_EPOCH) {
timestamp = now.as_nanos() as i64;
}
}
}
let timestamp = self.get_timeline_timestamp();

match self.prepare_and_send_message(
frames,
Expand All @@ -938,33 +929,51 @@ impl Profiler {
}
}

/// Collect a stack sample with memory allocations.
/// Collect a stack sample with memory allocations, and optionally time data.
///
/// When `interrupt_count` is provided, this piggybacks time sampling onto
/// allocation sampling to avoid redundant stack walks.
#[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
pub fn collect_allocations(
&self,
execute_data: *mut zend_execute_data,
alloc_samples: i64,
alloc_size: i64,
interrupt_count: Option<u32>,
) {
let result = collect_stack_sample(execute_data);
match result {
Ok(frames) => {
let depth = frames.len();

// Optionally collect time data when interrupt_count is provided
let (interrupt_count, wall_time, cpu_time, timestamp) =
if let Some(count) = interrupt_count {
let (wall_time, cpu_time) = CLOCKS.with_borrow_mut(Clocks::rotate_clocks);
let timestamp = self.get_timeline_timestamp();
(count as i64, wall_time, cpu_time, timestamp)
} else {
(0, 0, 0, NO_TIMESTAMP)
};

let labels = Profiler::common_labels(0);
let n_labels = labels.len();

match self.prepare_and_send_message(
frames,
SampleValues {
alloc_size,
interrupt_count,
wall_time,
cpu_time,
alloc_samples,
alloc_size,
..Default::default()
},
labels,
NO_TIMESTAMP,
timestamp,
) {
Ok(_) => trace!(
"Sent stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, and {alloc_samples} allocations to profiler."
"Sent stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, {alloc_samples} allocations, and {interrupt_count} time interrupts to profiler."
),
Err(err) => warn!(
"Failed to send stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, and {alloc_samples} allocations to profiler: {err}"
Expand Down Expand Up @@ -1005,16 +1014,7 @@ impl Profiler {

let n_labels = labels.len();

let mut timestamp = NO_TIMESTAMP;
{
let system_settings = self.system_settings.load(Ordering::SeqCst);
// SAFETY: system settings are stable during a request.
if unsafe { *ptr::addr_of!((*system_settings).profiling_timeline_enabled) } {
if let Ok(now) = SystemTime::now().duration_since(UNIX_EPOCH) {
timestamp = now.as_nanos() as i64;
}
}
}
let timestamp = self.get_timeline_timestamp();

match self.prepare_and_send_message(
frames,
Expand Down Expand Up @@ -1425,6 +1425,22 @@ impl Profiler {
}
}

/// Gets a timestamp for timeline profiling if timeline is enabled.
/// Returns NO_TIMESTAMP if timeline is disabled or if getting the time fails.
fn get_timeline_timestamp(&self) -> i64 {
let system_settings = self.system_settings.load(Ordering::SeqCst);
// SAFETY: system settings are stable during a request.
if unsafe { *ptr::addr_of!((*system_settings).profiling_timeline_enabled) } {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.ok()
.map(|now| now.as_nanos() as i64)
.unwrap_or(NO_TIMESTAMP)
} else {
NO_TIMESTAMP
}
}

/// Creates the common message labels for all samples.
///
/// * `n_extra_labels` - Reserve room for extra labels, such as when the
Expand Down
3 changes: 1 addition & 2 deletions profiling/src/profiling/uploader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,11 @@ impl Uploader {
}
}

#[cfg(test)]
#[cfg(all(test, feature = "debug_stats"))]
mod tests {
use super::*;

#[test]
#[cfg(feature = "debug_stats")]
fn test_create_internal_metadata() {
// Set up all counters with known values
EXCEPTION_PROFILING_EXCEPTION_COUNT.store(42, Ordering::Relaxed);
Expand Down
54 changes: 54 additions & 0 deletions profiling/tests/correctness/allocation_time_combined.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"args": {
"DD_PROFILING_ENABLED": "yes",
"DD_PROFILING_ALLOCATION_ENABLED": "yes",
"DD_PROFILING_WALL_TIME_ENABLED": "yes",
"DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED": "no",
"DD_PROFILING_ENDPOINT_COLLECTION_ENABLED": "no",
"DD_PROFILING_LOG_LEVEL": "debug",
"EXECUTION_TIME": "5"
},
"flaky": false,
"skip-asan": false,
"todo": "",
"checks": [
{
"type": "regex",
"path": "$.sample_types[?(@.type=='samples')].unit",
"match": "^count$"
},
{
"type": "regex",
"path": "$.sample_types[?(@.type=='wall-time')].unit",
"match": "^nanoseconds$"
},
{
"type": "regex",
"path": "$.sample_types[?(@.type=='alloc-samples')].unit",
"match": "^count$"
},
{
"type": "regex",
"path": "$.sample_types[?(@.type=='alloc-space')].unit",
"match": "^bytes$"
},
{
"comment": "Verify we have samples with both allocation and time data (piggybacked samples)",
"type": "jq-minimum",
"path": ". as $root | (.sample_types | to_entries | map({(.value.type): .key}) | add) as $indices | .samples | map(select(.values[$indices.samples] > 0 and .values[$indices.\"wall-time\"] > 0 and .values[$indices.\"alloc-samples\"] > 0 and .values[$indices.\"alloc-space\"] > 0)) | length",
"match": 1
},
{
"comment": "Verify allocate_large appears in the profile",
"type": "jq-minimum",
"path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"allocate_large\")))) | length",
"match": 1
},
{
"comment": "Verify allocate_medium appears in the profile",
"type": "jq-minimum",
"path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"allocate_medium\")))) | length",
"match": 1
}
]
}
37 changes: 37 additions & 0 deletions profiling/tests/correctness/allocation_time_combined.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?php

// Test to verify that allocation and time profiling can coexist and that
// the piggybacking optimization works (where time samples are taken during
// allocation samples when interrupt_count > 0).
//
// Note: Piggybacking is opportunistic and happens when an allocation sample
// occurs while a time interrupt is pending. PHP checks interrupts frequently
// (at loop boundaries, function calls, etc.), so the window is narrow. This
// test just verifies the mechanism works, not that it happens often.

function allocate_large() {
// Large allocation to trigger sampling
$data = str_repeat("a", 1024 * 200_000); // ~200MB
return strlen($data);
}

function allocate_medium() {
// Medium allocation to trigger sampling
$data = str_repeat("b", 1024 * 100_000); // ~100MB
return strlen($data);
}

function main() {
$duration = $_ENV["EXECUTION_TIME"] ?? 10;
$end = microtime(true) + $duration;

while (microtime(true) < $end) {
// Just allocate repeatedly. Over time, some allocations will happen
// to occur while a time interrupt is pending (set by the profiler
// thread every ~10ms), exercising the piggybacking code path.
allocate_large();
allocate_medium();
}
}

main();
Loading