Skip to content

Commit 02cfa8b

Browse files
committed
fix: use shared elf_helper for unwind and symbol information
1 parent 4c990fd commit 02cfa8b

15 files changed

+6168
-2841
lines changed

.gitattributes

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
testdata/perf_map/cpp_my_benchmark.bin filter=lfs diff=lfs merge=lfs -text
22
testdata/perf_map/go_fib.bin filter=lfs diff=lfs merge=lfs -text
33
testdata/perf_map/divan_sleep_benches.bin filter=lfs diff=lfs merge=lfs -text
4+
testdata/perf_map/the_algorithms.bin filter=lfs diff=lfs merge=lfs -text
5+
src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__perf_map__tests__ruff_symbols.snap filter=lfs diff=lfs merge=lfs -text
6+
testdata/perf_map/ty_walltime filter=lfs diff=lfs merge=lfs -text
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
//! Based on this: https://github.com/mstange/samply/blob/4a5afec57b7c68b37ecde12b5a258de523e89463/samply/src/linux_shared/svma_file_range.rs#L8
2+
3+
use anyhow::Context;
4+
use object::Object;
5+
use object::ObjectSegment;
6+
7+
/// A file range in an object file, such as a segment or a section,
/// for which we know the corresponding Stated Virtual Memory Address (SVMA).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct SvmaFileRange {
    /// Stated virtual memory address at which the range starts.
    pub svma: u64,
    /// Offset of the first byte of the range within the object file.
    pub file_offset: u64,
    /// Length of the range in the file, in bytes.
    pub size: u64,
}
14+
15+
impl SvmaFileRange {
16+
pub fn from_segment<'data, S: ObjectSegment<'data>>(segment: S) -> Self {
17+
let svma = segment.address();
18+
let (file_offset, size) = segment.file_range();
19+
SvmaFileRange {
20+
svma,
21+
file_offset,
22+
size,
23+
}
24+
}
25+
26+
pub fn encompasses_file_range(&self, runtime_file_offset: u64, mapping_size: u64) -> bool {
27+
self.file_offset <= runtime_file_offset
28+
&& (runtime_file_offset + mapping_size) <= (self.file_offset + self.size)
29+
}
30+
31+
pub fn is_encompassed_by_file_range(
32+
&self,
33+
runtime_file_offset: u64,
34+
mapping_size: u64,
35+
) -> bool {
36+
runtime_file_offset <= self.file_offset
37+
&& (self.file_offset + self.size) <= (runtime_file_offset + mapping_size)
38+
}
39+
}
40+
41+
pub fn compute_load_bias(
42+
runtime_start_addr: u64,
43+
runtime_end_addr: u64,
44+
runtime_file_offset: u64,
45+
object: &object::File,
46+
) -> anyhow::Result<u64> {
47+
// The addresses of symbols read from an ELF file on disk are not their final runtime addresses.
48+
// This is due to Address Space Layout Randomization (ASLR) and the way the OS loader maps
49+
// file segments into virtual memory.
50+
//
51+
// Step 1: Find the corresponding ELF segment.
52+
// We must find the `PT_LOAD` segment that corresponds to the executable memory region we found
53+
// in /proc/<pid>/maps. We do this by comparing the `runtime_offset` against the offset in the file.
54+
//
55+
// For example, if we have the following `/proc/<pid>/maps` output:
56+
// ```
57+
// 00400000-00402000 r--p 00000000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
58+
// 00402000-0050f000 r-xp 00002000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin <-- we find this
59+
// 0050f000-0064b000 r--p 0010f000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
60+
// 0064b000-0064c000 r--p 0024a000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
61+
// 0064c000-0065e000 rw-p 0024b000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
62+
// 0065e000-00684000 rw-p 00000000 00:00 0
63+
// ```
64+
//
65+
// We'll match the PT_LOAD segment with the same offset (0x2000):
66+
// ```
67+
// $ readelf -l testdata/perf_map/go_fib.bin
68+
// Elf file type is EXEC (Executable file)
69+
// Entry point 0x402490
70+
// There are 15 program headers, starting at offset 64
71+
//
72+
// Program Headers:
73+
// Type Offset VirtAddr PhysAddr
74+
// PHDR 0x0000000000000040 0x0000000000400040 0x0000000000400040
75+
// 0x0000000000000348 0x0000000000000348 R 0x8
76+
// INTERP 0x0000000000000430 0x0000000000400430 0x0000000000400430
77+
// 0x0000000000000053 0x0000000000000053 R 0x1
78+
// LOAD 0x0000000000000000 0x0000000000400000 0x0000000000400000
79+
// 0x0000000000001640 0x0000000000001640 R 0x1000
80+
// LOAD 0x0000000000002000 0x0000000000402000 0x0000000000402000 <-- we'll match this
81+
// 0x000000000010ceb1 0x000000000010ceb1 R E 0x1000
82+
// ```
83+
let mapping_size = runtime_end_addr - runtime_start_addr;
84+
let load_segment = object
85+
.segments()
86+
.map(SvmaFileRange::from_segment)
87+
.find(|segment| {
88+
// When the kernel loads an ELF file, it maps entire pages (usually 4KB aligned),
89+
// not just the exact segment boundaries. Here's what happens:
90+
//
91+
// **ELF File Structure**:
92+
// - LOAD segment 1: file offset 0x0 - 0x4d26a (data/code)
93+
// - LOAD segment 2: file offset 0x4d26c - 0x13c4b6 (executable code)
94+
//
95+
// **Kernel Memory Mapping**: The kernel rounds down to page boundaries when mapping:
96+
// - Maps pages starting at offset 0x0 (covers segment 1)
97+
// - Maps pages starting at offset 0x4d000 (page-aligned, covers segment 2)
98+
//
99+
// (the example values are based on the `test_rust_divan_symbols` test)
100+
segment.encompasses_file_range(runtime_file_offset, mapping_size)
101+
|| segment.is_encompassed_by_file_range(runtime_file_offset, mapping_size)
102+
})
103+
.context(format!(
104+
"Could not find segment or section overlapping the file offset range 0x{:x}..0x{:x}",
105+
runtime_file_offset,
106+
runtime_file_offset + mapping_size
107+
))?;
108+
109+
// Compute the actual virtual address at which the segment is located in process memory.
110+
let runtime_start_addr = if load_segment.file_offset > runtime_file_offset {
111+
runtime_start_addr + (load_segment.file_offset - runtime_file_offset)
112+
} else {
113+
runtime_start_addr - (runtime_file_offset - load_segment.file_offset)
114+
};
115+
116+
// Step 2: Calculate the "load bias".
117+
// The bias is the difference between where the segment *actually* is in memory versus where the
118+
// ELF file *preferred* it to be.
119+
//
120+
// load_bias = runtime_start_addr - segment_preferred_vaddr
121+
//
122+
// - `runtime_start_addr`: The actual base address of this segment in memory (from `/proc/maps`).
123+
// - `load_segment.address()`: The preferred virtual address (`p_vaddr`) from the ELF file itself.
124+
//
125+
// This single calculation correctly handles both PIE/shared-objects and non-PIE executables:
126+
// - For PIE/.so files: `0x7f... (random) - 0x... (small) = <large_bias>`
127+
// - For non-PIE files: `0x402000 (fixed) - 0x402000 (fixed) = 0`
128+
Ok(runtime_start_addr.wrapping_sub(load_segment.svma))
129+
}
130+
131+
/// The "relative address base" is the base address which [`LookupAddress::Relative`]
132+
/// addresses are relative to. You start with an SVMA (a stated virtual memory address),
133+
/// you subtract the relative address base, and out comes a relative address.
134+
///
135+
/// This function computes that base address. It is defined as follows:
136+
///
137+
/// - For Windows binaries, the base address is the "image base address".
138+
/// - For mach-O binaries, the base address is the vmaddr of the __TEXT segment.
139+
/// - For ELF binaries, the base address is the vmaddr of the *first* segment,
140+
/// i.e. the vmaddr of the first "LOAD" ELF command.
141+
///
142+
/// In many cases, this base address is simply zero:
143+
///
144+
/// - ELF images of dynamic libraries (i.e. not executables) usually have a
145+
/// base address of zero.
146+
/// - Stand-alone mach-O dylibs usually have a base address of zero because their
147+
/// __TEXT segment is at address zero.
148+
/// - In PDBs, "RVAs" are relative addresses which are already relative to the
149+
/// image base.
150+
///
151+
/// However, in the following cases, the base address is usually non-zero:
152+
///
153+
/// - The "image base address" of Windows binaries is usually non-zero.
154+
/// - mach-O executable files (not dylibs) usually have their __TEXT segment at
155+
/// address 0x100000000.
156+
/// - mach-O libraries in the dyld shared cache have a __TEXT segment at some
157+
/// non-zero address in the cache.
158+
/// - ELF executables can have non-zero base addresses, e.g. 0x200000 or 0x400000.
159+
/// - Kernel ELF binaries ("vmlinux") have a large base address such as
160+
/// 0xffffffff81000000. Moreover, the base address seems to coincide with the
161+
/// vmaddr of the .text section, which is readily-available in perf.data files
162+
/// (in a synthetic mapping called "[kernel.kallsyms]_text").
163+
///
164+
/// Credits: https://github.com/mstange/samply/blob/4a5afec57b7c68b37ecde12b5a258de523e89463/samply-symbols/src/shared.rs#L513-L566
165+
pub fn relative_address_base(object_file: &object::File) -> u64 {
166+
use object::read::ObjectSegment;
167+
if let Some(text_segment) = object_file
168+
.segments()
169+
.find(|s| s.name() == Ok(Some("__TEXT")))
170+
{
171+
// This is a mach-O image. "Relative addresses" are relative to the
172+
// vmaddr of the __TEXT segment.
173+
return text_segment.address();
174+
}
175+
176+
if let object::FileFlags::Elf { .. } = object_file.flags() {
177+
// This is an ELF image. "Relative addresses" are relative to the
178+
// vmaddr of the first segment (the first LOAD command).
179+
if let Some(first_segment) = object_file.segments().next() {
180+
return first_segment.address();
181+
}
182+
}
183+
184+
// For PE binaries, relative_address_base() returns the image base address.
185+
object_file.relative_address_base()
186+
}
187+
188+
pub fn compute_base_avma(
189+
runtime_start_addr: u64,
190+
runtime_end_addr: u64,
191+
runtime_file_offset: u64,
192+
object: &object::File,
193+
) -> anyhow::Result<u64> {
194+
let bias = compute_load_bias(
195+
runtime_start_addr,
196+
runtime_end_addr,
197+
runtime_file_offset,
198+
object,
199+
)?;
200+
let base_svma = relative_address_base(object);
201+
Ok(base_svma.wrapping_add(bias))
202+
}

src/run/runner/wall_time/perf/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ use std::{cell::OnceCell, collections::HashMap, process::ExitStatus};
2929
mod jit_dump;
3030
mod setup;
3131

32+
pub mod elf_helper;
3233
pub mod fifo;
3334
pub mod perf_map;
3435
pub mod unwind_data;
@@ -244,7 +245,7 @@ impl PerfRunner {
244245
path.to_string_lossy().as_bytes(),
245246
page_offset,
246247
base_addr,
247-
end_addr - base_addr,
248+
end_addr,
248249
None,
249250
) {
250251
Ok(unwind_data) => {

0 commit comments

Comments
 (0)