Skip to content

Commit 31266e0

Browse files
feat: parse perf file for memmap events instead of relying on /proc/pid/maps
1 parent 0207835 commit 31266e0

File tree

4 files changed

+170
-112
lines changed

4 files changed

+170
-112
lines changed

Cargo.lock

Lines changed: 11 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ path = "src/main.rs"
1111

1212

1313
[dependencies]
14-
anyhow = "1.0.75"
14+
anyhow = { workspace = true }
1515
clap = { workspace = true }
1616
itertools = "0.11.0"
1717
lazy_static = "1.4.0"
@@ -26,8 +26,8 @@ reqwest = { version = "0.11.22", features = [
2626
] }
2727
reqwest-middleware = "0.2.4"
2828
reqwest-retry = "0.3.0"
29-
serde = { version = "1.0.192", features = ["derive"] }
30-
serde_json = { version = "1.0.108", features = ["preserve_order"] }
29+
serde = { workspace = true }
30+
serde_json = { workspace = true }
3131
url = "2.4.1"
3232
sha256 = "1.4.0"
3333
tokio = { version = "1", features = ["macros", "rt"] }
@@ -51,10 +51,11 @@ async-trait = "0.1.82"
5151
libc = "0.2.171"
5252
bincode = "1.3.3"
5353
object = "0.36.7"
54-
linux-perf-data = "0.11.0"
54+
# TODO: Make this repo public
55+
linux-perf-data = { git = "ssh://git@github.com/CodSpeedHQ/linux-perf-data.git", branch = "feat/support-perf-pipe-data-parsing" }
5556
debugid = "0.8.0"
5657
memmap2 = "0.9.5"
57-
nix = { version = "0.29.0", features = ["fs", "time", "user"] }
58+
nix = { workspace = true, features = ["fs", "time", "user"] }
5859
futures = "0.3.31"
5960
runner-shared = { path = "crates/runner-shared" }
6061
shellexpand = { version = "3.1.1", features = ["tilde"] }
@@ -79,7 +80,11 @@ shell-quote = "0.7.2"
7980
members = ["crates/exec-harness", "crates/runner-shared"]
8081

8182
[workspace.dependencies]
83+
anyhow = "1.0.75"
8284
clap = { version = "4.4.8", features = ["derive", "env", "color"] }
85+
nix = "0.29.0"
86+
serde = { version = "1.0.192", features = ["derive"] }
87+
serde_json = { version = "1.0.108", features = ["preserve_order"] }
8388

8489
[workspace.metadata.release]
8590
sign-tag = true

src/executor/wall_time/perf/mod.rs

Lines changed: 28 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,26 @@ use crate::executor::valgrind::helpers::perf_maps::harvest_perf_maps_for_pids;
1111
use crate::executor::wall_time::perf::debug_info::ProcessDebugInfo;
1212
use crate::executor::wall_time::perf::jit_dump::harvest_perf_jit_for_pids;
1313
use crate::executor::wall_time::perf::perf_executable::get_working_perf_executable;
14-
use crate::executor::wall_time::perf::unwind_data::UnwindDataExt;
1514
use crate::prelude::*;
1615
use crate::run::UnwindingMode;
1716
use anyhow::Context;
1817
use fifo::{PerfFifo, RunnerFifo};
1918
use libc::pid_t;
2019
use nix::sys::time::TimeValLike;
2120
use nix::time::clock_gettime;
22-
use perf_map::ProcessSymbols;
21+
use parse_perf_file::MemmapRecordsOutput;
2322
use runner_shared::debug_info::ModuleDebugInfo;
2423
use runner_shared::fifo::Command as FifoCommand;
2524
use runner_shared::fifo::MarkerType;
2625
use runner_shared::metadata::PerfMetadata;
27-
use runner_shared::unwind_data::UnwindData;
2826
use std::collections::HashSet;
2927
use std::path::Path;
28+
use std::path::PathBuf;
3029
use std::time::Duration;
3130
use std::{cell::OnceCell, collections::HashMap, process::ExitStatus};
3231

3332
mod jit_dump;
33+
mod parse_perf_file;
3434
mod setup;
3535

3636
pub mod debug_info;
@@ -146,8 +146,8 @@ impl PerfRunner {
146146
]);
147147
cmd_builder.wrap_with(perf_wrapper_builder);
148148

149-
// Copy the perf data to the profile folder
150-
let perf_data_file_path = profile_folder.join(PERF_DATA_FILE_NAME);
149+
// Output the perf data to the profile folder
150+
let perf_data_file_path = get_perf_file_path(profile_folder);
151151

152152
let raw_command = format!(
153153
"set -o pipefail && {} | cat > {}",
@@ -203,84 +203,12 @@ impl PerfRunner {
203203
Ok(())
204204
}
205205

206-
#[cfg(target_os = "linux")]
207-
fn process_memory_mappings(
208-
pid: pid_t,
209-
symbols_by_pid: &mut HashMap<pid_t, ProcessSymbols>,
210-
unwind_data_by_pid: &mut HashMap<pid_t, Vec<UnwindData>>,
211-
) -> anyhow::Result<()> {
212-
use procfs::process::MMPermissions;
213-
214-
let bench_proc =
215-
procfs::process::Process::new(pid as _).expect("Failed to find benchmark process");
216-
let exe_maps = bench_proc.maps().expect("Failed to read /proc/{pid}/maps");
217-
218-
debug!("Process memory mappings for PID {pid}:");
219-
for map in exe_maps.iter().sorted_by_key(|m| m.address.0) {
220-
let (base_addr, end_addr) = map.address;
221-
debug!(
222-
" {:016x}-{:016x} {:08x} {:?} {:?} ",
223-
base_addr, end_addr, map.offset, map.pathname, map.perms,
224-
);
225-
}
226-
227-
for map in &exe_maps {
228-
let page_offset = map.offset;
229-
let (base_addr, end_addr) = map.address;
230-
let path = match &map.pathname {
231-
procfs::process::MMapPath::Path(path) => Some(path.clone()),
232-
_ => None,
233-
};
234-
235-
let Some(path) = &path else {
236-
if map.perms.contains(MMPermissions::EXECUTE) {
237-
debug!("Found executable mapping without path: {base_addr:x} - {end_addr:x}");
238-
}
239-
continue;
240-
};
241-
242-
if !map.perms.contains(MMPermissions::EXECUTE) {
243-
continue;
244-
}
245-
246-
symbols_by_pid
247-
.entry(pid)
248-
.or_insert(ProcessSymbols::new(pid))
249-
.add_mapping(pid, path, base_addr, end_addr, map.offset);
250-
debug!("Added mapping for module {path:?}");
251-
252-
match UnwindData::new(
253-
path.to_string_lossy().as_bytes(),
254-
page_offset,
255-
base_addr,
256-
end_addr,
257-
None,
258-
) {
259-
Ok(unwind_data) => {
260-
unwind_data_by_pid.entry(pid).or_default().push(unwind_data);
261-
debug!("Added unwind data for {path:?} ({base_addr:x} - {end_addr:x})");
262-
}
263-
Err(error) => {
264-
debug!(
265-
"Failed to create unwind data for module {}: {}",
266-
path.display(),
267-
error
268-
);
269-
}
270-
}
271-
}
272-
273-
Ok(())
274-
}
275-
276206
async fn handle_fifo(
277207
mut runner_fifo: RunnerFifo,
278208
mut perf_fifo: PerfFifo,
279209
) -> anyhow::Result<BenchmarkData> {
280210
let mut bench_order_by_timestamp = Vec::<(u64, String)>::new();
281211
let mut bench_pids = HashSet::<pid_t>::new();
282-
let mut symbols_by_pid = HashMap::<pid_t, ProcessSymbols>::new();
283-
let mut unwind_data_by_pid = HashMap::<pid_t, Vec<UnwindData>>::new();
284212
let mut markers = Vec::<MarkerType>::new();
285213

286214
let mut integration = None;
@@ -317,19 +245,9 @@ impl PerfRunner {
317245

318246
match cmd {
319247
FifoCommand::CurrentBenchmark { pid, uri } => {
320-
bench_order_by_timestamp.push((current_time(), uri));
248+
bench_order_by_timestamp.push((current_time(), uri.clone()));
321249
bench_pids.insert(pid);
322250

323-
#[cfg(target_os = "linux")]
324-
if !symbols_by_pid.contains_key(&pid) && !unwind_data_by_pid.contains_key(&pid)
325-
{
326-
Self::process_memory_mappings(
327-
pid,
328-
&mut symbols_by_pid,
329-
&mut unwind_data_by_pid,
330-
)?;
331-
}
332-
333251
runner_fifo.send_cmd(FifoCommand::Ack).await?;
334252
}
335253
FifoCommand::StartBenchmark => {
@@ -398,8 +316,6 @@ impl PerfRunner {
398316
integration,
399317
uri_by_ts: bench_order_by_timestamp,
400318
bench_pids,
401-
symbols_by_pid,
402-
unwind_data_by_pid,
403319
markers,
404320
})
405321
}
@@ -411,35 +327,45 @@ pub struct BenchmarkData {
411327

412328
uri_by_ts: Vec<(u64, String)>,
413329
bench_pids: HashSet<pid_t>,
414-
symbols_by_pid: HashMap<pid_t, ProcessSymbols>,
415-
unwind_data_by_pid: HashMap<pid_t, Vec<UnwindData>>,
416330
markers: Vec<MarkerType>,
417331
}
418332

419333
#[derive(Debug)]
420334
pub enum BenchmarkDataSaveError {
421335
MissingIntegration,
336+
FailedToParsePerfFile,
422337
}
423338

424339
impl BenchmarkData {
425340
pub fn save_to<P: AsRef<std::path::Path>>(
426341
&self,
427342
path: P,
428343
) -> Result<(), BenchmarkDataSaveError> {
429-
for proc_sym in self.symbols_by_pid.values() {
344+
debug!("Reading perf data from file for mmap extraction");
345+
let perf_file_path = get_perf_file_path(&path);
346+
347+
let MemmapRecordsOutput {
348+
symbols_by_pid,
349+
unwind_data_by_pid,
350+
} = parse_perf_file::parse_for_memmap2(&perf_file_path).map_err(|e| {
351+
error!("Failed to parse perf file: {e}");
352+
BenchmarkDataSaveError::FailedToParsePerfFile
353+
})?;
354+
355+
for proc_sym in symbols_by_pid.values() {
430356
proc_sym.save_to(&path).unwrap();
431357
}
432358

433359
// Collect debug info for each process by looking up file/line for symbols
434360
let mut debug_info_by_pid = HashMap::<i32, Vec<ModuleDebugInfo>>::new();
435-
for (pid, proc_sym) in &self.symbols_by_pid {
361+
for (pid, proc_sym) in &symbols_by_pid {
436362
debug_info_by_pid
437363
.entry(*pid)
438364
.or_default()
439365
.extend(ProcessDebugInfo::new(proc_sym).modules());
440366
}
441367

442-
for (pid, modules) in &self.unwind_data_by_pid {
368+
for (pid, modules) in &unwind_data_by_pid {
443369
for module in modules {
444370
module.save_to(&path, *pid).unwrap();
445371
}
@@ -457,7 +383,7 @@ impl BenchmarkData {
457383

458384
// Check if any of the ignored modules has been loaded in the process
459385
for ignore_path in get_objects_path_to_ignore() {
460-
for proc in self.symbols_by_pid.values() {
386+
for proc in symbols_by_pid.values() {
461387
if let Some(mapping) = proc.module_mapping(&ignore_path) {
462388
let (Some((base_addr, _)), Some((_, end_addr))) = (
463389
mapping.iter().min_by_key(|(base_addr, _)| base_addr),
@@ -472,16 +398,15 @@ impl BenchmarkData {
472398
}
473399

474400
// When python is statically linked, we'll not find it in the ignored modules. Add it manually:
475-
let python_modules = self.symbols_by_pid.values().filter_map(|proc| {
401+
let python_modules = symbols_by_pid.values().filter_map(|proc| {
476402
proc.loaded_modules().find(|path| {
477403
path.file_name()
478404
.map(|name| name.to_string_lossy().starts_with("python"))
479405
.unwrap_or(false)
480406
})
481407
});
482408
for path in python_modules {
483-
if let Some(mapping) = self
484-
.symbols_by_pid
409+
if let Some(mapping) = symbols_by_pid
485410
.values()
486411
.find_map(|proc| proc.module_mapping(path))
487412
{
@@ -505,3 +430,7 @@ impl BenchmarkData {
505430
Ok(())
506431
}
507432
}
433+
434+
fn get_perf_file_path<P: AsRef<Path>>(profile_folder: P) -> PathBuf {
435+
profile_folder.as_ref().join(PERF_DATA_FILE_NAME)
436+
}

0 commit comments

Comments
 (0)