Skip to content

Commit de8936c

Browse files
committed
Make error handling more robust.
1 parent 23b8570 commit de8936c

File tree

7 files changed

+161
-61
lines changed

7 files changed

+161
-61
lines changed

perfrecord/src/error.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
use crate::kernel_error::KernelError;
2+
use thiserror::Error;
3+
4+
#[derive(Debug, Clone, Error)]
5+
pub enum SamplingError {
6+
#[error("Fatal error encountered during sampling: {0}, {1}")]
7+
Fatal(&'static str, KernelError),
8+
9+
#[error("Ignorable error encountered during sampling: {0}, {1}")]
10+
Ignorable(&'static str, KernelError),
11+
12+
#[error("The target thread has probably been terminated. {0}, {1}")]
13+
ThreadTerminated(&'static str, KernelError),
14+
15+
#[error("The target process has probably been terminated. {0}, {1}")]
16+
ProcessTerminated(&'static str, KernelError),
17+
18+
#[error("Could not obtain root task.")]
19+
CouldNotObtainRootTask,
20+
}

perfrecord/src/kernel_error.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ impl IntoResult for kern_return_t {
2929

3030
pub type Result<T> = std::result::Result<T, KernelError>;
3131

32-
#[derive(Error, Debug, PartialEq, Eq)]
32+
#[derive(Error, Debug, Clone, PartialEq, Eq)]
3333
pub enum KernelError {
3434
#[error("Specified address is not currently valid.")]
3535
InvalidAddress,

perfrecord/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use structopt::StructOpt;
1414
#[allow(deref_nullptr)]
1515
mod dyld_bindings;
1616

17+
mod error;
1718
mod proc_maps;
1819
mod process_launcher;
1920
mod sampler;

perfrecord/src/proc_maps.rs

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
use super::kernel_error::{self, IntoResult};
21
use mach::message::mach_msg_type_number_t;
32
use mach::port::mach_port_t;
43
use mach::task::{task_info, task_resume, task_suspend};
@@ -21,7 +20,8 @@ use uuid::Uuid;
2120
use mach::{structs::x86_thread_state64_t, thread_status::x86_THREAD_STATE64};
2221

2322
use crate::dyld_bindings;
24-
use crate::kernel_error::KernelError;
23+
use crate::error::SamplingError;
24+
use crate::kernel_error::{self, IntoResult, KernelError};
2525
use dyld_bindings::{
2626
dyld_all_image_infos, dyld_image_info, load_command, mach_header_64, segment_command_64,
2727
uuid_command,
@@ -355,24 +355,43 @@ fn get_unwinding_registers(thread_act: mach_port_t) -> kernel_error::Result<(u64
355355

356356
fn with_suspended_thread<R>(
357357
thread_act: mach_port_t,
358-
f: impl FnOnce() -> kernel_error::Result<R>,
358+
f: impl FnOnce() -> R,
359359
) -> kernel_error::Result<R> {
360360
unsafe { thread_suspend(thread_act) }.into_result()?;
361361
let result = f();
362362
let _ = unsafe { thread_resume(thread_act) };
363-
result
363+
Ok(result)
364364
}
365365

366366
pub fn get_backtrace(
367367
memory: &mut ForeignMemory,
368368
thread_act: mach_port_t,
369369
frames: &mut Vec<u64>,
370-
) -> kernel_error::Result<()> {
370+
) -> Result<(), SamplingError> {
371371
with_suspended_thread(thread_act, || {
372-
let (ip, bp) = get_unwinding_registers(thread_act)?;
372+
let (ip, bp) = get_unwinding_registers(thread_act).map_err(|err| match err {
373+
KernelError::InvalidArgument
374+
| KernelError::MachSendInvalidDest
375+
| KernelError::Terminated => {
376+
SamplingError::ThreadTerminated("thread_get_state in get_unwinding_registers", err)
377+
}
378+
err => SamplingError::Ignorable("thread_get_state in get_unwinding_registers", err),
379+
})?;
373380
do_frame_pointer_stackwalk(ip, bp, memory, frames);
374381
Ok(())
375382
})
383+
.unwrap_or_else(|err| match err {
384+
KernelError::InvalidArgument
385+
| KernelError::MachSendInvalidDest
386+
| KernelError::Terminated => Err(SamplingError::ThreadTerminated(
387+
"thread_suspend in with_suspended_thread",
388+
err,
389+
)),
390+
err => Err(SamplingError::Ignorable(
391+
"thread_suspend in with_suspended_thread",
392+
err,
393+
)),
394+
})
376395
}
377396

378397
#[cfg(target_arch = "aarch64")]

perfrecord/src/sampler.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
use super::kernel_error;
2-
use super::task_profiler::TaskProfiler;
1+
use crate::error::SamplingError;
2+
use crate::task_profiler::TaskProfiler;
33
use crossbeam_channel::Receiver;
44
use gecko_profile::ProfileBuilder;
55
use std::mem;
@@ -35,13 +35,13 @@ impl Sampler {
3535
}
3636
}
3737

38-
pub fn run(mut self) -> kernel_error::Result<ProfileBuilder> {
38+
pub fn run(mut self) -> Result<ProfileBuilder, SamplingError> {
3939
let root_task = match self.task_receiver.recv() {
4040
Ok(task) => task,
4141
Err(_) => {
4242
// The sender went away. No profiling today.
4343
eprintln!("The process we launched did not give us a task port. This commonly happens when trying to profile signed executables (system apps, system python, ...), because those ignore DYLD_INSERT_LIBRARIES (and stop it from inheriting into child processes). For now, this profiler can only be used on unsigned binaries.");
44-
return Err(kernel_error::KernelError::MachRcvPortDied);
44+
return Err(SamplingError::CouldNotObtainRootTask);
4545
}
4646
};
4747

perfrecord/src/task_profiler.rs

Lines changed: 47 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
use super::kernel_error::{self, IntoResult, KernelError};
2-
use super::proc_maps::{DyldInfo, DyldInfoManager, Modification};
3-
use super::thread_profiler::ThreadProfiler;
1+
use crate::error::SamplingError;
2+
use crate::kernel_error::{IntoResult, KernelError};
3+
use crate::proc_maps::{DyldInfo, DyldInfoManager, Modification};
4+
use crate::thread_profiler::ThreadProfiler;
45
use gecko_profile::debugid::DebugId;
56
use mach::mach_types::thread_act_port_array_t;
67
use mach::mach_types::thread_act_t;
@@ -31,6 +32,7 @@ pub struct TaskProfiler {
3132
libs: Vec<DyldInfo>,
3233
executable_lib: Option<DyldInfo>,
3334
command_name: String,
35+
ignored_errors: Vec<SamplingError>,
3436
}
3537

3638
impl TaskProfiler {
@@ -41,17 +43,17 @@ impl TaskProfiler {
4143
now_system: SystemTime,
4244
command_name: &str,
4345
interval: Duration,
44-
) -> kernel_error::Result<Self> {
45-
let thread_acts = get_thread_list(task)?;
46+
) -> Option<Self> {
47+
let thread_acts = get_thread_list(task).ok()?;
4648
let mut live_threads = HashMap::new();
4749
for (i, thread_act) in thread_acts.into_iter().enumerate() {
4850
// Pretend that the first thread is the main thread. Might not be true.
4951
let is_main = i == 0;
50-
if let Some(thread) = ThreadProfiler::new(task, pid, thread_act, now, is_main)? {
52+
if let Some(thread) = ThreadProfiler::new(task, pid, thread_act, now, is_main) {
5153
live_threads.insert(thread_act, thread);
5254
}
5355
}
54-
Ok(TaskProfiler {
56+
Some(TaskProfiler {
5557
task,
5658
pid,
5759
interval,
@@ -64,20 +66,34 @@ impl TaskProfiler {
6466
libs: Vec::new(),
6567
command_name: command_name.to_owned(),
6668
executable_lib: None,
69+
ignored_errors: Vec::new(),
6770
})
6871
}
6972

70-
pub fn sample(&mut self, now: Instant) -> kernel_error::Result<bool> {
73+
pub fn sample(&mut self, now: Instant) -> Result<bool, SamplingError> {
7174
let result = self.sample_impl(now);
7275
match result {
7376
Ok(()) => Ok(true),
74-
Err(KernelError::MachSendInvalidDest) => Ok(false),
75-
Err(KernelError::Terminated) => Ok(false),
77+
Err(SamplingError::ProcessTerminated(_, _)) => Ok(false),
78+
Err(err @ SamplingError::Ignorable(_, _)) => {
79+
self.ignored_errors.push(err);
80+
if self.ignored_errors.len() >= 10 {
81+
println!(
82+
"Treating process \"{}\" [pid: {}] as terminated after 10 unknown errors:",
83+
self.command_name, self.pid
84+
);
85+
println!("{:#?}", self.ignored_errors);
86+
Ok(false)
87+
} else {
88+
// Pretend that sampling worked and that the thread is still alive.
89+
Ok(true)
90+
}
91+
}
7692
Err(err) => Err(err),
7793
}
7894
}
7995

80-
fn sample_impl(&mut self, now: Instant) -> kernel_error::Result<()> {
96+
fn sample_impl(&mut self, now: Instant) -> Result<(), SamplingError> {
8197
// First, check for any newly-loaded libraries.
8298
let changes = self
8399
.lib_info_manager
@@ -99,10 +115,7 @@ impl TaskProfiler {
99115
}
100116

101117
// Enumerate threads.
102-
let thread_acts = get_thread_list(self.task).map_err(|err| match err {
103-
KernelError::InvalidArgument => KernelError::Terminated,
104-
err => err,
105-
})?;
118+
let thread_acts = get_thread_list(self.task)?;
106119
let previously_live_threads: HashSet<_> =
107120
self.live_threads.iter().map(|(t, _)| *t).collect();
108121
let mut now_live_threads = HashSet::new();
@@ -111,9 +124,12 @@ impl TaskProfiler {
111124
let thread = match entry {
112125
Entry::Occupied(ref mut entry) => entry.get_mut(),
113126
Entry::Vacant(entry) => {
114-
match ThreadProfiler::new(self.task, self.pid, thread_act, now, false)? {
115-
Some(thread) => entry.insert(thread),
116-
None => continue,
127+
if let Some(thread) =
128+
ThreadProfiler::new(self.task, self.pid, thread_act, now, false)
129+
{
130+
entry.insert(thread)
131+
} else {
132+
continue;
117133
}
118134
}
119135
};
@@ -209,10 +225,19 @@ impl TaskProfiler {
209225
}
210226
}
211227

212-
fn get_thread_list(task: mach_port_t) -> kernel_error::Result<Vec<thread_act_t>> {
228+
fn get_thread_list(task: mach_port_t) -> Result<Vec<thread_act_t>, SamplingError> {
213229
let mut thread_list: thread_act_port_array_t = std::ptr::null_mut();
214230
let mut thread_count: mach_msg_type_number_t = Default::default();
215-
unsafe { task_threads(task, &mut thread_list, &mut thread_count) }.into_result()?;
231+
unsafe { task_threads(task, &mut thread_list, &mut thread_count) }
232+
.into_result()
233+
.map_err(|err| match err {
234+
KernelError::InvalidArgument
235+
| KernelError::MachSendInvalidDest
236+
| KernelError::Terminated => {
237+
SamplingError::ProcessTerminated("task_threads in get_thread_list", err)
238+
}
239+
err => SamplingError::Ignorable("task_threads in get_thread_list", err),
240+
})?;
216241

217242
let thread_acts =
218243
unsafe { std::slice::from_raw_parts(thread_list, thread_count as usize) }.to_owned();
@@ -224,7 +249,8 @@ fn get_thread_list(task: mach_port_t) -> kernel_error::Result<Vec<thread_act_t>>
224249
(thread_count as usize * mem::size_of::<thread_act_t>()) as mach_vm_size_t,
225250
)
226251
}
227-
.into_result()?;
252+
.into_result()
253+
.map_err(|err| SamplingError::Fatal("mach_vm_deallocate in get_thread_list", err))?;
228254

229255
Ok(thread_acts)
230256
}

0 commit comments

Comments
 (0)