Skip to content

Commit 8a1de0c

Browse files
committed
Use a custom libunwind memory accessor for performance
Slowed part of the trace process is currently reading memory using ptrace. We can speed up the trace significantly - more than doubling its speed - by using a cache to provide memory data to libunwind if it has already read that memory location. Also, enable a few pragmas to get better write speed from SQLite. This gives us a small improvement.
1 parent 28dd341 commit 8a1de0c

File tree

5 files changed

+216
-62
lines changed

5 files changed

+216
-62
lines changed

allocscope-trace/src/context.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ pub struct TraceThreadContext {
3030
pub in_syscall: bool,
3131

3232
// ptrace accessors used by libunwind to access the thread.
33-
pub unwind_accessors: unwind::UPTAccessors,
33+
pub unwind_context: unwind::UPTContext,
3434
}
3535

3636
// Context relevant to the traced process.
@@ -85,7 +85,7 @@ impl<'trace_lifetime> TraceContext<'trace_lifetime> {
8585
pid,
8686
TraceThreadContext {
8787
in_syscall: false,
88-
unwind_accessors: unwind::UPTAccessors::new(pid as i32)?,
88+
unwind_context: unwind::UPTContext::new(pid as i32)?,
8989
},
9090
);
9191
}

allocscope-trace/src/hooks.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ fn collect_stack(
3636
&context.process_map,
3737
&context.symbol_index,
3838
&context.unwind_address_space,
39-
&thread_context.unwind_accessors,
39+
&thread_context.unwind_context,
4040
)
4141
}
4242

allocscope-trace/src/record.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,14 @@ impl TraceRecord {
306306

307307
let connection = rusqlite::Connection::open(filename)?;
308308

309+
// These pragmas improve write performance a bit.
310+
connection.execute_batch(
311+
"PRAGMA journal_mode = OFF;
312+
PRAGMA synchronous = 0;
313+
PRAGMA locking_mode = EXCLUSIVE;
314+
PRAGMA temp_store = MEMORY;",
315+
)?;
316+
309317
connection.execute(
310318
"CREATE TABLE IF NOT EXISTS trace (
311319
version TEXT NOT NULL,

allocscope-trace/src/unwind.rs

Lines changed: 197 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,25 @@
1919
use crate::process_map;
2020
use crate::symbol_index;
2121
use libunwind_sys;
22+
use std::collections::HashMap;
2223
use std::error::Error;
2324
use std::path;
2425

26+
// Accessors we will pass to libunwind for crawling the stack. We want
27+
// to override the 'access_mem' accessor because it is the most critical for
28+
// performance, and we can write a faster implementation than the default
29+
// _UPT_access_mem implementation.
30+
const UNWIND_ACCESSORS: libunwind_sys::unw_accessors_t = libunwind_sys::unw_accessors_t {
31+
find_proc_info: Some(libunwind_sys::_UPT_find_proc_info),
32+
put_unwind_info: Some(libunwind_sys::_UPT_put_unwind_info),
33+
get_dyn_info_list_addr: Some(libunwind_sys::_UPT_get_dyn_info_list_addr),
34+
access_mem: Some(unwind_access_mem),
35+
access_reg: Some(libunwind_sys::_UPT_access_reg),
36+
access_fpreg: Some(libunwind_sys::_UPT_access_fpreg),
37+
resume: Some(libunwind_sys::_UPT_resume),
38+
get_proc_name: Some(libunwind_sys::_UPT_get_proc_name),
39+
};
40+
2541
// An entry representing a stack frame in a stack backtrace.
2642
#[derive(Debug)]
2743
pub struct StackEntry {
@@ -44,11 +60,14 @@ pub struct AddressSpace {
4460
impl AddressSpace {
4561
// Create a new libunwind address space object.
4662
fn new(
47-
accessors: *mut libunwind_sys::unw_accessors_t,
63+
accessors: *const libunwind_sys::unw_accessors_t,
4864
byteorder: libc::c_int,
4965
) -> Result<AddressSpace, Box<dyn Error>> {
5066
unsafe {
51-
let handle = libunwind_sys::unw_create_addr_space(accessors, byteorder);
67+
let handle = libunwind_sys::unw_create_addr_space(
68+
accessors as *mut libunwind_sys::unw_accessors_t,
69+
byteorder,
70+
);
5271
if handle == std::ptr::null_mut() {
5372
Err("failure to create libunwind address space")?
5473
}
@@ -60,7 +79,7 @@ impl AddressSpace {
6079
// Create a new libunwind address space using libunwind's built-in ptrace
6180
// accessors to get at the memory in that address space.
6281
pub fn new_upt() -> Result<AddressSpace, Box<dyn Error>> {
63-
unsafe { AddressSpace::new(&mut libunwind_sys::_UPT_accessors, 0) }
82+
AddressSpace::new(&UNWIND_ACCESSORS, 0)
6483
}
6584
}
6685

@@ -74,26 +93,26 @@ impl Drop for AddressSpace {
7493
}
7594

7695
// ptrace accessors as implemented by libunwind.
77-
pub struct UPTAccessors {
96+
pub struct UPTContext {
7897
// The raw pointer to the accessor functions.
7998
handle: *mut std::ffi::c_void,
8099
}
81100

82-
impl UPTAccessors {
101+
impl UPTContext {
83102
// Create a new accessor using libunwind's built-in ptrace accessors.
84-
pub fn new(pid: i32) -> Result<UPTAccessors, Box<dyn Error>> {
103+
pub fn new(pid: i32) -> Result<UPTContext, Box<dyn Error>> {
85104
unsafe {
86105
let handle = libunwind_sys::_UPT_create(pid as i32);
87106
if handle == std::ptr::null_mut() {
88107
Err("failure to create libunwind UPT accessors")?
89108
}
90109

91-
Ok(UPTAccessors { handle })
110+
Ok(UPTContext { handle })
92111
}
93112
}
94113
}
95114

96-
impl Drop for UPTAccessors {
115+
impl Drop for UPTContext {
97116
// Destroy the accessors when dropped.
98117
fn drop(&mut self) {
99118
unsafe {
@@ -102,70 +121,189 @@ impl Drop for UPTAccessors {
102121
}
103122
}
104123

105-
// Collect the current stack from a stopped traced thread using libunwind.
106-
pub fn collect_stack(
124+
// A context for crawling the stack. We can speed up stack crawling
125+
// significantly if we cache values to minimize the number of ptrace()
126+
// calls to access another process's memory.
127+
struct CrawlContext {
128+
// A cache mapping addresses to value.
129+
cache: HashMap<u64, u64>,
130+
131+
// The most recent address read.
132+
previous_address: u64,
133+
134+
// The most recent value read.
135+
previous_value: u64,
136+
}
137+
138+
// A global context for crawling the stack is gross, but it is the most
139+
// practical option, because we want to use it from libunwind's accessor
140+
// functions. libunwind has a mechanism for passing a context pointer to
141+
// the callbacks, but if we want to use the standard _UPT callbacks for some
142+
// of the accessors then we need to pass the standard _UPT context to them.
143+
// We can't just use a wrapper, because the _UPT accessor callbacks are
144+
// reentrant. That is to say, some of the standard accessors will call our
145+
// 'access_mem' accessor with whatever context we pass them.
146+
//
147+
// So, global variable, and assume we will only ever be accessing it from one
148+
// thread.
149+
static mut CRAWL_CONTEXT: Option<CrawlContext> = None;
150+
151+
impl CrawlContext {
152+
// Create a new context with an empty cache.
153+
fn new() -> CrawlContext {
154+
CrawlContext {
155+
cache: HashMap::new(),
156+
previous_address: 0,
157+
previous_value: 0,
158+
}
159+
}
160+
}
161+
162+
// Get a function name and offset given and address in the traced process.
163+
fn get_function_by_address(
164+
process_map: &process_map::ProcessMap,
165+
symbol_index: &symbol_index::SymbolIndex,
166+
address: u64,
167+
) -> (String, u64) {
168+
let mut offset = 0;
169+
let mut name = "".to_string();
170+
171+
if let Some(symbol) = symbol_index.get_function_by_address(address) {
172+
name = symbol.name.clone();
173+
offset = address - symbol.address;
174+
} else {
175+
// If we can't resolve the address to a function, instead use
176+
// the filename from which the instructions are mapped.
177+
if let Some(entry) = process_map.entry_for_address(address) {
178+
if let Some(filename) = &entry.filename {
179+
let path = path::Path::new(filename);
180+
if let Some(basename) = path.file_name() {
181+
if let Some(basename_str) = basename.to_str() {
182+
name = format!("[{}]", basename_str);
183+
offset = address - entry.begin + entry.offset;
184+
}
185+
}
186+
}
187+
}
188+
}
189+
190+
(name, offset)
191+
}
192+
193+
// Collect the stack from the traced process. Assumes we have exclusive
194+
// access to the global CRAWL_CONTEXT.
195+
unsafe fn collect_stack_non_threadsafe(
107196
process_map: &process_map::ProcessMap,
108197
symbol_index: &symbol_index::SymbolIndex,
109198
address_space: &AddressSpace,
110-
upt: &UPTAccessors,
199+
upt: &UPTContext,
111200
) -> Result<Vec<StackEntry>, Box<dyn Error>> {
112201
let mut stack = Vec::<StackEntry>::new();
113202

114-
unsafe {
115-
let mut cursor =
116-
std::mem::MaybeUninit::<libunwind_sys::unw_cursor_t>::zeroed().assume_init();
117-
if libunwind_sys::unw_init_remote(&mut cursor, address_space.handle, upt.handle) != 0 {
118-
Err("failure to initialize libunwind remote address space")?
203+
let mut cursor = std::mem::MaybeUninit::<libunwind_sys::unw_cursor_t>::zeroed().assume_init();
204+
if libunwind_sys::unw_init_remote(&mut cursor, address_space.handle, upt.handle) != 0 {
205+
Err("failure to initialize libunwind remote address space")?
206+
}
207+
208+
loop {
209+
let mut address: libunwind_sys::unw_word_t = 0;
210+
211+
// Get the address for this stack frame.
212+
if libunwind_sys::unw_get_reg(&mut cursor, libunwind_sys::UNW_TDEP_IP as i32, &mut address)
213+
!= 0
214+
{
215+
Err("failure to unwind instruction pointer")?
119216
}
120217

121-
loop {
122-
let mut address: libunwind_sys::unw_word_t = 0;
123-
124-
// Get the address for this stack frame.
125-
if libunwind_sys::unw_get_reg(
126-
&mut cursor,
127-
libunwind_sys::UNW_TDEP_IP as i32,
128-
&mut address,
129-
) != 0
130-
{
131-
Err("failure to unwind instruction pointer")?
132-
}
218+
let (name, offset) = get_function_by_address(process_map, symbol_index, address);
219+
stack.push(StackEntry {
220+
address,
221+
name,
222+
offset,
223+
});
224+
225+
let step_result = libunwind_sys::unw_step(&mut cursor);
226+
if step_result < 0 {
227+
Err("failure to step libunwind stack")?
228+
} else if step_result == 0 {
229+
break;
230+
}
231+
}
232+
233+
Ok(stack)
234+
}
235+
236+
// Collect the current stack from a stopped traced thread using libunwind.
237+
// Given this uses the global CRAWL_CONTEXT, it is only safe if it is called
238+
// by one thread.
239+
pub fn collect_stack(
240+
process_map: &process_map::ProcessMap,
241+
symbol_index: &symbol_index::SymbolIndex,
242+
address_space: &AddressSpace,
243+
upt: &UPTContext,
244+
) -> Result<Vec<StackEntry>, Box<dyn Error>> {
245+
unsafe {
246+
// The assumption is that we only have one thread using CRAWL_CONTEXT.
247+
// If we had multiple threads calling collect_stack, this would not be
248+
// threadsafe.
249+
CRAWL_CONTEXT = Some(CrawlContext::new());
250+
251+
let result = collect_stack_non_threadsafe(process_map, symbol_index, address_space, upt);
252+
253+
CRAWL_CONTEXT = None;
254+
255+
result
256+
}
257+
}
133258

134-
let mut offset: libunwind_sys::unw_word_t = 0;
135-
let mut name: String = "".to_string();
136-
if let Some(symbol) = symbol_index.get_function_by_address(address) {
137-
name = symbol.name.clone();
138-
offset = address - symbol.address;
259+
// Read memory values from the traced process, but use a cache to retreive
260+
// them to speed up access.
261+
unsafe extern "C" fn unwind_access_mem(
262+
address_space: libunwind_sys::unw_addr_space_t,
263+
address: libunwind_sys::unw_word_t,
264+
value: *mut libunwind_sys::unw_word_t,
265+
write: i32,
266+
context: *mut std::ffi::c_void,
267+
) -> i32 {
268+
if write == 0 {
269+
if let Some(crawl) = &mut CRAWL_CONTEXT {
270+
// It turns out that libunwind will repeatedly ask for the same
271+
// memory value, so it is a win to check if we are getting the
272+
// most recently retrieved value.
273+
if address == crawl.previous_address {
274+
*value = crawl.previous_value;
275+
} else if let Some(cache_value) = crawl.cache.get(&address) {
276+
// Otherwise, use the cached value if it is available.
277+
crawl.previous_address = address;
278+
crawl.previous_value = *cache_value;
279+
*value = *cache_value;
139280
} else {
140-
// If we can't resolve the address to a function, instead use
141-
// the filename from which the instructions are mapped.
142-
if let Some(entry) = process_map.entry_for_address(address) {
143-
if let Some(filename) = &entry.filename {
144-
let path = path::Path::new(filename);
145-
if let Some(basename) = path.file_name() {
146-
if let Some(basename_str) = basename.to_str() {
147-
name = format!("[{}]", basename_str);
148-
offset = address - entry.begin + entry.offset;
149-
}
150-
}
151-
}
281+
let mut read_value: u64 = 0;
282+
283+
// The fallback option is to actually use ptrace() to read
284+
// from the traced process's memory.
285+
let result = libunwind_sys::_UPT_access_mem(
286+
address_space,
287+
address,
288+
&mut read_value,
289+
write,
290+
context,
291+
);
292+
if result != 0 {
293+
return result;
152294
}
153-
}
154295

155-
stack.push(StackEntry {
156-
address,
157-
name,
158-
offset,
159-
});
160-
161-
let step_result = libunwind_sys::unw_step(&mut cursor);
162-
if step_result < 0 {
163-
Err("failure to step libunwind stack")?
164-
} else if step_result == 0 {
165-
break;
296+
crawl.cache.insert(address, read_value);
297+
crawl.previous_address = address;
298+
crawl.previous_value = read_value;
299+
*value = read_value;
166300
}
301+
302+
0
303+
} else {
304+
libunwind_sys::_UPT_access_mem(address_space, address, value, write, context)
167305
}
306+
} else {
307+
libunwind_sys::_UPT_access_mem(address_space, address, value, write, context)
168308
}
169-
170-
Ok(stack)
171309
}

allocscope-view/src/trace.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,14 @@ impl Trace {
433433

434434
let scratch_connection = rusqlite::Connection::open(scratch_filename)?;
435435

436+
// These pragmas improve write performance a bit.
437+
scratch_connection.execute_batch(
438+
"PRAGMA journal_mode = OFF;
439+
PRAGMA synchronous = 0;
440+
PRAGMA locking_mode = EXCLUSIVE;
441+
PRAGMA temp_store = MEMORY;",
442+
)?;
443+
436444
scratch_connection.execute(
437445
"CREATE TABLE IF NOT EXISTS allocation_origin (
438446
address INTEGER PRIMARY KEY,

0 commit comments

Comments
 (0)