Skip to content

Commit 28dd341

Browse files
committed
Use a persistent symbol index to resolve function names
Since we are loading symbols for the object file mapped into the process we are tracing anyway, it is more efficient if we map the stack traces to function names ourselves, rather than having libunwind do it. This speeds up tracing by about 15%.
1 parent 1c75e0a commit 28dd341

File tree

6 files changed

+138
-63
lines changed

6 files changed

+138
-63
lines changed

allocscope-trace/src/breakpoint.rs

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
*/
1818

1919
use crate::context;
20-
use crate::process_map;
2120
use crate::ptrace;
2221
use crate::symbol_index;
2322
use crate::trace;
@@ -233,25 +232,25 @@ impl BreakpointSet {
233232

234233
// Resolve all loosely bound breakpoints using the current process map of
235234
// the traced process.
236-
pub fn resolve_breakpoints(&mut self, pid: u32) -> Result<(), Box<dyn Error>> {
237-
let process_map = process_map::ProcessMap::new(pid)?;
238-
let mut symbol_index = symbol_index::SymbolIndex::new();
239-
symbol_index.add_symbols(&process_map);
240-
235+
pub fn resolve_breakpoints(
236+
&mut self,
237+
pid: u32,
238+
symbol_index: &symbol_index::SymbolIndex,
239+
) -> Result<(), Box<dyn Error>> {
241240
for binding in self.bindings.iter() {
242-
match symbol_index.symbols.get(&binding.function_name) {
243-
Some(entry) => {
241+
match symbol_index.symbols_by_name.get(&binding.function_name) {
242+
Some(entry_vec) => {
244243
// For each address of the function, set a breakpoint.
245244
// Multiple addresses might be necessary, because there
246245
// might be multiple linked copies of a function with the
247246
// same name. (Consider multiple linked copies of libc
248247
// in the same process.)
249-
for address in &entry.addresses {
250-
if !self.breakpoints.contains_key(address) {
248+
for entry in entry_vec {
249+
if !self.breakpoints.contains_key(&entry.address) {
251250
add_breakpoint(
252251
&mut self.breakpoints,
253252
pid,
254-
*address,
253+
entry.address,
255254
binding.callback,
256255
true,
257256
)?;

allocscope-trace/src/context.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
use crate::breakpoint;
2020
use crate::process_map;
2121
use crate::record;
22+
use crate::symbol_index;
2223
use crate::unwind;
2324
use std::collections::HashMap;
2425
use std::error::Error;
@@ -47,6 +48,10 @@ pub struct TraceContext<'trace_lifetime> {
4748
// address space.
4849
pub process_map: process_map::ProcessMap,
4950

51+
// Bookkeeping for the symbols from the binaries mmap-ed into the
52+
// process's address space.
53+
pub symbol_index: symbol_index::SymbolIndex,
54+
5055
// Address space structure used by libunwind.
5156
pub unwind_address_space: unwind::AddressSpace,
5257

@@ -66,6 +71,7 @@ impl<'trace_lifetime> TraceContext<'trace_lifetime> {
6671
breakpoint_set,
6772
transaction,
6873
process_map: process_map::ProcessMap::new(pid)?,
74+
symbol_index: symbol_index::SymbolIndex::new(),
6975
unwind_address_space: unwind::AddressSpace::new_upt()?,
7076
thread_context: HashMap::new(),
7177
})
@@ -103,4 +109,17 @@ impl<'trace_lifetime> TraceContext<'trace_lifetime> {
103109
.get(&pid)
104110
.ok_or("missing thread context".into())
105111
}
112+
113+
// The memory map of the process we are tracing has changed, so update
114+
// the process map with all current memory mappings and reindex the
115+
// symbols of the process as new code may have been mapped in.
116+
pub fn update_process_map(&mut self, pid: u32) -> Result<(), Box<dyn Error>> {
117+
self.process_map = process_map::ProcessMap::new(pid)?;
118+
self.symbol_index = symbol_index::SymbolIndex::new();
119+
self.symbol_index.add_symbols(&self.process_map);
120+
self.breakpoint_set
121+
.resolve_breakpoints(pid, &self.symbol_index)?;
122+
123+
Ok(())
124+
}
106125
}

allocscope-trace/src/hooks.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
use crate::breakpoint;
2020
use crate::context;
21-
use crate::process_map;
2221
use crate::ptrace;
2322
use crate::record::EventType;
2423
use crate::unwind;
@@ -35,6 +34,7 @@ fn collect_stack(
3534

3635
unwind::collect_stack(
3736
&context.process_map,
37+
&context.symbol_index,
3838
&context.unwind_address_space,
3939
&thread_context.unwind_accessors,
4040
)
@@ -48,8 +48,7 @@ fn on_mmap(
4848
complete: bool,
4949
) -> Result<(), Box<dyn Error>> {
5050
if complete {
51-
context.process_map = process_map::ProcessMap::new(pid)?;
52-
context.breakpoint_set.resolve_breakpoints(pid)?;
51+
context.update_process_map(pid)?;
5352
}
5453

5554
Ok(())

allocscope-trace/src/symbol_index.rs

Lines changed: 96 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,71 +17,115 @@
1717
*/
1818

1919
use crate::process_map;
20-
use object::{Object, ObjectSymbol};
21-
use std::collections::HashMap;
20+
use object::{Object, ObjectSegment, ObjectSymbol};
21+
use std::collections::{BTreeMap, HashMap};
2222

23-
// A list of addresses to which a particular symbol name resolves.
24-
#[derive(Debug)]
25-
pub struct SymbolEntry {
26-
// The addresses.
27-
pub addresses: Vec<u64>,
23+
// A reference to a function mapped into the traced process.
24+
#[derive(Debug, Clone)]
25+
pub struct SymbolInfo {
26+
// The name of the function.
27+
pub name: String,
28+
29+
// The address in the traced process's address space.
30+
pub address: u64,
31+
32+
// The length of the function in bytes.
33+
pub size: u64,
2834
}
2935

3036
// An index of symbol names and addresses to which those symbols resolve.
3137
#[derive(Debug)]
3238
pub struct SymbolIndex {
33-
// The map from symbol name to list of addresses.
34-
pub symbols: HashMap<String, SymbolEntry>,
39+
// The map from symbol name to information about the symbol.
40+
pub symbols_by_name: HashMap<String, Vec<SymbolInfo>>,
41+
42+
// A map from address to symbol info.
43+
pub symbols_by_address: BTreeMap<u64, SymbolInfo>,
3544
}
3645

3746
impl SymbolIndex {
3847
// Start a new empty symbol index.
3948
pub fn new() -> SymbolIndex {
40-
let symbols = HashMap::new();
41-
SymbolIndex { symbols }
49+
SymbolIndex {
50+
symbols_by_name: HashMap::new(),
51+
symbols_by_address: BTreeMap::new(),
52+
}
4253
}
4354

4455
// Check whether a particular symbol falls within the address range
4556
// mapped by a ProcessMapEntry, and if so, then store the relevant
4657
// address in the symbol map.
47-
fn add_symbol(&mut self, entry: &process_map::ProcessMapEntry, symbol: &object::Symbol) {
58+
fn add_symbol(
59+
&mut self,
60+
entry: &process_map::ProcessMapEntry,
61+
address_offset: i64,
62+
symbol: &object::Symbol,
63+
) {
4864
match symbol.name() {
4965
Ok(name) => {
50-
if symbol.address() >= entry.offset
51-
&& symbol.address() < entry.offset + (entry.end - entry.begin)
66+
let sym_address = (symbol.address() as i64 - address_offset) as u64;
67+
let size = symbol.size();
68+
69+
if sym_address >= entry.offset
70+
&& sym_address < entry.offset + (entry.end - entry.begin)
5271
{
53-
let address = entry.begin + symbol.address() - entry.offset;
54-
if !self.symbols.contains_key(name) {
55-
let addresses = Vec::new();
56-
self.symbols
57-
.insert(name.to_owned(), SymbolEntry { addresses });
72+
let address = entry.begin + sym_address - entry.offset;
73+
let symbol_info = SymbolInfo {
74+
name: name.to_owned(),
75+
address,
76+
size,
77+
};
78+
79+
if !self.symbols_by_name.contains_key(name) {
80+
self.symbols_by_name.insert(name.to_owned(), Vec::new());
5881
}
59-
let entry = self.symbols.get_mut(name).unwrap();
60-
entry.addresses.push(address);
82+
let entry = self.symbols_by_name.get_mut(name).unwrap();
83+
entry.push(symbol_info.clone());
84+
85+
self.symbols_by_address.insert(address, symbol_info);
6186
}
6287
}
6388
Err(_) => (),
6489
}
6590
}
6691

92+
// Add symbols from a parsed object file to the symbol index.
93+
fn add_elf_symbols(&mut self, entry: &process_map::ProcessMapEntry, elf: &object::File) {
94+
let mut address_offset: Option<i64> = None;
95+
96+
for segment in elf.segments() {
97+
let range = segment.file_range();
98+
99+
if range.0 == entry.offset {
100+
address_offset = Some((segment.address() - range.0) as i64);
101+
}
102+
}
103+
104+
if address_offset == None {
105+
return;
106+
}
107+
108+
// Iterate through all symbols in the binary, adding
109+
// them to the symbol map if they are in the mmap
110+
// range.
111+
for symbol in elf.symbols() {
112+
self.add_symbol(entry, address_offset.unwrap(), &symbol);
113+
}
114+
115+
// Similarly, but for dynamic symbols.
116+
for symbol in elf.dynamic_symbols() {
117+
self.add_symbol(entry, address_offset.unwrap(), &symbol);
118+
}
119+
}
120+
67121
// Add all the symbols for a particular mmaped range of an executable
68122
// which has been mapped into a traced process.
69123
pub fn add_entry_symbols(&mut self, entry: &process_map::ProcessMapEntry) {
70124
match &entry.filename {
71125
Some(filename) => match std::fs::read(filename.clone()) {
72126
Ok(elf_data) => match object::File::parse(&*elf_data) {
73127
Ok(elf) => {
74-
// Iterate through all symbols in the binary, adding
75-
// them to the symbol map if they are in the mmap
76-
// range.
77-
for symbol in elf.symbols() {
78-
self.add_symbol(entry, &symbol);
79-
}
80-
81-
// Similarly, but for dynamic symbols.
82-
for symbol in elf.dynamic_symbols() {
83-
self.add_symbol(entry, &symbol);
84-
}
128+
self.add_elf_symbols(entry, &elf);
85129
}
86130
Err(_) => (),
87131
},
@@ -98,4 +142,24 @@ impl SymbolIndex {
98142
self.add_entry_symbols(&entry);
99143
}
100144
}
145+
146+
// Get function name by address. We'll try a few symbols which start
147+
// prior to the address we are checking, as glibc likes to leave GLIBC
148+
// symbols near the function name.
149+
pub fn get_function_by_address(&self, address: u64) -> Option<SymbolInfo> {
150+
let mut tries = 0;
151+
let mut symbols_by_range = self.symbols_by_address.range(..address + 1);
152+
while let Some((_, info)) = symbols_by_range.next_back() {
153+
if address - info.address <= info.size {
154+
return Some(info.clone());
155+
}
156+
157+
tries += 1;
158+
if tries >= 4 {
159+
break;
160+
}
161+
}
162+
163+
return None;
164+
}
101165
}

allocscope-trace/src/trace.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -208,14 +208,14 @@ fn detach_from_tracee(context: &mut context::TraceContext) -> Result<(), Box<dyn
208208
fn trace_attached_pid(record: record::TraceRecord, pid: u32) -> Result<(), Box<dyn Error>> {
209209
let mut breakpoint_set = breakpoint::BreakpointSet::new();
210210
hooks::add_hooks(&mut breakpoint_set)?;
211-
breakpoint_set.resolve_breakpoints(pid)?;
212-
ptrace::setoptions(pid, libc::PTRACE_O_TRACECLONE)?;
213-
214-
// Now that we have set breakpoints, resume execution.
215-
ptrace::syscall(pid, 0)?;
216211

217212
let transaction = record::Transaction::new(&record)?;
218213
let mut context = context::TraceContext::new(pid, breakpoint_set, transaction)?;
214+
context.update_process_map(pid)?;
215+
216+
// Now that we have set breakpoints, resume execution.
217+
ptrace::setoptions(pid, libc::PTRACE_O_TRACECLONE)?;
218+
ptrace::syscall(pid, 0)?;
219219

220220
ptrace::block_term_signals()?;
221221
match trace_loop(&mut context, pid) {

allocscope-trace/src/unwind.rs

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
*/
1818

1919
use crate::process_map;
20+
use crate::symbol_index;
2021
use libunwind_sys;
2122
use std::error::Error;
2223
use std::path;
@@ -104,6 +105,7 @@ impl Drop for UPTAccessors {
104105
// Collect the current stack from a stopped traced thread using libunwind.
105106
pub fn collect_stack(
106107
process_map: &process_map::ProcessMap,
108+
symbol_index: &symbol_index::SymbolIndex,
107109
address_space: &AddressSpace,
108110
upt: &UPTAccessors,
109111
) -> Result<Vec<StackEntry>, Box<dyn Error>> {
@@ -130,18 +132,10 @@ pub fn collect_stack(
130132
}
131133

132134
let mut offset: libunwind_sys::unw_word_t = 0;
133-
let mut name_vec: Vec<libc::c_char> = vec![0; 1024];
134135
let mut name: String = "".to_string();
135-
if libunwind_sys::unw_get_proc_name(
136-
&mut cursor,
137-
name_vec.as_mut_ptr(),
138-
1024,
139-
&mut offset,
140-
) == 0
141-
{
142-
name = std::ffi::CStr::from_ptr(name_vec.as_mut_ptr())
143-
.to_string_lossy()
144-
.into_owned();
136+
if let Some(symbol) = symbol_index.get_function_by_address(address) {
137+
name = symbol.name.clone();
138+
offset = address - symbol.address;
145139
} else {
146140
// If we can't resolve the address to a function, instead use
147141
// the filename from which the instructions are mapped.
@@ -151,7 +145,7 @@ pub fn collect_stack(
151145
if let Some(basename) = path.file_name() {
152146
if let Some(basename_str) = basename.to_str() {
153147
name = format!("[{}]", basename_str);
154-
offset = address - entry.begin;
148+
offset = address - entry.begin + entry.offset;
155149
}
156150
}
157151
}

0 commit comments

Comments
 (0)