Skip to content

Commit 29bf331

Browse files
committed
Improve DWARF local variable recovery
1 parent f3e2a87 commit 29bf331

File tree

2 files changed

+87
-55
lines changed

2 files changed

+87
-55
lines changed

plugins/dwarf/dwarf_import/src/dwarfdebuginfo.rs

Lines changed: 73 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
use crate::{
16+
functions::FrameBase,
1617
helpers::{get_uid, resolve_specification, DieReference},
1718
ReaderType,
1819
};
@@ -52,7 +53,7 @@ pub(crate) struct FunctionInfoBuilder {
5253
pub(crate) platform: Option<Ref<Platform>>,
5354
pub(crate) variable_arguments: bool,
5455
pub(crate) stack_variables: Vec<NamedVariableWithType>,
55-
pub(crate) use_cfa: bool, //TODO actually store more info about the frame base
56+
pub(crate) frame_base: Option<FrameBase>,
5657
}
5758

5859
impl FunctionInfoBuilder {
@@ -63,6 +64,7 @@ impl FunctionInfoBuilder {
6364
return_type: Option<TypeUID>,
6465
address: Option<u64>,
6566
parameters: &Vec<Option<(String, TypeUID)>>,
67+
frame_base: Option<FrameBase>,
6668
) {
6769
if full_name.is_some() {
6870
self.full_name = full_name;
@@ -91,6 +93,10 @@ impl FunctionInfoBuilder {
9193
_ => self.parameters.push(new_parameter.clone()),
9294
}
9395
}
96+
97+
if frame_base.is_some() {
98+
self.frame_base = frame_base;
99+
}
94100
}
95101
}
96102

@@ -229,7 +235,7 @@ impl DebugInfoBuilder {
229235
address: Option<u64>,
230236
parameters: &Vec<Option<(String, TypeUID)>>,
231237
variable_arguments: bool,
232-
use_cfa: bool,
238+
frame_base: Option<FrameBase>,
233239
) -> Option<usize> {
234240
// Returns the index of the function
235241
// Raw names should be the primary key, but if they don't exist, use the full name
@@ -254,7 +260,14 @@ impl DebugInfoBuilder {
254260
.remove(function.full_name.as_ref().unwrap());
255261
}
256262

257-
function.update(full_name, raw_name, return_type, address, parameters);
263+
function.update(
264+
full_name,
265+
raw_name,
266+
return_type,
267+
address,
268+
parameters,
269+
frame_base,
270+
);
258271

259272
if function.full_name.is_some() {
260273
self.full_function_name_indices
@@ -276,7 +289,14 @@ impl DebugInfoBuilder {
276289
.remove(function.raw_name.as_ref().unwrap());
277290
}
278291

279-
function.update(full_name, raw_name, return_type, address, parameters);
292+
function.update(
293+
full_name,
294+
raw_name,
295+
return_type,
296+
address,
297+
parameters,
298+
frame_base,
299+
);
280300

281301
if function.raw_name.is_some() {
282302
self.raw_function_name_indices
@@ -299,7 +319,7 @@ impl DebugInfoBuilder {
299319
platform: None,
300320
variable_arguments,
301321
stack_variables: vec![],
302-
use_cfa,
322+
frame_base,
303323
};
304324

305325
if let Some(n) = &function.full_name {
@@ -367,7 +387,7 @@ impl DebugInfoBuilder {
367387
offset: i64,
368388
name: Option<String>,
369389
type_uid: Option<TypeUID>,
370-
lexical_block: Option<&iset::IntervalSet<u64>>,
390+
_lexical_block: Option<&iset::IntervalSet<u64>>,
371391
) {
372392
let name = match name {
373393
Some(x) => {
@@ -377,11 +397,11 @@ impl DebugInfoBuilder {
377397
} else {
378398
x
379399
}
380-
}
400+
},
381401
None => {
382402
// Anonymous variable, generate name
383403
format!("debug_var_{}", offset)
384-
}
404+
},
385405
};
386406

387407
let Some(function_index) = fn_idx else {
@@ -407,50 +427,56 @@ impl DebugInfoBuilder {
407427
return;
408428
};
409429

410-
let Some(adjustment_at_variable_lifetime_start) = lexical_block
411-
.and_then(|block_ranges| {
412-
block_ranges
413-
.unsorted_iter()
414-
.find_map(|x| self.range_data_offsets.values_overlap(x.start).next().cloned())
415-
})
416-
.or_else(|| {
417-
// Try using the offset at the adjustment 5 bytes after the function start, in case the function starts with a stack adjustment
418-
// Calculate final offset as (offset after initial stack adjustment) - (entry offset)
419-
// TODO: This is a decent heuristic but not perfect, since further adjustments could still be made
420-
if let Some(offset_after_stack_adjust) = self.range_data_offsets.values_overlap(func_addr + 5).next() {
421-
if let Some(entry_offset) = self.range_data_offsets.values_overlap(func_addr).next() {
422-
return Some(offset_after_stack_adjust - entry_offset);
423-
}
424-
}
425-
// No entry offset or no initial stack adjustment, fall through
426-
None
427-
})
428-
.or_else(|| {
429-
// If all else fails, use the function start address
430-
self.range_data_offsets.values_overlap(func_addr).next().cloned()
430+
let Some(frame_base) = &function.frame_base else {
431+
error!("Trying to add a local variable ({}) to a function ({:#x}) without a frame base. Please report this issue.", name, func_addr);
432+
return;
433+
};
434+
435+
// TODO: use lexical block information when we support stack variable lifetimes
436+
/*
437+
let lexical_block_adjustment = lexical_block.and_then(|block_ranges| {
438+
block_ranges.unsorted_iter().find_map(|x| {
439+
self.range_data_offsets
440+
.values_overlap(x.start)
441+
.next()
442+
.cloned()
431443
})
444+
});
445+
*/
446+
447+
let Some(entry_cfa_offset) = self
448+
.range_data_offsets
449+
.values_overlap(func_addr)
450+
.next()
451+
.cloned()
432452
else {
433453
// Unknown why, but this is happening with MachO + external dSYM
434-
debug!("Refusing to add a local variable ({}@{}) to function at {} without a known CIE offset.", name, offset, func_addr);
454+
debug!("Refusing to add a local variable ({}@{}) to function at {} without a known CFA adjustment.", name, offset, func_addr);
435455
return;
436456
};
437457

438458
// TODO: handle non-sp-based locations
439-
// TODO: if not in a lexical block these can be wrong, see https://github.com/Vector35/binaryninja-api/issues/5882#issuecomment-2406065057
440-
let adjusted_offset = if function.use_cfa {
459+
let adjusted_offset = match frame_base {
441460
// Apply CFA offset to variable storage offset if DW_AT_frame_base is DW_OP_call_frame_cfa
442-
offset + adjustment_at_variable_lifetime_start
443-
} else {
444-
// If it's using SP, we know the SP offset is <SP offset> + (<entry SP CFA offset> - <SP CFA offset>)
445-
let Some(adjustment_at_entry) =
446-
self.range_data_offsets.values_overlap(func_addr).next()
447-
else {
448-
// Unknown why, but this is happening with MachO + external dSYM
449-
debug!("Refusing to add a local variable ({}@{}) to function at {} without a known CIE offset for function start.", name, offset, func_addr);
450-
return;
451-
};
461+
FrameBase::CFA => offset + entry_cfa_offset,
462+
FrameBase::Register(_reg) => {
463+
// TODO: if not using SP, do not add var
464+
// TODO: if not in a lexical block this can be wrong, see https://github.com/Vector35/binaryninja-api/issues/5882#issuecomment-2406065057
465+
// If it's using SP, we know the SP offset is <SP offset> + (<entry SP CFA offset> - <SP CFA offset>)
452466

453-
offset + (adjustment_at_entry - adjustment_at_variable_lifetime_start)
467+
// Try using the offset at the adjustment 5 bytes after the function start, in case the function starts with a stack adjustment
468+
// Calculate final offset as (offset after initial stack adjustment) - (entry offset)
469+
// TODO: This is a decent heuristic but not perfect, since further adjustments could still be made
470+
let guessed_sp_adjustment = self
471+
.range_data_offsets
472+
.values_overlap(func_addr + 5)
473+
.next()
474+
.and_then(|cfa_offset_after_stack_adjust| {
475+
Some(entry_cfa_offset - cfa_offset_after_stack_adjust)
476+
});
477+
478+
offset + guessed_sp_adjustment.unwrap_or(0)
479+
},
454480
};
455481

456482
if adjusted_offset > 0 {
@@ -563,7 +589,7 @@ impl DebugInfoBuilder {
563589
let return_type = match function.return_type {
564590
Some(return_type_id) => {
565591
Conf::new(self.get_type(return_type_id).unwrap().ty.clone(), 128)
566-
}
592+
},
567593
_ => Conf::new(Type::void(), 0),
568594
};
569595

@@ -643,11 +669,11 @@ impl DebugInfoBuilder {
643669
match existing_functions.len().cmp(&1) {
644670
Ordering::Greater => {
645671
warn!("Multiple existing functions at address {address:08x}. One or more functions at this address may have the wrong platform information. Please report this binary.");
646-
}
672+
},
647673
Ordering::Equal => {
648674
func.platform = Some(existing_functions.get(0).platform())
649-
}
650-
Ordering::Less => {}
675+
},
676+
Ordering::Less => {},
651677
}
652678
}
653679
}

plugins/dwarf/dwarf_import/src/functions.rs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@ use gimli::{constants, AttributeValue, DebuggingInformationEntry, Dwarf, Operati
2424
use log::{debug, error};
2525
use regex::Regex;
2626

27+
#[derive(PartialEq, Eq, Hash)]
28+
pub enum FrameBase {
29+
Register(gimli::Register),
30+
CFA,
31+
}
32+
2733
fn get_parameters<R: ReaderType>(
2834
dwarf: &Dwarf<R>,
2935
unit: &Unit<R>,
@@ -65,7 +71,7 @@ fn get_parameters<R: ReaderType>(
6571
} else {
6672
result.push(None)
6773
}
68-
}
74+
},
6975
constants::DW_TAG_unspecified_parameters => variable_arguments = true,
7076
_ => (),
7177
}
@@ -136,17 +142,17 @@ pub(crate) fn parse_function_entry<R: ReaderType>(
136142
return None;
137143
}
138144

139-
let use_cfa;
145+
let frame_base;
140146
if let Ok(Some(AttributeValue::Exprloc(mut expression))) =
141147
entry.attr_value(constants::DW_AT_frame_base)
142148
{
143-
use_cfa = match Operation::parse(&mut expression.0, unit.encoding()) {
144-
Ok(Operation::Register { register: _ }) => false, // TODO: handle register-relative encodings later
145-
Ok(Operation::CallFrameCFA) => true,
146-
_ => false,
149+
frame_base = match Operation::parse(&mut expression.0, unit.encoding()) {
150+
Ok(Operation::Register { register: reg }) => Some(FrameBase::Register(reg)),
151+
Ok(Operation::CallFrameCFA) => Some(FrameBase::CFA),
152+
_ => None, // TODO: warn?
147153
};
148154
} else {
149-
use_cfa = false;
155+
frame_base = None;
150156
}
151157

152158
debug_info_builder.insert_function(
@@ -156,7 +162,7 @@ pub(crate) fn parse_function_entry<R: ReaderType>(
156162
address,
157163
&parameters,
158164
variable_arguments,
159-
use_cfa,
165+
frame_base,
160166
)
161167
}
162168

0 commit comments

Comments
 (0)