Skip to content
Merged
20 changes: 18 additions & 2 deletions objdiff-core/src/diff/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,22 @@ pub fn diff_bss_symbol(
))
}

/// Returns `true` when two relocation target names should be treated as the same symbol.
///
/// Identical names always match. In addition, Metrowerks-style names of the form
/// `symbol$1234` match `symbol$2345`: the text before the first `$` must be equal and
/// both suffixes must consist solely of numeric characters.
fn symbol_name_matches(left_name: &str, right_name: &str) -> bool {
    // Identical names must match even when they contain a `$` followed by a
    // non-numeric suffix (e.g. `name.rtc$IMZ`), which the suffix rule below rejects.
    if left_name == right_name {
        return true;
    }
    // Match Metrowerks symbol$1234 against symbol$2345
    match (left_name.split_once('$'), right_name.split_once('$')) {
        (Some((left_prefix, left_suffix)), Some((right_prefix, right_suffix))) => {
            left_prefix == right_prefix
                && left_suffix.chars().all(char::is_numeric)
                && right_suffix.chars().all(char::is_numeric)
        }
        // Names differ and at least one has no `$` suffix to ignore.
        _ => false,
    }
}

fn reloc_eq(
left_obj: &Object,
right_obj: &Object,
Expand All @@ -45,8 +61,8 @@ fn reloc_eq(
return false;
}

let symbol_name_addend_matches =
left.symbol.name == right.symbol.name && left.relocation.addend == right.relocation.addend;
let symbol_name_addend_matches = symbol_name_matches(&left.symbol.name, &right.symbol.name)
&& left.relocation.addend == right.relocation.addend;
match (left.symbol.section, right.symbol.section) {
(Some(sl), Some(sr)) => {
// Match if section and name+addend or address match
Expand Down
177 changes: 125 additions & 52 deletions objdiff-core/src/diff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -575,47 +575,60 @@ fn matching_symbols(
&mut matches,
)?;
}
for (symbol_idx, symbol) in left.symbols.iter().enumerate() {
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
continue;
}
let section_kind = symbol_section_kind(left, symbol);
if section_kind == SectionKind::Unknown {
continue;
}
if left_used.contains(&symbol_idx) {
continue;
}
let symbol_match = SymbolMatch {
left: Some(symbol_idx),
right: find_symbol(right, left, symbol, Some(&right_used)),
prev: find_symbol(prev, left, symbol, None),
section_kind,
};
matches.push(symbol_match);
if let Some(right) = symbol_match.right {
right_used.insert(right);
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
// those are correct first, while the second pass catches near matches.
for fuzzy_literals in [false, true] {
for (symbol_idx, symbol) in left.symbols.iter().enumerate() {
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
continue;
}
let section_kind = symbol_section_kind(left, symbol);
if section_kind == SectionKind::Unknown {
continue;
}
if left_used.contains(&symbol_idx) {
continue;
}
let symbol_match = SymbolMatch {
left: Some(symbol_idx),
right: find_symbol(right, left, symbol_idx, Some(&right_used), fuzzy_literals),
prev: find_symbol(prev, left, symbol_idx, None, fuzzy_literals),
section_kind,
};
matches.push(symbol_match);
if let Some(right) = symbol_match.right {
left_used.insert(symbol_idx);
right_used.insert(right);
}
}
}
}
if let Some(right) = right {
for (symbol_idx, symbol) in right.symbols.iter().enumerate() {
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
continue;
}
let section_kind = symbol_section_kind(right, symbol);
if section_kind == SectionKind::Unknown {
continue;
}
if right_used.contains(&symbol_idx) {
continue;
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
// those are correct first, while the second pass catches near matches.
for fuzzy_literals in [false, true] {
for (symbol_idx, symbol) in right.symbols.iter().enumerate() {
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
continue;
}
let section_kind = symbol_section_kind(right, symbol);
if section_kind == SectionKind::Unknown {
continue;
}
if right_used.contains(&symbol_idx) {
continue;
}
let symbol_match = SymbolMatch {
left: None,
right: Some(symbol_idx),
prev: find_symbol(prev, right, symbol_idx, None, fuzzy_literals),
section_kind,
};
matches.push(symbol_match);
if symbol_match.prev.is_some() {
right_used.insert(symbol_idx);
}
}
matches.push(SymbolMatch {
left: None,
right: Some(symbol_idx),
prev: find_symbol(prev, right, symbol, None),
section_kind,
});
}
}
Ok(matches)
Expand Down Expand Up @@ -645,6 +658,13 @@ fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str
}
}

/// Returns the name of the section that `symbol` belongs to in `obj`, or `None`
/// when `symbol_section` yields no section for it.
fn symbol_section_name<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<&'obj str> {
    // Only the name half of the (name, kind) pair is needed here.
    symbol_section(obj, symbol).map(|(section_name, _)| section_name)
}

fn symbol_section_kind(obj: &Object, symbol: &Symbol) -> SectionKind {
match symbol.section {
Some(section_index) => obj.sections[section_index].kind,
Expand All @@ -653,35 +673,86 @@ fn symbol_section_kind(obj: &Object, symbol: &Symbol) -> SectionKind {
}
}

/// Check if a symbol is a compiler-generated literal like @1234.
fn is_symbol_compiler_generated_literal(symbol: &Symbol) -> bool {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious if we'd want other logic for GCC or MSVC

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the vs2022.o object in the tests, I see some literals like __real@3f800000, so I guess MSVC already puts the value of float literals in their symbol names. Ones like these shouldn't need any special logic, since comparing literals by value is what would be done anyway if the name were ignored.

Other than float literals I'm not really sure what I'm looking at, but I see some things like:

  • $unwind$?Dot@Vector@@QEAAMPEAU1@@Z
  • _RTC_InitBase.rtc$IMZ
  • $pdata$?DistSq@Vector@@QEAAMPEAU1@@Z

I imagine some of these might need their own logic to pair them up properly, but it's hard to tell with just one object; someone would need both the target and the base to guess what's going on here.

if !symbol.name.starts_with('@') {
return false;
}
if !symbol.name[1..].chars().all(char::is_numeric) {
// Exclude @stringBase0, @GUARD@, etc.
return false;
}
true
}

fn find_symbol(
obj: Option<&Object>,
in_obj: &Object,
in_symbol: &Symbol,
in_symbol_idx: usize,
used: Option<&BTreeSet<usize>>,
fuzzy_literals: bool,
) -> Option<usize> {
let in_symbol = &in_obj.symbols[in_symbol_idx];
let obj = obj?;
let (section_name, section_kind) = symbol_section(in_obj, in_symbol)?;
// Try to find an exact name match
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
symbol.name == in_symbol.name && symbol_section_kind(obj, symbol) == section_kind
}) {
return Some(symbol_idx);
}

// Match compiler-generated symbols against each other (e.g. @251 -> @60)
// If they are at the same address in the same section
if in_symbol.name.starts_with('@')
// If they are in the same section and have the same value
if is_symbol_compiler_generated_literal(in_symbol)
&& matches!(section_kind, SectionKind::Data | SectionKind::Bss)
&& let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
{
let mut closest_match_symbol_idx = None;
let mut closest_match_percent = 0.0;
for (symbol_idx, symbol) in unmatched_symbols(obj, used) {
let Some(section_index) = symbol.section else {
return false;
continue;
};
symbol.name.starts_with('@')
&& symbol.address == in_symbol.address
&& obj.sections[section_index].name == section_name
})
{
if obj.sections[section_index].name != section_name {
continue;
}
if !is_symbol_compiler_generated_literal(symbol) {
continue;
}
match section_kind {
SectionKind::Data => {
// For data, pick the first symbol with exactly matching bytes and relocations.
// If no symbols match exactly, and `fuzzy_literals` is true, pick the closest
// plausible match instead.
if let Ok((left_diff, _right_diff)) =
diff_data_symbol(in_obj, obj, in_symbol_idx, symbol_idx)
&& let Some(match_percent) = left_diff.match_percent
&& (match_percent == 100.0
|| (fuzzy_literals
&& match_percent >= 50.0
&& match_percent > closest_match_percent))
{
closest_match_symbol_idx = Some(symbol_idx);
closest_match_percent = match_percent;
if match_percent == 100.0 {
break;
}
}
}
SectionKind::Bss => {
// For BSS, pick the first symbol that has the exact matching size.
if in_symbol.size == symbol.size {
closest_match_symbol_idx = Some(symbol_idx);
break;
}
}
_ => unreachable!(),
}
}
return closest_match_symbol_idx;
}

// Try to find an exact name match
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
symbol.name == in_symbol.name && symbol_section_kind(obj, symbol) == section_kind
}) {
return Some(symbol_idx);
}

// Match Metrowerks symbol$1234 against symbol$2345
if let Some((prefix, suffix)) = in_symbol.name.split_once('$') {
if !suffix.chars().all(char::is_numeric) {
Expand All @@ -692,13 +763,15 @@ fn find_symbol(
prefix == p
&& s.chars().all(char::is_numeric)
&& symbol_section_kind(obj, symbol) == section_kind
&& symbol_section_name(obj, symbol) == Some(section_name)
} else {
false
}
}) {
return Some(symbol_idx);
}
}

None
}

Expand Down
Loading