Skip to content

Commit f2a5913

Browse files
authored
Improve automatic symbol pairing for nameless literals (#247)
* Improve automatic symbol pairing for nameless literals * Fix data reloc diffing when the reloc points to an in-function static symbol * Only pair up literals that match perfectly * Clippy * Do two separate passes when pairing up literals * Fix partially-matching literal pairups not working right * Remove duplicate $ splitting code * Implement $ splitting for section names too * Minor cleanup
1 parent 8d24ec6 commit f2a5913

File tree

2 files changed

+143
-71
lines changed

2 files changed

+143
-71
lines changed

objdiff-core/src/diff/data.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,20 @@ pub fn diff_bss_symbol(
3535
))
3636
}
3737

38+
pub fn symbol_name_matches(left_name: &str, right_name: &str) -> bool {
39+
// Match Metrowerks symbol$1234 against symbol$2345
40+
if let Some((prefix, suffix)) = left_name.split_once('$') {
41+
if !suffix.chars().all(char::is_numeric) {
42+
return false;
43+
}
44+
right_name
45+
.split_once('$')
46+
.is_some_and(|(p, s)| p == prefix && s.chars().all(char::is_numeric))
47+
} else {
48+
left_name == right_name
49+
}
50+
}
51+
3852
fn reloc_eq(
3953
left_obj: &Object,
4054
right_obj: &Object,
@@ -45,8 +59,8 @@ fn reloc_eq(
4559
return false;
4660
}
4761

48-
let symbol_name_addend_matches =
49-
left.symbol.name == right.symbol.name && left.relocation.addend == right.relocation.addend;
62+
let symbol_name_addend_matches = symbol_name_matches(&left.symbol.name, &right.symbol.name)
63+
&& left.relocation.addend == right.relocation.addend;
5064
match (left.symbol.section, right.symbol.section) {
5165
(Some(sl), Some(sr)) => {
5266
// Match if section and name+addend or address match

objdiff-core/src/diff/mod.rs

Lines changed: 127 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::{
1313
code::{diff_code, no_diff_code},
1414
data::{
1515
diff_bss_section, diff_bss_symbol, diff_data_section, diff_data_symbol,
16-
diff_generic_section, no_diff_bss_section, no_diff_data_section,
16+
diff_generic_section, no_diff_bss_section, no_diff_data_section, symbol_name_matches,
1717
},
1818
},
1919
obj::{InstructionRef, Object, Relocation, SectionKind, Symbol, SymbolFlag},
@@ -575,47 +575,60 @@ fn matching_symbols(
575575
&mut matches,
576576
)?;
577577
}
578-
for (symbol_idx, symbol) in left.symbols.iter().enumerate() {
579-
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
580-
continue;
581-
}
582-
let section_kind = symbol_section_kind(left, symbol);
583-
if section_kind == SectionKind::Unknown {
584-
continue;
585-
}
586-
if left_used.contains(&symbol_idx) {
587-
continue;
588-
}
589-
let symbol_match = SymbolMatch {
590-
left: Some(symbol_idx),
591-
right: find_symbol(right, left, symbol, Some(&right_used)),
592-
prev: find_symbol(prev, left, symbol, None),
593-
section_kind,
594-
};
595-
matches.push(symbol_match);
596-
if let Some(right) = symbol_match.right {
597-
right_used.insert(right);
578+
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
579+
// those are correct first, while the second pass catches near matches.
580+
for fuzzy_literals in [false, true] {
581+
for (symbol_idx, symbol) in left.symbols.iter().enumerate() {
582+
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
583+
continue;
584+
}
585+
let section_kind = symbol_section_kind(left, symbol);
586+
if section_kind == SectionKind::Unknown {
587+
continue;
588+
}
589+
if left_used.contains(&symbol_idx) {
590+
continue;
591+
}
592+
let symbol_match = SymbolMatch {
593+
left: Some(symbol_idx),
594+
right: find_symbol(right, left, symbol_idx, Some(&right_used), fuzzy_literals),
595+
prev: find_symbol(prev, left, symbol_idx, None, fuzzy_literals),
596+
section_kind,
597+
};
598+
matches.push(symbol_match);
599+
if let Some(right) = symbol_match.right {
600+
left_used.insert(symbol_idx);
601+
right_used.insert(right);
602+
}
598603
}
599604
}
600605
}
601606
if let Some(right) = right {
602-
for (symbol_idx, symbol) in right.symbols.iter().enumerate() {
603-
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
604-
continue;
605-
}
606-
let section_kind = symbol_section_kind(right, symbol);
607-
if section_kind == SectionKind::Unknown {
608-
continue;
609-
}
610-
if right_used.contains(&symbol_idx) {
611-
continue;
607+
// Do two passes for nameless literals. The first only pairs up perfect matches to ensure
608+
// those are correct first, while the second pass catches near matches.
609+
for fuzzy_literals in [false, true] {
610+
for (symbol_idx, symbol) in right.symbols.iter().enumerate() {
611+
if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) {
612+
continue;
613+
}
614+
let section_kind = symbol_section_kind(right, symbol);
615+
if section_kind == SectionKind::Unknown {
616+
continue;
617+
}
618+
if right_used.contains(&symbol_idx) {
619+
continue;
620+
}
621+
let symbol_match = SymbolMatch {
622+
left: None,
623+
right: Some(symbol_idx),
624+
prev: find_symbol(prev, right, symbol_idx, None, fuzzy_literals),
625+
section_kind,
626+
};
627+
matches.push(symbol_match);
628+
if symbol_match.prev.is_some() {
629+
right_used.insert(symbol_idx);
630+
}
612631
}
613-
matches.push(SymbolMatch {
614-
left: None,
615-
right: Some(symbol_idx),
616-
prev: find_symbol(prev, right, symbol, None),
617-
section_kind,
618-
});
619632
}
620633
}
621634
Ok(matches)
@@ -637,7 +650,10 @@ where
637650

638651
fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str, SectionKind)> {
639652
if let Some(section) = symbol.section.and_then(|section_idx| obj.sections.get(section_idx)) {
640-
Some((section.name.as_str(), section.kind))
653+
// Match x86 .rdata$r against .rdata$rs
654+
let section_name =
655+
section.name.split_once('$').map_or(section.name.as_str(), |(prefix, _)| prefix);
656+
Some((section_name, section.kind))
641657
} else if symbol.flags.contains(SymbolFlag::Common) {
642658
Some((".comm", SectionKind::Common))
643659
} else {
@@ -653,52 +669,94 @@ fn symbol_section_kind(obj: &Object, symbol: &Symbol) -> SectionKind {
653669
}
654670
}
655671

672+
/// Check if a symbol is a compiler-generated literal like @1234.
673+
fn is_symbol_compiler_generated_literal(symbol: &Symbol) -> bool {
674+
if !symbol.name.starts_with('@') {
675+
return false;
676+
}
677+
if !symbol.name[1..].chars().all(char::is_numeric) {
678+
// Exclude @stringBase0, @GUARD@, etc.
679+
return false;
680+
}
681+
true
682+
}
683+
656684
fn find_symbol(
657685
obj: Option<&Object>,
658686
in_obj: &Object,
659-
in_symbol: &Symbol,
687+
in_symbol_idx: usize,
660688
used: Option<&BTreeSet<usize>>,
689+
fuzzy_literals: bool,
661690
) -> Option<usize> {
691+
let in_symbol = &in_obj.symbols[in_symbol_idx];
662692
let obj = obj?;
663693
let (section_name, section_kind) = symbol_section(in_obj, in_symbol)?;
694+
695+
// Match compiler-generated symbols against each other (e.g. @251 -> @60)
696+
// If they are in the same section and have the same value
697+
if is_symbol_compiler_generated_literal(in_symbol)
698+
&& matches!(section_kind, SectionKind::Data | SectionKind::Bss)
699+
{
700+
let mut closest_match_symbol_idx = None;
701+
let mut closest_match_percent = 0.0;
702+
for (symbol_idx, symbol) in unmatched_symbols(obj, used) {
703+
let Some(section_index) = symbol.section else {
704+
continue;
705+
};
706+
if obj.sections[section_index].name != section_name {
707+
continue;
708+
}
709+
if !is_symbol_compiler_generated_literal(symbol) {
710+
continue;
711+
}
712+
match section_kind {
713+
SectionKind::Data => {
714+
// For data, pick the first symbol with exactly matching bytes and relocations.
715+
// If no symbols match exactly, and `fuzzy_literals` is true, pick the closest
716+
// plausible match instead.
717+
if let Ok((left_diff, _right_diff)) =
718+
diff_data_symbol(in_obj, obj, in_symbol_idx, symbol_idx)
719+
&& let Some(match_percent) = left_diff.match_percent
720+
&& (match_percent == 100.0
721+
|| (fuzzy_literals
722+
&& match_percent >= 50.0
723+
&& match_percent > closest_match_percent))
724+
{
725+
closest_match_symbol_idx = Some(symbol_idx);
726+
closest_match_percent = match_percent;
727+
if match_percent == 100.0 {
728+
break;
729+
}
730+
}
731+
}
732+
SectionKind::Bss => {
733+
// For BSS, pick the first symbol that has the exact matching size.
734+
if in_symbol.size == symbol.size {
735+
closest_match_symbol_idx = Some(symbol_idx);
736+
break;
737+
}
738+
}
739+
_ => unreachable!(),
740+
}
741+
}
742+
return closest_match_symbol_idx;
743+
}
744+
664745
// Try to find an exact name match
665746
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
666747
symbol.name == in_symbol.name && symbol_section_kind(obj, symbol) == section_kind
667748
}) {
668749
return Some(symbol_idx);
669750
}
670-
// Match compiler-generated symbols against each other (e.g. @251 -> @60)
671-
// If they are at the same address in the same section
672-
if in_symbol.name.starts_with('@')
673-
&& matches!(section_kind, SectionKind::Data | SectionKind::Bss)
674-
&& let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| {
675-
let Some(section_index) = symbol.section else {
676-
return false;
677-
};
678-
symbol.name.starts_with('@')
679-
&& symbol.address == in_symbol.address
680-
&& obj.sections[section_index].name == section_name
681-
})
682-
{
751+
752+
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| {
753+
symbol_name_matches(&in_symbol.name, &symbol.name)
754+
&& symbol_section_kind(obj, symbol) == section_kind
755+
&& symbol_section(obj, symbol).is_some_and(|(name, _)| name == section_name)
756+
}) {
683757
return Some(symbol_idx);
684758
}
685-
// Match Metrowerks symbol$1234 against symbol$2345
686-
if let Some((prefix, suffix)) = in_symbol.name.split_once('$') {
687-
if !suffix.chars().all(char::is_numeric) {
688-
return None;
689-
}
690-
if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| {
691-
if let Some((p, s)) = symbol.name.split_once('$') {
692-
prefix == p
693-
&& s.chars().all(char::is_numeric)
694-
&& symbol_section_kind(obj, symbol) == section_kind
695-
} else {
696-
false
697-
}
698-
}) {
699-
return Some(symbol_idx);
700-
}
701-
}
759+
702760
None
703761
}
704762

0 commit comments

Comments
 (0)