Skip to content

Commit 1e62d46

Browse files
committed
Make function size inference logic arch-specific
For MIPS, account for delay slot nops. For x86, check for trailing nops (0x90). For PPC, check for 4-byte 0x00 padding. Resolves #229
1 parent 1205e8c commit 1e62d46

File tree

12 files changed

+131
-146
lines changed

12 files changed

+131
-146
lines changed

Cargo.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ strip = "debuginfo"
1414
codegen-units = 1
1515

1616
[workspace.package]
17-
version = "3.0.0-beta.12"
17+
version = "3.0.0-beta.13"
1818
authors = ["Luke Street <[email protected]>"]
1919
edition = "2024"
2020
license = "MIT OR Apache-2.0"

objdiff-core/src/arch/mips.rs

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use crate::{
1515
diff::{DiffObjConfig, MipsAbi, MipsInstrCategory, display::InstructionPart},
1616
obj::{
1717
InstructionArg, InstructionArgValue, InstructionRef, Relocation, RelocationFlags,
18-
ResolvedInstructionRef, ResolvedRelocation, SymbolFlag, SymbolFlagSet,
18+
ResolvedInstructionRef, ResolvedRelocation, Section, Symbol, SymbolFlag, SymbolFlagSet,
1919
},
2020
};
2121

@@ -140,6 +140,14 @@ impl ArchMips {
140140
})
141141
}
142142

143+
fn default_instruction_flags(&self) -> rabbitizer::InstructionFlags {
144+
match self.isa_extension {
145+
Some(extension) => rabbitizer::InstructionFlags::new_extension(extension),
146+
None => rabbitizer::InstructionFlags::new(IsaVersion::MIPS_III),
147+
}
148+
.with_abi(self.abi)
149+
}
150+
143151
fn instruction_flags(&self, diff_config: &DiffObjConfig) -> rabbitizer::InstructionFlags {
144152
let isa_extension = match diff_config.mips_instr_category {
145153
MipsInstrCategory::Auto => self.isa_extension,
@@ -151,7 +159,7 @@ impl ArchMips {
151159
};
152160
match isa_extension {
153161
Some(extension) => rabbitizer::InstructionFlags::new_extension(extension),
154-
None => rabbitizer::InstructionFlags::new_isa(IsaVersion::MIPS_III, None),
162+
None => rabbitizer::InstructionFlags::new(IsaVersion::MIPS_III),
155163
}
156164
.with_abi(match diff_config.mips_abi {
157165
MipsAbi::Auto => self.abi,
@@ -331,6 +339,36 @@ impl Arch for ArchMips {
331339
}
332340
flags
333341
}
342+
343+
fn infer_function_size(
344+
&self,
345+
symbol: &Symbol,
346+
section: &Section,
347+
next_address: u64,
348+
) -> Result<u64> {
349+
// Trim any trailing 4-byte zeroes from the end (nops)
350+
let mut new_address = next_address;
351+
while new_address >= symbol.address + 4
352+
&& let Some(data) = section.data_range(new_address - 4, 4)
353+
&& data == [0u8; 4]
354+
{
355+
new_address -= 4;
356+
}
357+
// Check if the last instruction has a delay slot, if so, include the delay slot nop
358+
if new_address + 4 <= next_address
359+
&& new_address >= symbol.address + 4
360+
&& let Some(data) = section.data_range(new_address - 4, 4)
361+
&& let instruction = rabbitizer::Instruction::new(
362+
self.endianness.read_u32_bytes(data.try_into().unwrap()),
363+
Vram::new((new_address - 4) as u32),
364+
self.default_instruction_flags(),
365+
)
366+
&& instruction.opcode().has_delay_slot()
367+
{
368+
new_address += 4;
369+
}
370+
Ok(new_address.saturating_sub(symbol.address))
371+
}
334372
}
335373

336374
fn push_args(

objdiff-core/src/arch/mod.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,15 @@ pub trait Arch: Send + Sync + Debug {
406406
) -> Vec<ContextItem> {
407407
Vec::new()
408408
}
409+
410+
fn infer_function_size(
411+
&self,
412+
symbol: &Symbol,
413+
_section: &Section,
414+
next_address: u64,
415+
) -> Result<u64> {
416+
Ok(next_address.saturating_sub(symbol.address))
417+
}
409418
}
410419

411420
pub fn new_arch(object: &object::File) -> Result<Box<dyn Arch>> {

objdiff-core/src/arch/ppc/mod.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use crate::{
2020
},
2121
obj::{
2222
FlowAnalysisResult, InstructionRef, Object, Relocation, RelocationFlags,
23-
ResolvedInstructionRef, ResolvedRelocation, Symbol, SymbolFlag, SymbolFlagSet,
23+
ResolvedInstructionRef, ResolvedRelocation, Section, Symbol, SymbolFlag, SymbolFlagSet,
2424
},
2525
};
2626

@@ -455,6 +455,22 @@ impl Arch for ArchPpc {
455455
}
456456
out
457457
}
458+
459+
fn infer_function_size(
460+
&self,
461+
symbol: &Symbol,
462+
section: &Section,
463+
mut next_address: u64,
464+
) -> Result<u64> {
465+
// Trim any trailing 4-byte zeroes from the end (padding)
466+
while next_address >= symbol.address + 4
467+
&& let Some(data) = section.data_range(next_address - 4, 4)
468+
&& data == [0u8; 4]
469+
{
470+
next_address -= 4;
471+
}
472+
Ok(next_address.saturating_sub(symbol.address))
473+
}
458474
}
459475

460476
impl ArchPpc {

objdiff-core/src/arch/x86.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use object::{Endian as _, Object as _, ObjectSection as _, elf, pe};
1111
use crate::{
1212
arch::{Arch, RelocationOverride, RelocationOverrideTarget},
1313
diff::{DiffObjConfig, X86Formatter, display::InstructionPart},
14-
obj::{InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef},
14+
obj::{InstructionRef, Relocation, RelocationFlags, ResolvedInstructionRef, Section, Symbol},
1515
};
1616

1717
#[derive(Debug)]
@@ -303,6 +303,52 @@ impl Arch for ArchX86 {
303303
fn data_reloc_size(&self, flags: RelocationFlags) -> usize {
304304
self.reloc_size(flags).unwrap_or(1)
305305
}
306+
307+
fn infer_function_size(
308+
&self,
309+
symbol: &Symbol,
310+
section: &Section,
311+
next_address: u64,
312+
) -> Result<u64> {
313+
let Ok(size) = (next_address - symbol.address).try_into() else {
314+
return Ok(next_address.saturating_sub(symbol.address));
315+
};
316+
let Some(code) = section.data_range(symbol.address, size) else {
317+
return Ok(0);
318+
};
319+
// Decode instructions to find the last non-NOP instruction
320+
let mut decoder = self.decoder(code, symbol.address);
321+
let mut instruction = Instruction::default();
322+
let mut new_address = 0;
323+
let mut reloc_iter = section.relocations.iter().peekable();
324+
'outer: while decoder.can_decode() {
325+
let address = decoder.ip();
326+
while let Some(reloc) = reloc_iter.peek() {
327+
match reloc.address.cmp(&address) {
328+
Ordering::Less => {
329+
reloc_iter.next();
330+
}
331+
Ordering::Equal => {
332+
// If the instruction starts at a relocation, it's inline data
333+
let reloc_size = self.reloc_size(reloc.flags).with_context(|| {
334+
format!("Unsupported inline x86 relocation {:?}", reloc.flags)
335+
})?;
336+
if decoder.set_position(decoder.position() + reloc_size).is_ok() {
337+
new_address = address + reloc_size as u64;
338+
decoder.set_ip(new_address);
339+
continue 'outer;
340+
}
341+
}
342+
Ordering::Greater => break,
343+
}
344+
}
345+
decoder.decode_out(&mut instruction);
346+
if instruction.mnemonic() != iced_x86::Mnemonic::Nop {
347+
new_address = instruction.next_ip();
348+
}
349+
}
350+
Ok(new_address.saturating_sub(symbol.address))
351+
}
306352
}
307353

308354
struct InstructionFormatterOutput<'a> {

objdiff-core/src/obj/read.rs

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ fn map_symbols(
122122
}
123123

124124
// Infer symbol sizes for 0-size symbols
125-
infer_symbol_sizes(&mut symbols, sections);
125+
infer_symbol_sizes(arch, &mut symbols, sections)?;
126126

127127
Ok((symbols, symbol_indices))
128128
}
@@ -136,7 +136,7 @@ fn is_local_label(symbol: &Symbol) -> bool {
136136
&& LABEL_PREFIXES.iter().any(|p| symbol.name.starts_with(p))
137137
}
138138

139-
fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
139+
fn infer_symbol_sizes(arch: &dyn Arch, symbols: &mut [Symbol], sections: &[Section]) -> Result<()> {
140140
// Create a sorted list of symbol indices by section
141141
let mut symbols_with_section = Vec::<usize>::with_capacity(symbols.len());
142142
for (i, symbol) in symbols.iter().enumerate() {
@@ -206,18 +206,13 @@ fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
206206
iter_idx += 1;
207207
};
208208
let section = &sections[section_idx];
209-
let mut next_address =
209+
let next_address =
210210
next_symbol.map(|s| s.address).unwrap_or_else(|| section.address + section.size);
211-
if section.kind == SectionKind::Code {
212-
// For functions, trim any trailing 4-byte zeroes from the end (padding, nops)
213-
while next_address > symbol.address + 4
214-
&& let Some(data) = section.data_range(next_address - 4, 4)
215-
&& data == [0u8; 4]
216-
{
217-
next_address -= 4;
218-
}
219-
}
220-
let new_size = next_address.saturating_sub(symbol.address);
211+
let new_size = if section.kind == SectionKind::Code {
212+
arch.infer_function_size(symbol, section, next_address)?
213+
} else {
214+
next_address.saturating_sub(symbol.address)
215+
};
221216
if new_size > 0 {
222217
let symbol = &mut symbols[symbol_idx];
223218
symbol.size = new_size;
@@ -234,6 +229,7 @@ fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
234229
}
235230
}
236231
}
232+
Ok(())
237233
}
238234

239235
fn map_sections(

objdiff-core/tests/snapshots/arch_x86__read_x86_jumptable-2.snap

Lines changed: 0 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -507,116 +507,4 @@ expression: diff.instruction_rows
507507
),
508508
arg_diff: [],
509509
},
510-
InstructionDiffRow {
511-
ins_ref: Some(
512-
InstructionRef {
513-
address: 88,
514-
size: 1,
515-
opcode: 465,
516-
branch_dest: None,
517-
},
518-
),
519-
kind: None,
520-
branch_from: None,
521-
branch_to: None,
522-
arg_diff: [],
523-
},
524-
InstructionDiffRow {
525-
ins_ref: Some(
526-
InstructionRef {
527-
address: 89,
528-
size: 1,
529-
opcode: 465,
530-
branch_dest: None,
531-
},
532-
),
533-
kind: None,
534-
branch_from: None,
535-
branch_to: None,
536-
arg_diff: [],
537-
},
538-
InstructionDiffRow {
539-
ins_ref: Some(
540-
InstructionRef {
541-
address: 90,
542-
size: 1,
543-
opcode: 465,
544-
branch_dest: None,
545-
},
546-
),
547-
kind: None,
548-
branch_from: None,
549-
branch_to: None,
550-
arg_diff: [],
551-
},
552-
InstructionDiffRow {
553-
ins_ref: Some(
554-
InstructionRef {
555-
address: 91,
556-
size: 1,
557-
opcode: 465,
558-
branch_dest: None,
559-
},
560-
),
561-
kind: None,
562-
branch_from: None,
563-
branch_to: None,
564-
arg_diff: [],
565-
},
566-
InstructionDiffRow {
567-
ins_ref: Some(
568-
InstructionRef {
569-
address: 92,
570-
size: 1,
571-
opcode: 465,
572-
branch_dest: None,
573-
},
574-
),
575-
kind: None,
576-
branch_from: None,
577-
branch_to: None,
578-
arg_diff: [],
579-
},
580-
InstructionDiffRow {
581-
ins_ref: Some(
582-
InstructionRef {
583-
address: 93,
584-
size: 1,
585-
opcode: 465,
586-
branch_dest: None,
587-
},
588-
),
589-
kind: None,
590-
branch_from: None,
591-
branch_to: None,
592-
arg_diff: [],
593-
},
594-
InstructionDiffRow {
595-
ins_ref: Some(
596-
InstructionRef {
597-
address: 94,
598-
size: 1,
599-
opcode: 465,
600-
branch_dest: None,
601-
},
602-
),
603-
kind: None,
604-
branch_from: None,
605-
branch_to: None,
606-
arg_diff: [],
607-
},
608-
InstructionDiffRow {
609-
ins_ref: Some(
610-
InstructionRef {
611-
address: 95,
612-
size: 1,
613-
opcode: 465,
614-
branch_dest: None,
615-
},
616-
),
617-
kind: None,
618-
branch_from: None,
619-
branch_to: None,
620-
arg_diff: [],
621-
},
622510
]

0 commit comments

Comments
 (0)