Skip to content
Merged
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ android.keystore
*.frag
*.vert
*.metal
.vscode/launch.json
.vscode/
1 change: 1 addition & 0 deletions objdiff-core/src/arch/arm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ impl ObjArch for ObjArchArm {
mnemonic: Cow::Borrowed(parsed_ins.mnemonic),
args,
reloc,
fake_pool_reloc: None,
branch_dest,
line,
formatted: parsed_ins.display(display_options).to_string(),
Expand Down
2 changes: 2 additions & 0 deletions objdiff-core/src/arch/arm64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ impl ObjArch for ObjArchArm64 {
mnemonic: Cow::Borrowed("<invalid>"),
args: vec![],
reloc: None,
fake_pool_reloc: None,
branch_dest: None,
line: None,
formatted: "".to_string(),
Expand Down Expand Up @@ -121,6 +122,7 @@ impl ObjArch for ObjArchArm64 {
mnemonic: Cow::Borrowed(mnemonic),
args,
reloc,
fake_pool_reloc: None,
branch_dest,
line,
formatted: ins.to_string(),
Expand Down
1 change: 1 addition & 0 deletions objdiff-core/src/arch/mips.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ impl ObjArch for ObjArchMips {
mnemonic: Cow::Borrowed(mnemonic),
args,
reloc: reloc.cloned(),
fake_pool_reloc: None,
branch_dest,
line,
formatted,
Expand Down
23 changes: 16 additions & 7 deletions objdiff-core/src/arch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,22 @@ pub enum DataType {

impl DataType {
pub fn display_bytes<Endian: ByteOrder>(&self, bytes: &[u8]) -> Option<String> {
// TODO: Attempt to interpret large symbols as arrays of a smaller type,
// fallback to intrepreting it as bytes.
// https://github.com/encounter/objdiff/issues/124
if self.required_len().is_some_and(|l| bytes.len() != l) {
log::warn!("Failed to display a symbol value for a symbol whose size doesn't match the instruction referencing it.");
if self.required_len().is_some_and(|l| bytes.len() < l) {
log::warn!("Failed to display a symbol value for a symbol whose size is too small for instruction referencing it.");
return None;
}
let mut bytes = bytes;
if self.required_len().is_some_and(|l| bytes.len() > l) {
// If the symbol's size is larger a single instance of this data type, we take just the
// bytes necessary for one of them in order to display the first element of the array.
bytes = &bytes[0..self.required_len().unwrap()];
// TODO: Attempt to interpret large symbols as arrays of a smaller type and show all
// elements of the array instead. https://github.com/encounter/objdiff/issues/124
// However, note that the stride of an array can not always be determined just by the
// data type guessed by the single instruction accessing it. There can also be arrays of
// structs that contain multiple elements of different types, so if other elements after
// the first one were to be displayed in this manner, they may be inaccurate.
}

match self {
DataType::Int8 => {
Expand Down Expand Up @@ -86,10 +95,10 @@ impl DataType {
}
}
DataType::Float => {
format!("Float: {}", Endian::read_f32(bytes))
format!("Float: {:?}f", Endian::read_f32(bytes))
}
DataType::Double => {
format!("Double: {}", Endian::read_f64(bytes))
format!("Double: {:?}", Endian::read_f64(bytes))
}
DataType::Bytes => {
format!("Bytes: {:#?}", bytes)
Expand Down
241 changes: 222 additions & 19 deletions objdiff-core/src/arch/ppc.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use std::{borrow::Cow, collections::BTreeMap};
use std::{
borrow::Cow,
collections::{BTreeMap, HashMap},
};

use anyhow::{bail, ensure, Result};
use byteorder::BigEndian;
Expand All @@ -7,7 +10,7 @@ use object::{
elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, RelocationTarget,
Symbol, SymbolKind,
};
use ppc750cl::{Argument, InsIter, Opcode, GPR};
use ppc750cl::{Argument, InsIter, Opcode, ParsedIns, GPR};

use crate::{
arch::{DataType, ObjArch, ProcessCodeResult},
Expand Down Expand Up @@ -49,6 +52,8 @@ impl ObjArch for ObjArchPpc {
let ins_count = code.len() / 4;
let mut ops = Vec::<u16>::with_capacity(ins_count);
let mut insts = Vec::<ObjIns>::with_capacity(ins_count);
let fake_pool_reloc_for_addr =
generate_fake_pool_reloc_for_addr_mapping(address, code, relocations);
for (cur_addr, mut ins) in InsIter::new(code, address as u32) {
let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
if let Some(reloc) = reloc {
Expand Down Expand Up @@ -146,6 +151,7 @@ impl ObjArch for ObjArchPpc {
mnemonic: Cow::Borrowed(simplified.mnemonic),
args,
reloc: reloc.cloned(),
fake_pool_reloc: fake_pool_reloc_for_addr.get(&cur_addr).cloned(),
op: ins.op as u16,
branch_dest,
line,
Expand Down Expand Up @@ -173,6 +179,7 @@ impl ObjArch for ObjArchPpc {
fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> {
match flags {
RelocationFlags::Elf { r_type } => match r_type {
elf::R_PPC_NONE => Cow::Borrowed("R_PPC_NONE"), // We use this for fake pool relocs
elf::R_PPC_ADDR16_LO => Cow::Borrowed("R_PPC_ADDR16_LO"),
elf::R_PPC_ADDR16_HI => Cow::Borrowed("R_PPC_ADDR16_HI"),
elf::R_PPC_ADDR16_HA => Cow::Borrowed("R_PPC_ADDR16_HA"),
Expand All @@ -188,26 +195,29 @@ impl ObjArch for ObjArchPpc {
}

fn guess_data_type(&self, instruction: &ObjIns) -> Option<super::DataType> {
// Always shows the first string of the table. Not ideal, but it's really hard to find
// the actual string being referenced.
if instruction.reloc.as_ref().is_some_and(|r| r.target.name.starts_with("@stringBase")) {
if instruction
.reloc
.as_ref()
.or(instruction.fake_pool_reloc.as_ref())
.is_some_and(|r| r.target.name.starts_with("@stringBase"))
{
return Some(DataType::String);
}

match Opcode::from(instruction.op as u8) {
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),

Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
_ => None,
let op = Opcode::from(instruction.op as u8);
if let Some(ty) = guess_data_type_from_load_store_inst_op(op) {
Some(ty)
} else if op == Opcode::Addi {
// Assume that any addi instruction that references a local symbol is loading a string.
// This hack is not ideal and results in tons of false positives where it will show
// garbage strings (e.g. misinterpreting arrays, float literals, etc).
// But there isn't much other choice as not all strings are in the @stringBase pool.
// And even those that are would be missed by the target.name.starts_with("@stringBase")
// hack above for fake pooled relocations, as they have an empty string placeholder for
// the target symbol name.
Some(DataType::String)
} else {
None
}
}

Expand Down Expand Up @@ -381,3 +391,196 @@ fn make_symbol_ref(symbol: &Symbol) -> Result<ExtabSymbolRef> {
let demangled_name = cwdemangle::demangle(&name, &cwdemangle::DemangleOptions::default());
Ok(ExtabSymbolRef { original_index: symbol.index().0, name, demangled_name })
}

fn guess_data_type_from_load_store_inst_op(inst_op: Opcode) -> Option<DataType> {
match inst_op {
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),

Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
_ => None,
}
}

// Given an instruction, determine if it could accessing data at the address in a register.
// If so, return the offset added to the register's address, the register containing that address,
// and (optionally) which destination register the address is being copied into.
fn get_offset_and_addr_gpr_for_possible_pool_reference(
opcode: Opcode,
simplified: &ParsedIns,
) -> Option<(i16, GPR, Option<GPR>)> {
let args = &simplified.args;
if guess_data_type_from_load_store_inst_op(opcode).is_some() {
match (args[1], args[2]) {
(Argument::Offset(offset), Argument::GPR(addr_src_gpr)) => {
// e.g. lwz. Immediate offset.
Some((offset.0, addr_src_gpr, None))
}
(Argument::GPR(addr_src_gpr), Argument::GPR(_offset_gpr)) => {
// e.g. lwzx. The offset is in a register and was likely calculated from an index.
// Treat the offset as being 0 in this case to show the first element of the array.
// It may be possible to show all elements by figuring out the stride of the array
// from the calculations performed on the index before it's put into offset_gpr, but
// this would be much more complicated, so it's not currently done.
Some((0, addr_src_gpr, None))
}
_ => None,
}
} else {
// If it's not a load/store instruction, there's two more possibilities we need to handle.
// 1. It could be loading a pointer to a string.
// 2. It could be moving the relocation address plus an offset into a different register to
// load from later.
// If either of these match, we also want to return the destination register that the
// address is being copied into so that we can detect any future references to that new
// register as well.
match (opcode, args[0], args[1], args[2]) {
(
Opcode::Addi,
Argument::GPR(addr_dst_gpr),
Argument::GPR(addr_src_gpr),
Argument::Simm(simm),
) => Some((simm.0, addr_src_gpr, Some(addr_dst_gpr))),
(
Opcode::Or,
Argument::GPR(addr_dst_gpr),
Argument::GPR(addr_src_gpr),
Argument::None,
) => Some((0, addr_src_gpr, Some(addr_dst_gpr))), // `mr` or `mr.`
_ => None,
}
}
}

// We create a fake relocation for an instruction, vaguely simulating what the actual relocation
// might have looked like if it wasn't pooled. This is so minimal changes are needed to display
// pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that
// there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense.
// Also, if this instruction is accessing the middle of a symbol instead of the start, we add an
// addend to indicate that.
fn make_fake_pool_reloc(offset: i16, cur_addr: u32, pool_reloc: &ObjReloc) -> Option<ObjReloc> {
let offset_from_pool = pool_reloc.addend + offset as i64;
let target_address = pool_reloc.target.address.checked_add_signed(offset_from_pool)?;
let orig_section_index = pool_reloc.target.orig_section_index?;
// We also need to create a fake target symbol to go inside our fake relocation.
// This is because we don't have access to list of all symbols in this section, so we can't find
// the real symbol yet. Instead we make a placeholder that has the correct `orig_section_index`
// and `address` fields, and then later on when this information is displayed to the user, we
// can find the real symbol by searching through the object's section's symbols for one that
// contains this address.
let fake_target_symbol = ObjSymbol {
name: "".to_string(),
demangled_name: None,
address: target_address,
section_address: 0,
size: 0,
size_known: false,
kind: Default::default(),
flags: Default::default(),
orig_section_index: Some(orig_section_index),
virtual_address: None,
original_index: None,
bytes: vec![],
};
// The addend is also fake because we don't know yet if the `target_address` here is the exact
// start of the symbol or if it's in the middle of it.
let fake_addend = 0;
Some(ObjReloc {
flags: RelocationFlags::Elf { r_type: elf::R_PPC_NONE },
address: cur_addr as u64,
target: fake_target_symbol,
addend: fake_addend,
})
}

// Searches through all instructions in a function, determining which registers have the addresses
// of pooled data relocations in them, finding which instructions load data from those addresses,
// and constructing a mapping of the address of that instruction to a "fake pool relocation" that
// simulates what that instruction's relocation would look like if data hadn't been pooled.
// Limitations: This method currently only goes through the instructions in a function in linear
// order, from start to finish. It does *not* follow any branches. This means that it could have
// false positives or false negatives in determining which relocation is currently loaded in which
// register at any given point in the function, as control flow is not respected.
// There are currently no known examples of this method producing inaccurate results in reality, but
// if examples are found, it may be possible to update this method to also follow all branches so
// that it produces more accurate results.
fn generate_fake_pool_reloc_for_addr_mapping(
address: u64,
code: &[u8],
relocations: &[ObjReloc],
) -> HashMap<u32, ObjReloc> {
let mut active_pool_relocs = HashMap::new();
let mut pool_reloc_for_addr = HashMap::new();
for (cur_addr, ins) in InsIter::new(code, address as u32) {
let simplified = ins.simplified();
let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);

if let Some(reloc) = reloc {
// This instruction has a real relocation, so it may be a pool load we want to keep
// track of.
let args = &simplified.args;
match (ins.op, args[0], args[1], args[2]) {
(
Opcode::Addi,
Argument::GPR(addr_dst_gpr),
Argument::GPR(_addr_src_gpr),
Argument::Simm(_simm),
) => {
active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `addi`
}
(
Opcode::Ori,
Argument::GPR(addr_dst_gpr),
Argument::GPR(_addr_src_gpr),
Argument::Uimm(_uimm),
) => {
active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `ori`
}
(Opcode::B, _, _, _) => {
if simplified.mnemonic == "bl" {
// When encountering a function call, clear any active pool relocations from
// the volatile registers (r0, r3-r12), but not the nonvolatile registers.
active_pool_relocs.remove(&0);
for gpr in 3..12 {
active_pool_relocs.remove(&gpr);
}
}
}
_ => {}
}
} else if let Some((offset, addr_src_gpr, addr_dst_gpr)) =
get_offset_and_addr_gpr_for_possible_pool_reference(ins.op, &simplified)
{
// This instruction doesn't have a real relocation, so it may be a reference to one of
// the already-loaded pools.
if let Some(pool_reloc) = active_pool_relocs.get(&addr_src_gpr.0) {
if let Some(fake_pool_reloc) = make_fake_pool_reloc(offset, cur_addr, pool_reloc) {
pool_reloc_for_addr.insert(cur_addr, fake_pool_reloc);
}
if let Some(addr_dst_gpr) = addr_dst_gpr {
// If the address of the pool relocation got copied into another register, we
// need to keep track of it in that register too as future instructions may
// reference the symbol indirectly via this new register, instead of the
// register the symbol's address was originally loaded into.
// For example, the start of the function might `lis` + `addi` the start of the
// ...data pool into r25, and then later the start of a loop will `addi` r25
// with the offset within the .data section of an array variable into r21.
// Then the body of the loop will `lwzx` one of the array elements from r21.
let mut new_reloc = pool_reloc.clone();
new_reloc.addend += offset as i64;
active_pool_relocs.insert(addr_dst_gpr.0, new_reloc);
}
}
}
}

pool_reloc_for_addr
}
2 changes: 2 additions & 0 deletions objdiff-core/src/arch/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ impl ObjArch for ObjArchX86 {
mnemonic: Cow::Borrowed("<invalid>"),
args: vec![],
reloc: None,
fake_pool_reloc: None,
branch_dest: None,
line: None,
formatted: String::new(),
Expand All @@ -79,6 +80,7 @@ impl ObjArch for ObjArchX86 {
mnemonic: Cow::Borrowed("<invalid>"),
args: vec![],
reloc: reloc.cloned(),
fake_pool_reloc: None,
branch_dest: None,
line,
formatted: String::new(),
Expand Down
1 change: 1 addition & 0 deletions objdiff-core/src/obj/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ pub struct ObjIns {
pub mnemonic: Cow<'static, str>,
pub args: Vec<ObjInsArg>,
pub reloc: Option<ObjReloc>,
pub fake_pool_reloc: Option<ObjReloc>,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we can't store this in reloc?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that, there's no scenario where reloc and fake_pool_reloc need to both be Some, so it might work. The reason I didn't do that initially is because reloc is accessed in many more places in the code than just the hover tooltip and I wasn't sure if I should be messing with all those without understanding them.

I just checked all the references to reloc now:

  1. The JSON output of objdiff-cli --output (in Instruction::new, might also be used by wasm?):
    image

  2. The context menu when right-clicking on an instruction (in ins_context_menu):
    image

There are other places besides those 2 that reference the field, but I think they rely on the relocation's type being a supported one, not R_PPC_NONE, so they don't actually show up in a user-visible place in practice.

Notice that if we go with this route there's no name, size, etc for the symbol being referenced here because of the hack where I replaced the target symbol with an empty placeholder.

Without the placeholder hack, the actual symbol is correctly displayed:
image

If you want I can merge the two fields and add checks to Instruction::new and ins_context_menu to skip showing the symbol if its name is empty. If this is done, any more references to ObjIns.reloc added in the future would also need similar checks to prevent the placeholder from being used.

Alternative, maybe less hacky approach: instead of the hack where I resolve the placeholder to the real symbol in the GUI code for showing the tooltip every frame, I could do it just once diff/code.rs process_code_symbol and mutate the result before returning it.
I didn't think of this before, but unlike ObjArch::process_code, process_code_symbol already has access to obj.sections without changing the function signature.
This would allow the actual symbol name to be displayed everywhere more consistently. It would also avoid needing to add repetitive checks on !target.name.is_empty() every time ins.reloc is used anywhere in the future to avoid bugs.
Basically, anything that calls process_code_symbol would see reloc as having the correct pooled reference target symbol. And it seems like objdiff only calls ObjArch::process_code via process_code_symbol.

Do other projects that rely on objdiff like dtk use this reloc field for anything? Would it be bad for it to refer to an indirect pooled reference there?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pushed a couple commits with the changes I described above if you want to see for yourself if they're okay. From my testing it seems to work fine for both objdiff-gui and objdiff-cli and causes the fake relocation symbol to display everywhere the real one would:
2024-12-03_19_06_43_312_Code

I didn't test with other things like wasm/dtk.

pub branch_dest: Option<u64>,
/// Line number
pub line: Option<u32>,
Expand Down
2 changes: 1 addition & 1 deletion objdiff-gui/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ exec = "0.3"

# native:
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
tracing-subscriber = "0.3"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

# web:
[target.'cfg(target_arch = "wasm32")'.dependencies]
Expand Down
Loading
Loading