1- use std:: { borrow:: Cow , collections:: BTreeMap } ;
1+ use std:: {
2+ borrow:: Cow ,
3+ collections:: { BTreeMap , HashMap } ,
4+ } ;
25
36use anyhow:: { bail, ensure, Result } ;
47use byteorder:: BigEndian ;
@@ -7,7 +10,7 @@ use object::{
710 elf, File , Object , ObjectSection , ObjectSymbol , Relocation , RelocationFlags , RelocationTarget ,
811 Symbol , SymbolKind ,
912} ;
10- use ppc750cl:: { Argument , InsIter , Opcode , GPR } ;
13+ use ppc750cl:: { Argument , InsIter , Opcode , ParsedIns , GPR } ;
1114
1215use crate :: {
1316 arch:: { DataType , ObjArch , ProcessCodeResult } ,
@@ -49,6 +52,8 @@ impl ObjArch for ObjArchPpc {
4952 let ins_count = code. len ( ) / 4 ;
5053 let mut ops = Vec :: < u16 > :: with_capacity ( ins_count) ;
5154 let mut insts = Vec :: < ObjIns > :: with_capacity ( ins_count) ;
55+ let fake_pool_reloc_for_addr =
56+ generate_fake_pool_reloc_for_addr_mapping ( address, code, relocations) ;
5257 for ( cur_addr, mut ins) in InsIter :: new ( code, address as u32 ) {
5358 let reloc = relocations. iter ( ) . find ( |r| ( r. address as u32 & !3 ) == cur_addr) ;
5459 if let Some ( reloc) = reloc {
@@ -145,7 +150,7 @@ impl ObjArch for ObjArchPpc {
145150 size : 4 ,
146151 mnemonic : Cow :: Borrowed ( simplified. mnemonic ) ,
147152 args,
148- reloc : reloc. cloned ( ) ,
153+ reloc : reloc. or ( fake_pool_reloc_for_addr . get ( & cur_addr ) ) . cloned ( ) ,
149154 op : ins. op as u16 ,
150155 branch_dest,
151156 line,
@@ -173,6 +178,7 @@ impl ObjArch for ObjArchPpc {
173178 fn display_reloc ( & self , flags : RelocationFlags ) -> Cow < ' static , str > {
174179 match flags {
175180 RelocationFlags :: Elf { r_type } => match r_type {
181+ elf:: R_PPC_NONE => Cow :: Borrowed ( "R_PPC_NONE" ) , // We use this for fake pool relocs
176182 elf:: R_PPC_ADDR16_LO => Cow :: Borrowed ( "R_PPC_ADDR16_LO" ) ,
177183 elf:: R_PPC_ADDR16_HI => Cow :: Borrowed ( "R_PPC_ADDR16_HI" ) ,
178184 elf:: R_PPC_ADDR16_HA => Cow :: Borrowed ( "R_PPC_ADDR16_HA" ) ,
@@ -188,26 +194,22 @@ impl ObjArch for ObjArchPpc {
188194 }
189195
190196 fn guess_data_type ( & self , instruction : & ObjIns ) -> Option < super :: DataType > {
191- // Always shows the first string of the table. Not ideal, but it's really hard to find
192- // the actual string being referenced.
193197 if instruction. reloc . as_ref ( ) . is_some_and ( |r| r. target . name . starts_with ( "@stringBase" ) ) {
194198 return Some ( DataType :: String ) ;
195199 }
196200
197- match Opcode :: from ( instruction. op as u8 ) {
198- Opcode :: Lbz | Opcode :: Lbzu | Opcode :: Lbzux | Opcode :: Lbzx => Some ( DataType :: Int8 ) ,
199- Opcode :: Lhz | Opcode :: Lhzu | Opcode :: Lhzux | Opcode :: Lhzx => Some ( DataType :: Int16 ) ,
200- Opcode :: Lha | Opcode :: Lhau | Opcode :: Lhaux | Opcode :: Lhax => Some ( DataType :: Int16 ) ,
201- Opcode :: Lwz | Opcode :: Lwzu | Opcode :: Lwzux | Opcode :: Lwzx => Some ( DataType :: Int32 ) ,
202- Opcode :: Lfs | Opcode :: Lfsu | Opcode :: Lfsux | Opcode :: Lfsx => Some ( DataType :: Float ) ,
203- Opcode :: Lfd | Opcode :: Lfdu | Opcode :: Lfdux | Opcode :: Lfdx => Some ( DataType :: Double ) ,
204-
205- Opcode :: Stb | Opcode :: Stbu | Opcode :: Stbux | Opcode :: Stbx => Some ( DataType :: Int8 ) ,
206- Opcode :: Sth | Opcode :: Sthu | Opcode :: Sthux | Opcode :: Sthx => Some ( DataType :: Int16 ) ,
207- Opcode :: Stw | Opcode :: Stwu | Opcode :: Stwux | Opcode :: Stwx => Some ( DataType :: Int32 ) ,
208- Opcode :: Stfs | Opcode :: Stfsu | Opcode :: Stfsux | Opcode :: Stfsx => Some ( DataType :: Float ) ,
209- Opcode :: Stfd | Opcode :: Stfdu | Opcode :: Stfdux | Opcode :: Stfdx => Some ( DataType :: Double ) ,
210- _ => None ,
201+ let op = Opcode :: from ( instruction. op as u8 ) ;
202+ if let Some ( ty) = guess_data_type_from_load_store_inst_op ( op) {
203+ Some ( ty)
204+ } else if op == Opcode :: Addi {
205+ // Assume that any addi instruction that references a local symbol is loading a string.
206+ // This hack is not ideal and results in tons of false positives where it will show
207+ // garbage strings (e.g. misinterpreting arrays, float literals, etc).
208+ // But not all strings are in the @stringBase pool, so the condition above that checks
209+ // the target symbol name would miss some.
210+ Some ( DataType :: String )
211+ } else {
212+ None
211213 }
212214 }
213215
@@ -381,3 +383,196 @@ fn make_symbol_ref(symbol: &Symbol) -> Result<ExtabSymbolRef> {
381383 let demangled_name = cwdemangle:: demangle ( & name, & cwdemangle:: DemangleOptions :: default ( ) ) ;
382384 Ok ( ExtabSymbolRef { original_index : symbol. index ( ) . 0 , name, demangled_name } )
383385}
386+
387+ fn guess_data_type_from_load_store_inst_op ( inst_op : Opcode ) -> Option < DataType > {
388+ match inst_op {
389+ Opcode :: Lbz | Opcode :: Lbzu | Opcode :: Lbzux | Opcode :: Lbzx => Some ( DataType :: Int8 ) ,
390+ Opcode :: Lhz | Opcode :: Lhzu | Opcode :: Lhzux | Opcode :: Lhzx => Some ( DataType :: Int16 ) ,
391+ Opcode :: Lha | Opcode :: Lhau | Opcode :: Lhaux | Opcode :: Lhax => Some ( DataType :: Int16 ) ,
392+ Opcode :: Lwz | Opcode :: Lwzu | Opcode :: Lwzux | Opcode :: Lwzx => Some ( DataType :: Int32 ) ,
393+ Opcode :: Lfs | Opcode :: Lfsu | Opcode :: Lfsux | Opcode :: Lfsx => Some ( DataType :: Float ) ,
394+ Opcode :: Lfd | Opcode :: Lfdu | Opcode :: Lfdux | Opcode :: Lfdx => Some ( DataType :: Double ) ,
395+
396+ Opcode :: Stb | Opcode :: Stbu | Opcode :: Stbux | Opcode :: Stbx => Some ( DataType :: Int8 ) ,
397+ Opcode :: Sth | Opcode :: Sthu | Opcode :: Sthux | Opcode :: Sthx => Some ( DataType :: Int16 ) ,
398+ Opcode :: Stw | Opcode :: Stwu | Opcode :: Stwux | Opcode :: Stwx => Some ( DataType :: Int32 ) ,
399+ Opcode :: Stfs | Opcode :: Stfsu | Opcode :: Stfsux | Opcode :: Stfsx => Some ( DataType :: Float ) ,
400+ Opcode :: Stfd | Opcode :: Stfdu | Opcode :: Stfdux | Opcode :: Stfdx => Some ( DataType :: Double ) ,
401+ _ => None ,
402+ }
403+ }
404+
405+ // Given an instruction, determine if it could accessing data at the address in a register.
406+ // If so, return the offset added to the register's address, the register containing that address,
407+ // and (optionally) which destination register the address is being copied into.
408+ fn get_offset_and_addr_gpr_for_possible_pool_reference (
409+ opcode : Opcode ,
410+ simplified : & ParsedIns ,
411+ ) -> Option < ( i16 , GPR , Option < GPR > ) > {
412+ let args = & simplified. args ;
413+ if guess_data_type_from_load_store_inst_op ( opcode) . is_some ( ) {
414+ match ( args[ 1 ] , args[ 2 ] ) {
415+ ( Argument :: Offset ( offset) , Argument :: GPR ( addr_src_gpr) ) => {
416+ // e.g. lwz. Immediate offset.
417+ Some ( ( offset. 0 , addr_src_gpr, None ) )
418+ }
419+ ( Argument :: GPR ( addr_src_gpr) , Argument :: GPR ( _offset_gpr) ) => {
420+ // e.g. lwzx. The offset is in a register and was likely calculated from an index.
421+ // Treat the offset as being 0 in this case to show the first element of the array.
422+ // It may be possible to show all elements by figuring out the stride of the array
423+ // from the calculations performed on the index before it's put into offset_gpr, but
424+ // this would be much more complicated, so it's not currently done.
425+ Some ( ( 0 , addr_src_gpr, None ) )
426+ }
427+ _ => None ,
428+ }
429+ } else {
430+ // If it's not a load/store instruction, there's two more possibilities we need to handle.
431+ // 1. It could be loading a pointer to a string.
432+ // 2. It could be moving the relocation address plus an offset into a different register to
433+ // load from later.
434+ // If either of these match, we also want to return the destination register that the
435+ // address is being copied into so that we can detect any future references to that new
436+ // register as well.
437+ match ( opcode, args[ 0 ] , args[ 1 ] , args[ 2 ] ) {
438+ (
439+ Opcode :: Addi ,
440+ Argument :: GPR ( addr_dst_gpr) ,
441+ Argument :: GPR ( addr_src_gpr) ,
442+ Argument :: Simm ( simm) ,
443+ ) => Some ( ( simm. 0 , addr_src_gpr, Some ( addr_dst_gpr) ) ) ,
444+ (
445+ Opcode :: Or ,
446+ Argument :: GPR ( addr_dst_gpr) ,
447+ Argument :: GPR ( addr_src_gpr) ,
448+ Argument :: None ,
449+ ) => Some ( ( 0 , addr_src_gpr, Some ( addr_dst_gpr) ) ) , // `mr` or `mr.`
450+ _ => None ,
451+ }
452+ }
453+ }
454+
455+ // We create a fake relocation for an instruction, vaguely simulating what the actual relocation
456+ // might have looked like if it wasn't pooled. This is so minimal changes are needed to display
457+ // pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that
458+ // there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense.
459+ // Also, if this instruction is accessing the middle of a symbol instead of the start, we add an
460+ // addend to indicate that.
461+ fn make_fake_pool_reloc ( offset : i16 , cur_addr : u32 , pool_reloc : & ObjReloc ) -> Option < ObjReloc > {
462+ let offset_from_pool = pool_reloc. addend + offset as i64 ;
463+ let target_address = pool_reloc. target . address . checked_add_signed ( offset_from_pool) ?;
464+ let orig_section_index = pool_reloc. target . orig_section_index ?;
465+ // We also need to create a fake target symbol to go inside our fake relocation.
466+ // This is because we don't have access to list of all symbols in this section, so we can't find
467+ // the real symbol yet. Instead we make a placeholder that has the correct `orig_section_index`
468+ // and `address` fields, and then later on when this information is displayed to the user, we
469+ // can find the real symbol by searching through the object's section's symbols for one that
470+ // contains this address.
471+ let fake_target_symbol = ObjSymbol {
472+ name : "" . to_string ( ) ,
473+ demangled_name : None ,
474+ address : target_address,
475+ section_address : 0 ,
476+ size : 0 ,
477+ size_known : false ,
478+ kind : Default :: default ( ) ,
479+ flags : Default :: default ( ) ,
480+ orig_section_index : Some ( orig_section_index) ,
481+ virtual_address : None ,
482+ original_index : None ,
483+ bytes : vec ! [ ] ,
484+ } ;
485+ // The addend is also fake because we don't know yet if the `target_address` here is the exact
486+ // start of the symbol or if it's in the middle of it.
487+ let fake_addend = 0 ;
488+ Some ( ObjReloc {
489+ flags : RelocationFlags :: Elf { r_type : elf:: R_PPC_NONE } ,
490+ address : cur_addr as u64 ,
491+ target : fake_target_symbol,
492+ addend : fake_addend,
493+ } )
494+ }
495+
496+ // Searches through all instructions in a function, determining which registers have the addresses
497+ // of pooled data relocations in them, finding which instructions load data from those addresses,
498+ // and constructing a mapping of the address of that instruction to a "fake pool relocation" that
499+ // simulates what that instruction's relocation would look like if data hadn't been pooled.
500+ // Limitations: This method currently only goes through the instructions in a function in linear
501+ // order, from start to finish. It does *not* follow any branches. This means that it could have
502+ // false positives or false negatives in determining which relocation is currently loaded in which
503+ // register at any given point in the function, as control flow is not respected.
504+ // There are currently no known examples of this method producing inaccurate results in reality, but
505+ // if examples are found, it may be possible to update this method to also follow all branches so
506+ // that it produces more accurate results.
507+ fn generate_fake_pool_reloc_for_addr_mapping (
508+ address : u64 ,
509+ code : & [ u8 ] ,
510+ relocations : & [ ObjReloc ] ,
511+ ) -> HashMap < u32 , ObjReloc > {
512+ let mut active_pool_relocs = HashMap :: new ( ) ;
513+ let mut pool_reloc_for_addr = HashMap :: new ( ) ;
514+ for ( cur_addr, ins) in InsIter :: new ( code, address as u32 ) {
515+ let simplified = ins. simplified ( ) ;
516+ let reloc = relocations. iter ( ) . find ( |r| ( r. address as u32 & !3 ) == cur_addr) ;
517+
518+ if let Some ( reloc) = reloc {
519+ // This instruction has a real relocation, so it may be a pool load we want to keep
520+ // track of.
521+ let args = & simplified. args ;
522+ match ( ins. op , args[ 0 ] , args[ 1 ] , args[ 2 ] ) {
523+ (
524+ Opcode :: Addi ,
525+ Argument :: GPR ( addr_dst_gpr) ,
526+ Argument :: GPR ( _addr_src_gpr) ,
527+ Argument :: Simm ( _simm) ,
528+ ) => {
529+ active_pool_relocs. insert ( addr_dst_gpr. 0 , reloc. clone ( ) ) ; // `lis` + `addi`
530+ }
531+ (
532+ Opcode :: Ori ,
533+ Argument :: GPR ( addr_dst_gpr) ,
534+ Argument :: GPR ( _addr_src_gpr) ,
535+ Argument :: Uimm ( _uimm) ,
536+ ) => {
537+ active_pool_relocs. insert ( addr_dst_gpr. 0 , reloc. clone ( ) ) ; // `lis` + `ori`
538+ }
539+ ( Opcode :: B , _, _, _) => {
540+ if simplified. mnemonic == "bl" {
541+ // When encountering a function call, clear any active pool relocations from
542+ // the volatile registers (r0, r3-r12), but not the nonvolatile registers.
543+ active_pool_relocs. remove ( & 0 ) ;
544+ for gpr in 3 ..12 {
545+ active_pool_relocs. remove ( & gpr) ;
546+ }
547+ }
548+ }
549+ _ => { }
550+ }
551+ } else if let Some ( ( offset, addr_src_gpr, addr_dst_gpr) ) =
552+ get_offset_and_addr_gpr_for_possible_pool_reference ( ins. op , & simplified)
553+ {
554+ // This instruction doesn't have a real relocation, so it may be a reference to one of
555+ // the already-loaded pools.
556+ if let Some ( pool_reloc) = active_pool_relocs. get ( & addr_src_gpr. 0 ) {
557+ if let Some ( fake_pool_reloc) = make_fake_pool_reloc ( offset, cur_addr, pool_reloc) {
558+ pool_reloc_for_addr. insert ( cur_addr, fake_pool_reloc) ;
559+ }
560+ if let Some ( addr_dst_gpr) = addr_dst_gpr {
561+ // If the address of the pool relocation got copied into another register, we
562+ // need to keep track of it in that register too as future instructions may
563+ // reference the symbol indirectly via this new register, instead of the
564+ // register the symbol's address was originally loaded into.
565+ // For example, the start of the function might `lis` + `addi` the start of the
566+ // ...data pool into r25, and then later the start of a loop will `addi` r25
567+ // with the offset within the .data section of an array variable into r21.
568+ // Then the body of the loop will `lwzx` one of the array elements from r21.
569+ let mut new_reloc = pool_reloc. clone ( ) ;
570+ new_reloc. addend += offset as i64 ;
571+ active_pool_relocs. insert ( addr_dst_gpr. 0 , new_reloc) ;
572+ }
573+ }
574+ }
575+ }
576+
577+ pool_reloc_for_addr
578+ }
0 commit comments