1- use std:: { borrow:: Cow , collections:: BTreeMap } ;
1+ use std:: {
2+ borrow:: Cow ,
3+ collections:: { BTreeMap , HashMap } ,
4+ } ;
25
36use anyhow:: { bail, ensure, Result } ;
47use byteorder:: BigEndian ;
@@ -7,7 +10,7 @@ use object::{
710 elf, File , Object , ObjectSection , ObjectSymbol , Relocation , RelocationFlags , RelocationTarget ,
811 Symbol , SymbolKind ,
912} ;
10- use ppc750cl:: { Argument , InsIter , Opcode , GPR } ;
13+ use ppc750cl:: { Argument , InsIter , Opcode , ParsedIns , GPR } ;
1114
1215use crate :: {
1316 arch:: { DataType , ObjArch , ProcessCodeResult } ,
@@ -27,6 +30,180 @@ fn is_rel_abs_arg(arg: &Argument) -> bool {
2730
2831fn is_offset_arg ( arg : & Argument ) -> bool { matches ! ( arg, Argument :: Offset ( _) ) }
2932
33+ fn guess_data_type_from_load_store_inst_op ( inst_op : Opcode ) -> Option < DataType > {
34+ match inst_op {
35+ Opcode :: Lbz | Opcode :: Lbzu | Opcode :: Lbzux | Opcode :: Lbzx => Some ( DataType :: Int8 ) ,
36+ Opcode :: Lhz | Opcode :: Lhzu | Opcode :: Lhzux | Opcode :: Lhzx => Some ( DataType :: Int16 ) ,
37+ Opcode :: Lha | Opcode :: Lhau | Opcode :: Lhaux | Opcode :: Lhax => Some ( DataType :: Int16 ) ,
38+ Opcode :: Lwz | Opcode :: Lwzu | Opcode :: Lwzux | Opcode :: Lwzx => Some ( DataType :: Int32 ) ,
39+ Opcode :: Lfs | Opcode :: Lfsu | Opcode :: Lfsux | Opcode :: Lfsx => Some ( DataType :: Float ) ,
40+ Opcode :: Lfd | Opcode :: Lfdu | Opcode :: Lfdux | Opcode :: Lfdx => Some ( DataType :: Double ) ,
41+
42+ Opcode :: Stb | Opcode :: Stbu | Opcode :: Stbux | Opcode :: Stbx => Some ( DataType :: Int8 ) ,
43+ Opcode :: Sth | Opcode :: Sthu | Opcode :: Sthux | Opcode :: Sthx => Some ( DataType :: Int16 ) ,
44+ Opcode :: Stw | Opcode :: Stwu | Opcode :: Stwux | Opcode :: Stwx => Some ( DataType :: Int32 ) ,
45+ Opcode :: Stfs | Opcode :: Stfsu | Opcode :: Stfsux | Opcode :: Stfsx => Some ( DataType :: Float ) ,
46+ Opcode :: Stfd | Opcode :: Stfdu | Opcode :: Stfdux | Opcode :: Stfdx => Some ( DataType :: Double ) ,
47+ _ => None ,
48+ }
49+ }
50+
51+ // Given an instruction, determine if it could accessing data at the address in a register.
52+ // If so, return the offset added to the register's address, the register containing that address,
53+ // and (optionally) which destination register the address is being copied into.
54+ fn get_offset_and_addr_gpr_for_possible_pool_reference (
55+ opcode : Opcode ,
56+ simplified : & ParsedIns ,
57+ ) -> Option < ( i16 , GPR , Option < GPR > ) > {
58+ let args = & simplified. args ;
59+ if guess_data_type_from_load_store_inst_op ( opcode) . is_some ( ) {
60+ match ( args[ 1 ] , args[ 2 ] ) {
61+ ( Argument :: Offset ( offset) , Argument :: GPR ( addr_src_gpr) ) => {
62+ // e.g. lwz. Immediate offset.
63+ Some ( ( offset. 0 , addr_src_gpr, None ) )
64+ }
65+ ( Argument :: GPR ( addr_src_gpr) , Argument :: GPR ( _offset_gpr) ) => {
66+ // e.g. lwzx. The offset is in a register and was likely calculated from an index.
67+ // Treat the offset as being 0 in this case to show the first element of the array.
68+ // It may be possible to show all elements by figuring out the stride of the array
69+ // from the calculations performed on the index before it's put into offset_gpr, but
70+ // this would be much more complicated, so it's not currently done.
71+ Some ( ( 0 , addr_src_gpr, None ) )
72+ }
73+ _ => None ,
74+ }
75+ } else {
76+ // If it's not a load/store instruction, there's two more possibilities we need to handle.
77+ // 1. It could be a reference to @stringBase.
78+ // 2. It could be moving the relocation address plus an offset into a different register to
79+ // load from later.
80+ // If either of these match, we also want to return the destination register that the
81+ // address is being copied into so that we can detect any future references to that new
82+ // register as well.
83+ match ( opcode, args[ 0 ] , args[ 1 ] , args[ 2 ] ) {
84+ (
85+ Opcode :: Addi ,
86+ Argument :: GPR ( addr_dst_gpr) ,
87+ Argument :: GPR ( addr_src_gpr) ,
88+ Argument :: Simm ( simm) ,
89+ ) => Some ( ( simm. 0 , addr_src_gpr, Some ( addr_dst_gpr) ) ) ,
90+ (
91+ Opcode :: Or ,
92+ Argument :: GPR ( addr_dst_gpr) ,
93+ Argument :: GPR ( addr_src_gpr) ,
94+ Argument :: None ,
95+ ) => Some ( ( 0 , addr_src_gpr, Some ( addr_dst_gpr) ) ) , // `mr` or `mr.`
96+ _ => None ,
97+ }
98+ }
99+ }
100+
101+ // We create a fake relocation for an instruction, vaguely simulating what the actual relocation
102+ // might have looked like if it wasn't pooled. This is so minimal changes are needed to display
103+ // pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that
104+ // there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense.
105+ // Also, if this instruction is accessing the middle of a symbol instead of the start, we add an
106+ // addend to indicate that.
107+ fn make_fake_pool_reloc (
108+ offset : i16 ,
109+ cur_addr : u32 ,
110+ pool_reloc : & ObjReloc ,
111+ sections : & [ ObjSection ] ,
112+ ) -> Option < ObjReloc > {
113+ let offset_from_pool = pool_reloc. addend + offset as i64 ;
114+ let target_address = pool_reloc. target . address . checked_add_signed ( offset_from_pool) ?;
115+ let orig_section_index = pool_reloc. target . orig_section_index ?;
116+ let section = sections. iter ( ) . find ( |s| s. orig_index == orig_section_index) ?;
117+ let target_symbol = section
118+ . symbols
119+ . iter ( )
120+ . find ( |s| s. size > 0 && ( s. address ..s. address + s. size ) . contains ( & target_address) ) ?;
121+ let addend = ( target_address - target_symbol. address ) as i64 ;
122+ Some ( ObjReloc {
123+ flags : RelocationFlags :: Elf { r_type : elf:: R_PPC_NONE } ,
124+ address : cur_addr as u64 ,
125+ target : target_symbol. clone ( ) ,
126+ addend,
127+ } )
128+ }
129+
130+ // Searches through all instructions in a function, determining which registers have the addresses
131+ // of pooled data relocations in them, finding which instructions load data from those addresses,
132+ // and constructing a mapping of the address of that instruction to a "fake pool relocation" that
133+ // simulates what that instruction's relocation would look like if data hadn't been pooled.
134+ // Limitations: This method currently only goes through the instructions in a function in linear
135+ // order, from start to finish. It does *not* follow any branches. This means that it could have
136+ // false positives or false negatives in determining which relocation is currently loaded in which
137+ // register at any given point in the function, as control flow is not respected.
138+ // There are currently no known examples of this method producing inaccurate results in reality, but
139+ // if examples are found, it may be possible to update this method to also follow all branches so
140+ // that it produces more accurate results.
141+ fn generate_fake_pool_reloc_for_addr_mapping (
142+ address : u64 ,
143+ code : & [ u8 ] ,
144+ relocations : & [ ObjReloc ] ,
145+ sections : & [ ObjSection ] ,
146+ ) -> HashMap < u32 , ObjReloc > {
147+ let mut active_pool_relocs = HashMap :: new ( ) ;
148+ let mut pool_reloc_for_addr = HashMap :: new ( ) ;
149+ for ( cur_addr, ins) in InsIter :: new ( code, address as u32 ) {
150+ let simplified = ins. simplified ( ) ;
151+ let reloc = relocations. iter ( ) . find ( |r| ( r. address as u32 & !3 ) == cur_addr) ;
152+
153+ if let Some ( reloc) = reloc {
154+ // This instruction has a real relocation, so it may be a pool load we want to keep
155+ // track of.
156+ let args = & simplified. args ;
157+ match ( ins. op , args[ 0 ] , args[ 1 ] , args[ 2 ] ) {
158+ (
159+ Opcode :: Addi ,
160+ Argument :: GPR ( addr_dst_gpr) ,
161+ Argument :: GPR ( _addr_src_gpr) ,
162+ Argument :: Simm ( _simm) ,
163+ ) => {
164+ active_pool_relocs. insert ( addr_dst_gpr. 0 , reloc. clone ( ) ) ; // `lis` + `addi`
165+ }
166+ (
167+ Opcode :: Ori ,
168+ Argument :: GPR ( addr_dst_gpr) ,
169+ Argument :: GPR ( _addr_src_gpr) ,
170+ Argument :: Uimm ( _uimm) ,
171+ ) => {
172+ active_pool_relocs. insert ( addr_dst_gpr. 0 , reloc. clone ( ) ) ; // `lis` + `ori`
173+ }
174+ _ => { }
175+ }
176+ } else if let Some ( ( offset, addr_src_gpr, addr_dst_gpr) ) =
177+ get_offset_and_addr_gpr_for_possible_pool_reference ( ins. op , & simplified)
178+ {
179+ // This instruction doesn't have a real relocation, so it may be a reference to one of
180+ // the already-loaded pools.
181+ if let Some ( pool_reloc) = active_pool_relocs. get ( & addr_src_gpr. 0 ) {
182+ if let Some ( fake_pool_reloc) =
183+ make_fake_pool_reloc ( offset, cur_addr, pool_reloc, sections)
184+ {
185+ pool_reloc_for_addr. insert ( cur_addr, fake_pool_reloc) ;
186+ }
187+ if let Some ( addr_dst_gpr) = addr_dst_gpr {
188+ // If the address of the pool relocation got copied into another register, we
189+ // need to keep track of it in that register too as future instructions may
190+ // reference the symbol indirectly via this new register, instead of the
191+ // register the symbol's address was originally loaded into.
192+ // For example, the start of the function might `lis` + `addi` the start of the
193+ // ...data pool into r25, and then later the start of a loop will `addi` r25
194+ // with the offset within the .data section of an array variable into r21.
195+ // Then the body of the loop will `lwzx` one of the array elements from r21.
196+ let mut new_reloc = pool_reloc. clone ( ) ;
197+ new_reloc. addend += offset as i64 ;
198+ active_pool_relocs. insert ( addr_dst_gpr. 0 , new_reloc) ;
199+ }
200+ }
201+ }
202+ }
203+
204+ pool_reloc_for_addr
205+ }
206+
30207pub struct ObjArchPpc {
31208 /// Exception info
32209 pub extab : Option < BTreeMap < usize , ExceptionInfo > > ,
@@ -45,10 +222,13 @@ impl ObjArch for ObjArchPpc {
45222 relocations : & [ ObjReloc ] ,
46223 line_info : & BTreeMap < u64 , u32 > ,
47224 config : & DiffObjConfig ,
225+ sections : & [ ObjSection ] ,
48226 ) -> Result < ProcessCodeResult > {
49227 let ins_count = code. len ( ) / 4 ;
50228 let mut ops = Vec :: < u16 > :: with_capacity ( ins_count) ;
51229 let mut insts = Vec :: < ObjIns > :: with_capacity ( ins_count) ;
230+ let fake_pool_reloc_for_addr =
231+ generate_fake_pool_reloc_for_addr_mapping ( address, code, relocations, sections) ;
52232 for ( cur_addr, mut ins) in InsIter :: new ( code, address as u32 ) {
53233 let reloc = relocations. iter ( ) . find ( |r| ( r. address as u32 & !3 ) == cur_addr) ;
54234 if let Some ( reloc) = reloc {
@@ -146,6 +326,7 @@ impl ObjArch for ObjArchPpc {
146326 mnemonic : Cow :: Borrowed ( simplified. mnemonic ) ,
147327 args,
148328 reloc : reloc. cloned ( ) ,
329+ fake_pool_reloc : fake_pool_reloc_for_addr. get ( & cur_addr) . cloned ( ) ,
149330 op : ins. op as u16 ,
150331 branch_dest,
151332 line,
@@ -173,6 +354,7 @@ impl ObjArch for ObjArchPpc {
173354 fn display_reloc ( & self , flags : RelocationFlags ) -> Cow < ' static , str > {
174355 match flags {
175356 RelocationFlags :: Elf { r_type } => match r_type {
357+ elf:: R_PPC_NONE => Cow :: Borrowed ( "R_PPC_NONE" ) , // We use this for fake pool relocs
176358 elf:: R_PPC_ADDR16_LO => Cow :: Borrowed ( "R_PPC_ADDR16_LO" ) ,
177359 elf:: R_PPC_ADDR16_HI => Cow :: Borrowed ( "R_PPC_ADDR16_HI" ) ,
178360 elf:: R_PPC_ADDR16_HA => Cow :: Borrowed ( "R_PPC_ADDR16_HA" ) ,
@@ -188,27 +370,16 @@ impl ObjArch for ObjArchPpc {
188370 }
189371
190372 fn guess_data_type ( & self , instruction : & ObjIns ) -> Option < super :: DataType > {
191- // Always shows the first string of the table. Not ideal, but it's really hard to find
192- // the actual string being referenced.
193- if instruction. reloc . as_ref ( ) . is_some_and ( |r| r. target . name . starts_with ( "@stringBase" ) ) {
373+ if instruction
374+ . reloc
375+ . as_ref ( )
376+ . or ( instruction. fake_pool_reloc . as_ref ( ) )
377+ . is_some_and ( |r| r. target . name . starts_with ( "@stringBase" ) )
378+ {
194379 return Some ( DataType :: String ) ;
195380 }
196381
197- match Opcode :: from ( instruction. op as u8 ) {
198- Opcode :: Lbz | Opcode :: Lbzu | Opcode :: Lbzux | Opcode :: Lbzx => Some ( DataType :: Int8 ) ,
199- Opcode :: Lhz | Opcode :: Lhzu | Opcode :: Lhzux | Opcode :: Lhzx => Some ( DataType :: Int16 ) ,
200- Opcode :: Lha | Opcode :: Lhau | Opcode :: Lhaux | Opcode :: Lhax => Some ( DataType :: Int16 ) ,
201- Opcode :: Lwz | Opcode :: Lwzu | Opcode :: Lwzux | Opcode :: Lwzx => Some ( DataType :: Int32 ) ,
202- Opcode :: Lfs | Opcode :: Lfsu | Opcode :: Lfsux | Opcode :: Lfsx => Some ( DataType :: Float ) ,
203- Opcode :: Lfd | Opcode :: Lfdu | Opcode :: Lfdux | Opcode :: Lfdx => Some ( DataType :: Double ) ,
204-
205- Opcode :: Stb | Opcode :: Stbu | Opcode :: Stbux | Opcode :: Stbx => Some ( DataType :: Int8 ) ,
206- Opcode :: Sth | Opcode :: Sthu | Opcode :: Sthux | Opcode :: Sthx => Some ( DataType :: Int16 ) ,
207- Opcode :: Stw | Opcode :: Stwu | Opcode :: Stwux | Opcode :: Stwx => Some ( DataType :: Int32 ) ,
208- Opcode :: Stfs | Opcode :: Stfsu | Opcode :: Stfsux | Opcode :: Stfsx => Some ( DataType :: Float ) ,
209- Opcode :: Stfd | Opcode :: Stfdu | Opcode :: Stfdux | Opcode :: Stfdx => Some ( DataType :: Double ) ,
210- _ => None ,
211- }
382+ guess_data_type_from_load_store_inst_op ( Opcode :: from ( instruction. op as u8 ) )
212383 }
213384
214385 fn display_data_type ( & self , ty : DataType , bytes : & [ u8 ] ) -> Option < String > {
0 commit comments