33# Cross Platform and Multi Architecture Advanced Binary Emulation Framework
44#
55
6- import bisect
76import ctypes
87import json
8+ import re
99import libr
10- from dataclasses import dataclass , fields
10+ from dataclasses import dataclass , field , fields
1111from functools import cached_property , wraps
12- from typing import TYPE_CHECKING , Dict , List , Literal , Tuple , Union
12+ from typing import TYPE_CHECKING , Dict , List , Literal , Optional , Pattern , Tuple , Union
1313from qiling .const import QL_ARCH
1414from qiling .extensions import trace
1515from unicorn import UC_PROT_NONE , UC_PROT_READ , UC_PROT_WRITE , UC_PROT_EXEC , UC_PROT_ALL
@@ -80,6 +80,20 @@ class Symbol(R2Data):
8080 is_imported : bool
8181
8282
@dataclass(unsafe_hash=True, init=False)
class Instruction(R2Data):
    """A single disassembled instruction, populated from keyword arguments."""

    # NOTE: the field names `bytes` and `type` shadow builtins inside the class
    # namespace; they are kept as-is because the constructor reads the "bytes"
    # key from its kwargs under exactly that name.
    # NOTE: a defaulted field (`disasm`) precedes non-default ones. dataclass
    # only rejects that ordering while generating __init__, which init=False
    # disables, so do not flip init back on without reordering.
    offset: int
    size: int
    opcode: str  # raw opcode
    disasm: str = ''  # flag resolved opcode
    bytes: bytes
    type: str

    def __init__(self, **kwargs):
        # let the base class consume the keyword arguments first
        super().__init__(**kwargs)
        # the "bytes" entry is expected to be a hex string; store it decoded
        hex_text = kwargs["bytes"]
        self.bytes = bytes.fromhex(hex_text)
95+
96+
8397@dataclass (unsafe_hash = True , init = False )
8498class Function (R2Data ):
8599 name : str
@@ -90,7 +104,7 @@ class Function(R2Data):
90104
91105@dataclass (unsafe_hash = True , init = False )
92106class Flag (R2Data ):
93- offset : int
107+ offset : int # should be addr but r2 calls it offset
94108 name : str = ''
95109 size : int = 0
96110
@@ -166,7 +180,9 @@ def _cmd(self, cmd: str) -> str:
166180 self ._r2c , ctypes .create_string_buffer (cmd .encode ("utf-8" )))
167181 return ctypes .string_at (r ).decode ('utf-8' )
168182
169- @staticmethod
def _cmdj(self, cmd: str) -> Union[Dict, List[Dict]]:
    """Run *cmd* through ``self._cmd`` and return its output parsed as JSON."""
    raw_output = self._cmd(cmd)
    return json.loads(raw_output)
185+
170186 def aaa (fun ):
171187 @wraps (fun )
172188 def wrapper (self ):
@@ -176,9 +192,6 @@ def wrapper(self):
176192 return fun (self )
177193 return wrapper
178194
179- def _cmdj (self , cmd : str ) -> Union [Dict , List [Dict ]]:
180- return json .loads (self ._cmd (cmd ))
181-
@cached_property
def binfo(self) -> Dict[str, str]:
    """Parsed JSON result of the ``iIj`` command; evaluated once, then cached."""
    info = self._cmdj("iIj")
    return info
@@ -222,13 +235,24 @@ def flags(self) -> List[Flag]:
222235 def xrefs (self ) -> List [Xref ]:
223236 return [Xref (** dic ) for dic in self ._cmdj ("axj" )]
224237
225- def at (self , addr : int ) -> Tuple [Flag , int ]:
226- # the most suitable flag should have address <= addr
227- # bisect_right find the insertion point, right side if value exists
228- idx = bisect .bisect_right (self .flags , Flag (offset = addr ))
229- # minus 1 to find the corresponding flag
230- flag = self .flags [idx - 1 ]
231- return flag , addr - flag .offset
def at(self, addr: int, parse: bool = False) -> Union[str, Tuple[str, int]]:
    '''Given an address, return (name, offset) or "name + offset".

    :param addr: address to describe
    :param parse: when true, split the answer into a ``(name, offset)`` tuple
    :return: the stripped output of the ``fd`` command as-is, or, with ``parse``,
        a tuple of the name and the integer offset (0 when no offset is present)
    '''
    described = self._cmd(f'fd {addr}').strip()
    if not parse:
        return described

    # Split on the LAST ' + ' only: a plain two-value unpack of split(' + ')
    # fails as soon as the name itself contains ' + ' and would then silently
    # drop the offset.
    name, sep, tail = described.rpartition(' + ')
    if sep:
        try:
            return name, int(tail)
        except ValueError:
            # trailing part is not a number, so it belongs to the name
            pass
    # no offset present (offset == 0) or nothing parseable: keep the full text
    return described, 0
249+
def where(self, name: str, offset: int = 0) -> int:
    '''Given a name (+ offset), return its address (0 when not found).

    :param name: name to resolve; it may already contain an offset expression
    :param offset: extra offset appended to the expression when non-zero
    :return: the address parsed from the hexadecimal output of the ``?v`` command,
        or 0 when the command yields no output at all
    '''
    expr = name
    if offset != 0:  # name can already have an offset, multiple + are allowed
        expr = f'{name} + {offset}'
    output = self._cmd(f'?v {expr}').strip()  # 0x0 when name is not found
    # An empty reply carries no address; int('', 16) would raise ValueError,
    # so report "not found" the same way the documented contract does.
    return int(output, 16) if output else 0
232256
def refrom(self, addr: int) -> List[Xref]:
    """Return every entry of ``self.xrefs`` whose ``fromaddr`` equals *addr*."""
    matches = []
    for xref in self.xrefs:
        if xref.fromaddr == addr:
            matches.append(xref)
    return matches
@@ -240,6 +264,35 @@ def read(self, addr: int, size: int) -> bytes:
240264 hexstr = self ._cmd (f"p8 { size } @ { addr } " )
241265 return bytes .fromhex (hexstr )
242266
def dis_nbytes(self, addr: int, size: int) -> List[Instruction]:
    """Issue ``pDj <size> @ <addr>`` and wrap each returned dict in an Instruction."""
    entries = self._cmdj(f"pDj {size} @ {addr}")
    return [Instruction(**entry) for entry in entries]
270+
def disassembler(self, ql: 'Qiling', addr: int, size: int, filt: Pattern[str] = None) -> int:
    '''A human-friendly replacement for QlArchUtils.disassembler backed by r2; usable as a hook_code callback.

    :param ql: Qiling instance
    :param addr: start address for disassembly
    :param size: size in bytes
    :param filt: regex pattern searched in the resolved name; only matching
        instructions are logged (None logs everything)
    :return: number of bytes covered by the instructions processed here;
        equals size when every instruction was valid
    '''
    addr_width = ql.arch.bits // 4  # hex digits in a full-width address
    consumed = 0
    for insn in self.dis_nbytes(addr, size):
        # stop at the first instruction reported as invalid
        if insn.type.lower() == 'invalid':
            break
        flag_name, flag_off = self.at(insn.offset, parse=True)
        wanted = filt is None or filt.search(flag_name)
        if wanted:
            ql.log.info(f'{insn.offset:0{addr_width}x} [{flag_name:20s} + {flag_off:#08x}] {insn.bytes.hex(" "):20s} {insn.disasm}')
        # progress is tracked even for instructions the filter suppressed
        consumed = insn.offset + insn.size - addr
    if consumed < size:
        # hand the remaining bytes over to the arch's own disassembler
        ql.arch.utils.disassembler(ql, addr + consumed, size - consumed)
    return consumed
291+
def enable_disasm(self, filt_str: str = ''):
    """Compile *filt_str* into a regex and register ``self.disassembler`` via ``self.ql.hook_code``, passing the pattern along."""
    pattern = re.compile(filt_str)
    self.ql.hook_code(self.disassembler, pattern)
295+
243296 def enable_trace (self , mode = 'full' ):
244297 # simple map from addr to flag name, cannot resolve addresses in the middle
245298 self .ql .loader .symsmap = {flag .offset : flag .name for flag in self .flags }
0 commit comments