11import struct
2+ import ctypes
23import capstone as cp
34
45from DyldExtractor .extraction_context import ExtractionContext
3132)
3233
3334
# Modified copy of capstone's ``Cs.disasm_lite`` that accepts a byte offset
# into ``code``. This is used to speed up the disassembly (the caller no
# longer has to re-slice the buffer), and should be removed when capstone
# is updated.
def disasm_lite_new(self, code, offset, count=0, codeOffset=0):
    """Disassemble ``code`` starting ``codeOffset`` bytes into the buffer.

    Args:
        self: The capstone ``Cs`` instance this function is bound to; only
            ``self._diet`` and ``self.csh`` are read.
        code: The machine code buffer. A ``bytearray`` is passed to
            capstone by reference (no copy); any other buffer is sliced
            at ``codeOffset`` first.
        offset: Forwarded to ``cs_disasm`` as the address argument.
        count: Forwarded to ``cs_disasm`` as the instruction count
            (capstone treats 0 as "no limit" -- see capstone docs).
        codeOffset: Number of leading bytes of ``code`` to skip.

    Yields:
        ``(address, size, mnemonic, op_str)`` tuples, one per instruction.

    Raises:
        cp.CsError: If the engine is a diet build, or if capstone reports
            an error after disassembling nothing.
        ValueError: If ``codeOffset`` is negative.
    """
    if self._diet:
        # Diet engine cannot provide @mnemonic & @op_str
        raise cp.CsError(cp.CS_ERR_DIET)

    if codeOffset < 0:
        # A negative offset would make ``size`` exceed the real buffer and
        # let the native code read out of bounds.
        raise ValueError("codeOffset cannot be negative")

    size = len(code) - codeOffset
    if size <= 0:
        # Nothing left to disassemble; avoids ``from_buffer`` raising on a
        # zero-length view.
        return

    if isinstance(code, bytearray):
        # Pass a bytearray by reference, starting at codeOffset
        code = ctypes.byref(ctypes.c_char.from_buffer(code, codeOffset))
    elif codeOffset:
        # Other buffers cannot be referenced at an offset, so slice them to
        # keep the data handed to capstone in step with ``size``.
        code = code[codeOffset:]

    all_insn = ctypes.POINTER(cp._cs_insn)()
    res = cp._cs.cs_disasm(
        self.csh, code, size, offset, count, ctypes.byref(all_insn)
    )
    if res > 0:
        try:
            for i in range(res):
                insn = all_insn[i]
                yield (
                    insn.address,
                    insn.size,
                    insn.mnemonic.decode('ascii'),
                    insn.op_str.decode('ascii'),
                )
        finally:
            # Always release the native instruction array, even if the
            # consumer abandons the generator early.
            cp._cs.cs_free(all_insn, res)
    else:
        status = cp._cs.cs_errno(self.csh)
        if status != cp.CS_ERR_OK:
            raise cp.CsError(status)
62+
63+
64+ cp .Cs .disasm_lite = disasm_lite_new
65+
66+
class _ObjCFixerError(Exception):
    """Module-private exception type; adds nothing on top of ``Exception``."""
3669
@@ -367,12 +400,12 @@ def run(self) -> None:
367400 # Make sure the new address is reachable with the new adrp
368401 delta = newRefAddr - newAdrpTarget
369402 if delta < 0 or delta > 4095 :
370- self ._logger .warning (f"Unable to reach selector reference at: { hex (textSectAddr + (addInstrIdx * 4 ))} , with new ADRP target: { hex (newAdrpTarget )} , load target: { hex (newRefAddr )} , ADRP delta: { hex (delta )} " ) # noqa
403+ self ._logger .warning (f"Unable to reach possible selector reference at: { hex (textSectAddr + (addInstrIdx * 4 ))} , with new ADRP target: { hex (newAdrpTarget )} , load target: { hex (newRefAddr )} , ADRP delta: { hex (delta )} " ) # noqa
371404 continue
372405 pass
373406
374407 # Fix it
375- ldrTargetOff = newRefAddr - ( newRefAddr & - 4096 )
408+ ldrTargetOff = newRefAddr - newAdrpTarget
376409 imm12 = (ldrTargetOff << 7 ) & 0x3FFC00
377410 ldrRegisters = addInstr & 0x3FF
378411 newLdr = 0xF9400000 | imm12 | ldrRegisters
@@ -390,7 +423,7 @@ def _disasmText(self) -> tuple[int, str, tuple[str]]:
390423
391424 textSect = self ._machoCtx .segments [b"__TEXT" ].sects [b"__text" ]
392425 textSectOff = self ._dyldCtx .convertAddr (textSect .addr )
393- textData = self ._dyldCtx .getBytes (textSectOff , textSect .size )
426+ textData = bytearray ( self ._dyldCtx .getBytes (textSectOff , textSect .size ) )
394427
395428 opStrTrans = str .maketrans ("" , "" , "[]!" )
396429 disassembler = cp .Cs (cp .CS_ARCH_ARM64 , cp .CS_MODE_LITTLE_ENDIAN )
@@ -399,9 +432,10 @@ def _disasmText(self) -> tuple[int, str, tuple[str]]:
399432 # retab or pacibsp, and when it encounters these, it just stops.
400433 # Due to this, we have to detect this and add these instructions
401434 # manually, at least until Capstone is updated.
435+ textDataOff = 0
402436 textDataAddr = textSect .addr
403437 instructions = []
404- while True :
438+ while textDataOff < textSect . size :
405439 # Format the instructions like this (address, mnemonic, (opcodes, ...))
406440 newInstrs = [
407441 (instruction [0 ], instruction [2 ], [
@@ -410,7 +444,7 @@ def _disasmText(self) -> tuple[int, str, tuple[str]]:
410444 in instruction [3 ].translate (opStrTrans ).split ("," )
411445 ])
412446 for instruction
413- in disassembler .disasm_lite (textData , textDataAddr )
447+ in disassembler .disasm_lite (textData , textDataAddr , codeOffset = textDataOff )
414448 ]
415449
416450 # Check if everything was disassembled
@@ -420,20 +454,21 @@ def _disasmText(self) -> tuple[int, str, tuple[str]]:
420454
421455 # Attempt to recover from an unknown instruction
422456 byteOffset = len (newInstrs ) * 4
457+ textDataOff += byteOffset
423458 textDataAddr += byteOffset
424- nextInstr = textData [byteOffset : byteOffset + 4 ]
425- if nextInstr == b"\xff \x0b _ \xd6 " : # retaa
459+ nextInstr = textData [textDataOff : textDataOff + 4 ]
460+ if nextInstr == b"\xff \x0b \x5f \xd6 " : # retaa
426461 newInstrs .append ((textDataAddr , "retaa" , []))
427462 pass
428- elif nextInstr == b"\xff \x0f _ \xd6 " : # retab
463+ elif nextInstr == b"\xff \x0f \x5f \xd6 " : # retab
429464 newInstrs .append ((textDataAddr , "retab" , []))
430465 pass
431466 else :
432467 newInstrs .append ((textDataAddr , "UNKNOWN" , ["" ]))
433468 pass
434469
435470 instructions += newInstrs
436- textData = textData [ len ( newInstrs ) * 4 :]
471+ textDataOff += 4
437472 textDataAddr += 4
438473 pass
439474
@@ -470,7 +505,7 @@ def _findAddInstructions(
470505 addIdxs = set ()
471506 i = startIdx
472507
473- while i < len (self ._textInstr ):
508+ while i < len (self ._textInstr ) and i >= 0 :
474509 address , mnemonic , opcodes = self ._textInstr [i ]
475510
476511 # check if the ADRP dest reg matches the base reg for the ADD
@@ -503,14 +538,14 @@ def _findAddInstructions(
503538 self ._findAddInstructions (i + idxDelta , adrpReg , _processed = _processed )
504539 )
505540 pass
506- if mnemonic == "cbz" or mnemonic == "cbnz" :
541+ elif mnemonic == "cbz" or mnemonic == "cbnz" :
507542 branchAddr = int (opcodes [1 ][1 :], 16 )
508543 idxDelta = int ((branchAddr - address ) / 4 )
509544 addIdxs .update (
510545 self ._findAddInstructions (i + idxDelta , adrpReg , _processed = _processed )
511546 )
512547 pass
513- if mnemonic == "tbz" or mnemonic == "tbnz" :
548+ elif mnemonic == "tbz" or mnemonic == "tbnz" :
514549 branchAddr = int (opcodes [2 ][1 :], 16 )
515550 idxDelta = int ((branchAddr - address ) / 4 )
516551 addIdxs .update (
0 commit comments