Skip to content
This repository was archived by the owner on Dec 28, 2025. It is now read-only.

Commit 9221809

Browse files
committed
Optimized _ObjCSelectorFixerV2, and improved
some messages.
1 parent 7648de1 commit 9221809

File tree

1 file changed

+47
-12
lines changed

1 file changed

+47
-12
lines changed

src/DyldExtractor/converter/objc_fixer.py

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import struct
2+
import ctypes
23
import capstone as cp
34

45
from DyldExtractor.extraction_context import ExtractionContext
@@ -31,6 +32,38 @@
3132
)
3233

3334

35+
# Modify capstone's disasm_lite to accept an offset into the code buffer.
36+
# This is used to speed up the disassembly, and should
37+
# be removed when capstone is updated
38+
def disasm_lite_new(self, code, offset, count=0, codeOffset=0):
    """Disassemble ``code`` starting ``codeOffset`` bytes into the buffer.

    Intended as a replacement for capstone's ``Cs.disasm_lite`` that can
    resume part-way through a buffer without the caller having to slice
    (and therefore copy) the remaining bytes first.

    Args:
        self: The ``cp.Cs`` instance this function is bound to.
        code: Buffer holding the machine code. A ``bytearray`` is handed to
            capstone by reference at ``codeOffset``; any other buffer type
            is sliced at ``codeOffset`` before being passed on.
        offset: Address passed to ``cs_disasm`` for the first instruction.
        count: Instruction count passed straight through to ``cs_disasm``.
        codeOffset: Byte index into ``code`` where disassembly starts.

    Yields:
        ``(address, size, mnemonic, op_str)`` for every decoded instruction.

    Raises:
        cp.CsError: If the engine is a diet build, or capstone reports an
            error after decoding nothing.
        ValueError: If ``codeOffset`` is negative.
    """
    if self._diet:
        # Diet engine cannot provide @mnemonic & @op_str
        raise cp.CsError(cp.CS_ERR_DIET)

    if codeOffset < 0:
        # A negative offset would make ``size`` larger than the buffer and
        # let the C call read past its end.
        raise ValueError("codeOffset must be non-negative")

    size = len(code) - codeOffset
    if size <= 0:
        # Nothing left to decode; ctypes' from_buffer would also reject an
        # offset that leaves no bytes, so stop before touching capstone.
        return

    all_insn = ctypes.POINTER(cp._cs_insn)()
    if isinstance(code, bytearray):
        # Pass a bytearray by reference, avoiding a copy of the remainder.
        code = ctypes.byref(ctypes.c_char.from_buffer(code, codeOffset))
    elif codeOffset:
        # Other buffer types cannot be referenced in place, so honour the
        # offset by slicing; ``size`` already describes the sliced length.
        code = code[codeOffset:]

    res = cp._cs.cs_disasm(
        self.csh, code, size, offset, count, ctypes.byref(all_insn)
    )
    if res <= 0:
        status = cp._cs.cs_errno(self.csh)
        if status != cp.CS_ERR_OK:
            raise cp.CsError(status)
        return

    try:
        for i in range(res):
            insn = all_insn[i]
            yield (
                insn.address,
                insn.size,
                insn.mnemonic.decode('ascii'),
                insn.op_str.decode('ascii'),
            )
    finally:
        # Always release the instruction array, even if the consumer stops
        # iterating early or a decode fails.
        cp._cs.cs_free(all_insn, res)
62+
63+
64+
# Install the offset-aware function on capstone's Cs class, replacing the
# library's own disasm_lite for every Cs instance created afterwards or before.
cp.Cs.disasm_lite = disasm_lite_new
65+
66+
3467
class _ObjCFixerError(Exception):
3568
pass
3669

@@ -367,12 +400,12 @@ def run(self) -> None:
367400
# Make sure the new address is reachable with the new adrp
368401
delta = newRefAddr - newAdrpTarget
369402
if delta < 0 or delta > 4095:
370-
self._logger.warning(f"Unable to reach selector reference at: {hex(textSectAddr + (addInstrIdx * 4))}, with new ADRP target: {hex(newAdrpTarget)}, load target: {hex(newRefAddr)}, ADRP delta: {hex(delta)}") # noqa
403+
self._logger.warning(f"Unable to reach possible selector reference at: {hex(textSectAddr + (addInstrIdx * 4))}, with new ADRP target: {hex(newAdrpTarget)}, load target: {hex(newRefAddr)}, ADRP delta: {hex(delta)}") # noqa
371404
continue
372405
pass
373406

374407
# Fix it
375-
ldrTargetOff = newRefAddr - (newRefAddr & -4096)
408+
ldrTargetOff = newRefAddr - newAdrpTarget
376409
imm12 = (ldrTargetOff << 7) & 0x3FFC00
377410
ldrRegisters = addInstr & 0x3FF
378411
newLdr = 0xF9400000 | imm12 | ldrRegisters
@@ -390,7 +423,7 @@ def _disasmText(self) -> tuple[int, str, tuple[str]]:
390423

391424
textSect = self._machoCtx.segments[b"__TEXT"].sects[b"__text"]
392425
textSectOff = self._dyldCtx.convertAddr(textSect.addr)
393-
textData = self._dyldCtx.getBytes(textSectOff, textSect.size)
426+
textData = bytearray(self._dyldCtx.getBytes(textSectOff, textSect.size))
394427

395428
opStrTrans = str.maketrans("", "", "[]!")
396429
disassembler = cp.Cs(cp.CS_ARCH_ARM64, cp.CS_MODE_LITTLE_ENDIAN)
@@ -399,9 +432,10 @@ def _disasmText(self) -> tuple[int, str, tuple[str]]:
399432
# retab or pacibsp, and when it encounters these, it just stops.
400433
# Due to this, we have to detect this and add these instructions
401434
# manually, at least until Capstone is updated.
435+
textDataOff = 0
402436
textDataAddr = textSect.addr
403437
instructions = []
404-
while True:
438+
while textDataOff < textSect.size:
405439
# Format the instructions like this (address, mnemonic, (opcodes, ...))
406440
newInstrs = [
407441
(instruction[0], instruction[2], [
@@ -410,7 +444,7 @@ def _disasmText(self) -> tuple[int, str, tuple[str]]:
410444
in instruction[3].translate(opStrTrans).split(",")
411445
])
412446
for instruction
413-
in disassembler.disasm_lite(textData, textDataAddr)
447+
in disassembler.disasm_lite(textData, textDataAddr, codeOffset=textDataOff)
414448
]
415449

416450
# Check if everything was disassembled
@@ -420,20 +454,21 @@ def _disasmText(self) -> tuple[int, str, tuple[str]]:
420454

421455
# Attempt to recover from an unknown instruction
422456
byteOffset = len(newInstrs) * 4
457+
textDataOff += byteOffset
423458
textDataAddr += byteOffset
424-
nextInstr = textData[byteOffset:byteOffset + 4]
425-
if nextInstr == b"\xff\x0b_\xd6": # retaa
459+
nextInstr = textData[textDataOff:textDataOff + 4]
460+
if nextInstr == b"\xff\x0b\x5f\xd6": # retaa
426461
newInstrs.append((textDataAddr, "retaa", []))
427462
pass
428-
elif nextInstr == b"\xff\x0f_\xd6": # retab
463+
elif nextInstr == b"\xff\x0f\x5f\xd6": # retab
429464
newInstrs.append((textDataAddr, "retab", []))
430465
pass
431466
else:
432467
newInstrs.append((textDataAddr, "UNKNOWN", [""]))
433468
pass
434469

435470
instructions += newInstrs
436-
textData = textData[len(newInstrs) * 4:]
471+
textDataOff += 4
437472
textDataAddr += 4
438473
pass
439474

@@ -470,7 +505,7 @@ def _findAddInstructions(
470505
addIdxs = set()
471506
i = startIdx
472507

473-
while i < len(self._textInstr):
508+
while i < len(self._textInstr) and i >= 0:
474509
address, mnemonic, opcodes = self._textInstr[i]
475510

476511
# check if the ADRP dest reg matches the base reg for the ADD
@@ -503,14 +538,14 @@ def _findAddInstructions(
503538
self._findAddInstructions(i + idxDelta, adrpReg, _processed=_processed)
504539
)
505540
pass
506-
if mnemonic == "cbz" or mnemonic == "cbnz":
541+
elif mnemonic == "cbz" or mnemonic == "cbnz":
507542
branchAddr = int(opcodes[1][1:], 16)
508543
idxDelta = int((branchAddr - address) / 4)
509544
addIdxs.update(
510545
self._findAddInstructions(i + idxDelta, adrpReg, _processed=_processed)
511546
)
512547
pass
513-
if mnemonic == "tbz" or mnemonic == "tbnz":
548+
elif mnemonic == "tbz" or mnemonic == "tbnz":
514549
branchAddr = int(opcodes[2][1:], 16)
515550
idxDelta = int((branchAddr - address) / 4)
516551
addIdxs.update(

0 commit comments

Comments
 (0)