Skip to content
This repository was archived by the owner on Dec 28, 2025. It is now read-only.

Commit 7648de1

Browse files
committed
Second try at fixing selectors.
1 parent 1a897d4 commit 7648de1

File tree

1 file changed

+290
-1
lines changed

1 file changed

+290
-1
lines changed

src/DyldExtractor/converter/objc_fixer.py

Lines changed: 290 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,294 @@ def _getOpcodes(self, opStr: str) -> list[str]:
265265
pass
266266

267267

268+
class _ObjCSelectorFixerV2(object):
269+
def __init__(
270+
self,
271+
extractionCtx: ExtractionContext,
272+
delegate: "_ObjCFixer"
273+
) -> None:
274+
"""Un-does direct selector loading... second try.
275+
276+
Args:
277+
extractionCtx: The extraction context
278+
delegate: The delegate to add more data if needed.
279+
"""
280+
281+
super().__init__()
282+
283+
self._dyldCtx = extractionCtx.dyldCtx
284+
self._machoCtx = extractionCtx.machoCtx
285+
self._statusBar = extractionCtx.statusBar
286+
self._logger = extractionCtx.logger
287+
self._delegate = delegate
288+
289+
# All the instructions in the text section.
290+
# The instruction at index 0 corresponds to
291+
# the first instruction.
292+
self._textInstr: tuple[int, str, tuple[str]] = None
293+
pass
294+
295+
def run(self) -> None:
296+
try:
297+
textSect = self._machoCtx.segments[b"__TEXT"].sects[b"__text"]
298+
pass
299+
except KeyError:
300+
self._logger.error("Unable to get __text section")
301+
return
302+
303+
self._textInstr = self._disasmText()
304+
if not self._textInstr:
305+
return
306+
307+
self._statusBar.update(status="Fixing Selectors")
308+
309+
# enumerate the text
310+
textSectAddr = textSect.addr
311+
textSectOff = self._dyldCtx.convertAddr(textSectAddr)
312+
313+
for i, instrData in enumerate(self._textInstr):
314+
if instrData[1] != "adrp":
315+
continue
316+
317+
adrpReg = instrData[2][0]
318+
addInstrIdxs = self._findAddInstructions(i + 1, adrpReg)
319+
if not addInstrIdxs:
320+
continue
321+
addInstrIdxs = sorted(addInstrIdxs)
322+
323+
adrpAddr = textSectAddr + (i * 4)
324+
adrpOff = textSectOff + (i * 4)
325+
adrpInstr = self._dyldCtx.readFormat(adrpOff, "<I")[0]
326+
327+
# Find the ADRP result
328+
immlo = (adrpInstr & 0x60000000) >> 29
329+
immhi = (adrpInstr & 0xFFFFE0) >> 3
330+
imm = (immhi | immlo) << 12
331+
imm = stub_fixer.Arm64Utilities.signExtend(imm, 33)
332+
adrpResult = (adrpAddr & ~0xFFF) + imm
333+
334+
newAdrpTarget = None
335+
336+
for addInstrIdx in addInstrIdxs:
337+
addOff = textSectOff + (addInstrIdx * 4)
338+
addInstr = self._dyldCtx.readFormat(addOff, "<I")[0]
339+
340+
# Test for a special ADD cases
341+
if addInstr & 0xffc00000 != 0x91000000:
342+
continue
343+
344+
# Find the ADD result
345+
imm = (addInstr & 0x3FFC00) >> 10
346+
loadTarget = adrpResult + imm
347+
348+
# check if it needs fixing
349+
if self._machoCtx.containsAddr(loadTarget):
350+
continue
351+
352+
if loadTarget not in self._delegate._selRefCache:
353+
continue
354+
newRefAddr = self._delegate._selRefCache[loadTarget]
355+
356+
if newAdrpTarget is None:
357+
# Fix the ADRP on the first ADD
358+
newAdrpTarget = (newRefAddr & -4096)
359+
360+
adrpDelta = newAdrpTarget - (adrpAddr & -4096)
361+
immhi = (adrpDelta >> 9) & (0x00FFFFE0)
362+
immlo = (adrpDelta << 17) & (0x60000000)
363+
newAdrp = (0x90000000) | immlo | immhi | adrpInstr & 0x1F
364+
self._machoCtx.writeBytes(adrpOff, struct.pack("<I", newAdrp))
365+
pass
366+
else:
367+
# Make sure the new address is reachable with the new adrp
368+
delta = newRefAddr - newAdrpTarget
369+
if delta < 0 or delta > 4095:
370+
self._logger.warning(f"Unable to reach selector reference at: {hex(textSectAddr + (addInstrIdx * 4))}, with new ADRP target: {hex(newAdrpTarget)}, load target: {hex(newRefAddr)}, ADRP delta: {hex(delta)}") # noqa
371+
continue
372+
pass
373+
374+
# Fix it
375+
ldrTargetOff = newRefAddr - (newRefAddr & -4096)
376+
imm12 = (ldrTargetOff << 7) & 0x3FFC00
377+
ldrRegisters = addInstr & 0x3FF
378+
newLdr = 0xF9400000 | imm12 | ldrRegisters
379+
self._machoCtx.writeBytes(addOff, struct.pack("<I", newLdr))
380+
381+
self._statusBar.update(status="Fixing Selectors")
382+
pass
383+
pass
384+
pass
385+
386+
def _disasmText(self) -> tuple[int, str, tuple[str]]:
387+
"""Disassemble and save the __text section."""
388+
389+
self._statusBar.update(status="Disassembling Text (will appear frozen)")
390+
391+
textSect = self._machoCtx.segments[b"__TEXT"].sects[b"__text"]
392+
textSectOff = self._dyldCtx.convertAddr(textSect.addr)
393+
textData = self._dyldCtx.getBytes(textSectOff, textSect.size)
394+
395+
opStrTrans = str.maketrans("", "", "[]!")
396+
disassembler = cp.Cs(cp.CS_ARCH_ARM64, cp.CS_MODE_LITTLE_ENDIAN)
397+
398+
# Capstone 4.0.2 doesn't support some newer PAC instructions like
399+
# retab or pacibsp, and when it encounters these, it just stops.
400+
# Due to this, we have to detect this and add these instructions
401+
# manually, at least until Capstone is updated.
402+
textDataAddr = textSect.addr
403+
instructions = []
404+
while True:
405+
# Format the instructions like this (address, mnemonic, (opcodes, ...))
406+
newInstrs = [
407+
(instruction[0], instruction[2], [
408+
opcode.strip()
409+
for opcode
410+
in instruction[3].translate(opStrTrans).split(",")
411+
])
412+
for instruction
413+
in disassembler.disasm_lite(textData, textDataAddr)
414+
]
415+
416+
# Check if everything was disassembled
417+
if len(instructions) + len(newInstrs) == (textSect.size / 4):
418+
instructions += newInstrs
419+
break
420+
421+
# Attempt to recover from an unknown instruction
422+
byteOffset = len(newInstrs) * 4
423+
textDataAddr += byteOffset
424+
nextInstr = textData[byteOffset:byteOffset + 4]
425+
if nextInstr == b"\xff\x0b_\xd6": # retaa
426+
newInstrs.append((textDataAddr, "retaa", []))
427+
pass
428+
elif nextInstr == b"\xff\x0f_\xd6": # retab
429+
newInstrs.append((textDataAddr, "retab", []))
430+
pass
431+
else:
432+
newInstrs.append((textDataAddr, "UNKNOWN", [""]))
433+
pass
434+
435+
instructions += newInstrs
436+
textData = textData[len(newInstrs) * 4:]
437+
textDataAddr += 4
438+
pass
439+
440+
return instructions
441+
442+
def _findAddInstructions(
443+
self,
444+
startIdx: int,
445+
adrpReg: str,
446+
_processed: set[int] = None
447+
) -> set[int]:
448+
"""Find ADD instructions given an ADRP register.
449+
450+
This will recursively follow branches and stop
451+
when the ADRP range ends.
452+
453+
Args:
454+
startIdx: The instruction index to start at.
455+
Returns:
456+
A list of indices to the ADD instructions.
457+
"""
458+
459+
# Keep track of start indexes that are already processed
460+
if _processed is None:
461+
_processed = {startIdx}
462+
pass
463+
else:
464+
if startIdx in _processed:
465+
return set()
466+
else:
467+
_processed.add(startIdx)
468+
pass
469+
470+
addIdxs = set()
471+
i = startIdx
472+
473+
while i < len(self._textInstr):
474+
address, mnemonic, opcodes = self._textInstr[i]
475+
476+
# check if the ADRP dest reg matches the base reg for the ADD
477+
if mnemonic == "add" and opcodes[1] == adrpReg:
478+
addIdxs.add(i)
479+
pass
480+
481+
# If there is an unconditional branch, and it points
482+
# within the text section, follow it. If it does not
483+
# point within the text section, end the ADRP range.
484+
if mnemonic == "b":
485+
branchAddr = int(opcodes[0][1:], 16)
486+
idxDelta = int((branchAddr - address) / 4)
487+
i += idxDelta
488+
489+
if i in _processed:
490+
break
491+
else:
492+
_processed.add(i)
493+
494+
if i < 0 or i >= len(self._textInstr):
495+
break
496+
continue
497+
498+
# If there is a conditional branch, follow it and continue
499+
if mnemonic[0:2] == "b.":
500+
branchAddr = int(opcodes[0][1:], 16)
501+
idxDelta = int((branchAddr - address) / 4)
502+
addIdxs.update(
503+
self._findAddInstructions(i + idxDelta, adrpReg, _processed=_processed)
504+
)
505+
pass
506+
if mnemonic == "cbz" or mnemonic == "cbnz":
507+
branchAddr = int(opcodes[1][1:], 16)
508+
idxDelta = int((branchAddr - address) / 4)
509+
addIdxs.update(
510+
self._findAddInstructions(i + idxDelta, adrpReg, _processed=_processed)
511+
)
512+
pass
513+
if mnemonic == "tbz" or mnemonic == "tbnz":
514+
branchAddr = int(opcodes[2][1:], 16)
515+
idxDelta = int((branchAddr - address) / 4)
516+
addIdxs.update(
517+
self._findAddInstructions(i + idxDelta, adrpReg, _processed=_processed)
518+
)
519+
pass
520+
521+
# End the ADRP range if the function returns
522+
if mnemonic in (
523+
"ret",
524+
"retaa",
525+
"retab"
526+
):
527+
break
528+
529+
# If we find an instruction modifying the register,
530+
# the adrp range probably ended.
531+
if adrpReg == opcodes[0]:
532+
break
533+
534+
# These instructions modify 2 registers.
535+
if mnemonic in (
536+
"ldaxp",
537+
"ldnp",
538+
"ldpsw",
539+
"ldxp",
540+
"stlxp",
541+
"stnp",
542+
"stp",
543+
"stxp",
544+
"ldp"
545+
):
546+
if adrpReg == opcodes[1]:
547+
break
548+
pass
549+
550+
i += 1
551+
pass
552+
553+
return addIdxs
554+
555+
268556
class _ObjCFixer(object):
269557

270558
def __init__(self, extractionCtx: ExtractionContext) -> None:
@@ -330,7 +618,8 @@ def run(self):
330618
self._finalizeFutureClasses()
331619

332620
# self._fixSelectors_OLD()
333-
_ObjCSelectorFixer(self._extractionCtx, self).run()
621+
# _ObjCSelectorFixer(self._extractionCtx, self).run()
622+
_ObjCSelectorFixerV2(self._extractionCtx, self).run()
334623

335624
self._checkSpaceConstraints()
336625
self._addExtraDataSeg()

0 commit comments

Comments
 (0)