@@ -265,6 +265,294 @@ def _getOpcodes(self, opStr: str) -> list[str]:
265265 pass
266266
267267
268+ class _ObjCSelectorFixerV2 (object ):
269+ def __init__ (
270+ self ,
271+ extractionCtx : ExtractionContext ,
272+ delegate : "_ObjCFixer"
273+ ) -> None :
274+ """Un-does direct selector loading... second try.
275+
276+ Args:
277+ extractionCtx: The extraction context
278+ delegate: The delegate to add more data if needed.
279+ """
280+
281+ super ().__init__ ()
282+
283+ self ._dyldCtx = extractionCtx .dyldCtx
284+ self ._machoCtx = extractionCtx .machoCtx
285+ self ._statusBar = extractionCtx .statusBar
286+ self ._logger = extractionCtx .logger
287+ self ._delegate = delegate
288+
289+ # All the instructions in the text section.
290+ # The instruction at index 0 corresponds to
291+ # the first instruction.
292+ self ._textInstr : tuple [int , str , tuple [str ]] = None
293+ pass
294+
295+ def run (self ) -> None :
296+ try :
297+ textSect = self ._machoCtx .segments [b"__TEXT" ].sects [b"__text" ]
298+ pass
299+ except KeyError :
300+ self ._logger .error ("Unable to get __text section" )
301+ return
302+
303+ self ._textInstr = self ._disasmText ()
304+ if not self ._textInstr :
305+ return
306+
307+ self ._statusBar .update (status = "Fixing Selectors" )
308+
309+ # enumerate the text
310+ textSectAddr = textSect .addr
311+ textSectOff = self ._dyldCtx .convertAddr (textSectAddr )
312+
313+ for i , instrData in enumerate (self ._textInstr ):
314+ if instrData [1 ] != "adrp" :
315+ continue
316+
317+ adrpReg = instrData [2 ][0 ]
318+ addInstrIdxs = self ._findAddInstructions (i + 1 , adrpReg )
319+ if not addInstrIdxs :
320+ continue
321+ addInstrIdxs = sorted (addInstrIdxs )
322+
323+ adrpAddr = textSectAddr + (i * 4 )
324+ adrpOff = textSectOff + (i * 4 )
325+ adrpInstr = self ._dyldCtx .readFormat (adrpOff , "<I" )[0 ]
326+
327+ # Find the ADRP result
328+ immlo = (adrpInstr & 0x60000000 ) >> 29
329+ immhi = (adrpInstr & 0xFFFFE0 ) >> 3
330+ imm = (immhi | immlo ) << 12
331+ imm = stub_fixer .Arm64Utilities .signExtend (imm , 33 )
332+ adrpResult = (adrpAddr & ~ 0xFFF ) + imm
333+
334+ newAdrpTarget = None
335+
336+ for addInstrIdx in addInstrIdxs :
337+ addOff = textSectOff + (addInstrIdx * 4 )
338+ addInstr = self ._dyldCtx .readFormat (addOff , "<I" )[0 ]
339+
340+ # Test for a special ADD cases
341+ if addInstr & 0xffc00000 != 0x91000000 :
342+ continue
343+
344+ # Find the ADD result
345+ imm = (addInstr & 0x3FFC00 ) >> 10
346+ loadTarget = adrpResult + imm
347+
348+ # check if it needs fixing
349+ if self ._machoCtx .containsAddr (loadTarget ):
350+ continue
351+
352+ if loadTarget not in self ._delegate ._selRefCache :
353+ continue
354+ newRefAddr = self ._delegate ._selRefCache [loadTarget ]
355+
356+ if newAdrpTarget is None :
357+ # Fix the ADRP on the first ADD
358+ newAdrpTarget = (newRefAddr & - 4096 )
359+
360+ adrpDelta = newAdrpTarget - (adrpAddr & - 4096 )
361+ immhi = (adrpDelta >> 9 ) & (0x00FFFFE0 )
362+ immlo = (adrpDelta << 17 ) & (0x60000000 )
363+ newAdrp = (0x90000000 ) | immlo | immhi | adrpInstr & 0x1F
364+ self ._machoCtx .writeBytes (adrpOff , struct .pack ("<I" , newAdrp ))
365+ pass
366+ else :
367+ # Make sure the new address is reachable with the new adrp
368+ delta = newRefAddr - newAdrpTarget
369+ if delta < 0 or delta > 4095 :
370+ self ._logger .warning (f"Unable to reach selector reference at: { hex (textSectAddr + (addInstrIdx * 4 ))} , with new ADRP target: { hex (newAdrpTarget )} , load target: { hex (newRefAddr )} , ADRP delta: { hex (delta )} " ) # noqa
371+ continue
372+ pass
373+
374+ # Fix it
375+ ldrTargetOff = newRefAddr - (newRefAddr & - 4096 )
376+ imm12 = (ldrTargetOff << 7 ) & 0x3FFC00
377+ ldrRegisters = addInstr & 0x3FF
378+ newLdr = 0xF9400000 | imm12 | ldrRegisters
379+ self ._machoCtx .writeBytes (addOff , struct .pack ("<I" , newLdr ))
380+
381+ self ._statusBar .update (status = "Fixing Selectors" )
382+ pass
383+ pass
384+ pass
385+
386+ def _disasmText (self ) -> tuple [int , str , tuple [str ]]:
387+ """Disassemble and save the __text section."""
388+
389+ self ._statusBar .update (status = "Disassembling Text (will appear frozen)" )
390+
391+ textSect = self ._machoCtx .segments [b"__TEXT" ].sects [b"__text" ]
392+ textSectOff = self ._dyldCtx .convertAddr (textSect .addr )
393+ textData = self ._dyldCtx .getBytes (textSectOff , textSect .size )
394+
395+ opStrTrans = str .maketrans ("" , "" , "[]!" )
396+ disassembler = cp .Cs (cp .CS_ARCH_ARM64 , cp .CS_MODE_LITTLE_ENDIAN )
397+
398+ # Capstone 4.0.2 doesn't support some newer PAC instructions like
399+ # retab or pacibsp, and when it encounters these, it just stops.
400+ # Due to this, we have to detect this and add these instructions
401+ # manually, at least until Capstone is updated.
402+ textDataAddr = textSect .addr
403+ instructions = []
404+ while True :
405+ # Format the instructions like this (address, mnemonic, (opcodes, ...))
406+ newInstrs = [
407+ (instruction [0 ], instruction [2 ], [
408+ opcode .strip ()
409+ for opcode
410+ in instruction [3 ].translate (opStrTrans ).split ("," )
411+ ])
412+ for instruction
413+ in disassembler .disasm_lite (textData , textDataAddr )
414+ ]
415+
416+ # Check if everything was disassembled
417+ if len (instructions ) + len (newInstrs ) == (textSect .size / 4 ):
418+ instructions += newInstrs
419+ break
420+
421+ # Attempt to recover from an unknown instruction
422+ byteOffset = len (newInstrs ) * 4
423+ textDataAddr += byteOffset
424+ nextInstr = textData [byteOffset :byteOffset + 4 ]
425+ if nextInstr == b"\xff \x0b _\xd6 " : # retaa
426+ newInstrs .append ((textDataAddr , "retaa" , []))
427+ pass
428+ elif nextInstr == b"\xff \x0f _\xd6 " : # retab
429+ newInstrs .append ((textDataAddr , "retab" , []))
430+ pass
431+ else :
432+ newInstrs .append ((textDataAddr , "UNKNOWN" , ["" ]))
433+ pass
434+
435+ instructions += newInstrs
436+ textData = textData [len (newInstrs ) * 4 :]
437+ textDataAddr += 4
438+ pass
439+
440+ return instructions
441+
442+ def _findAddInstructions (
443+ self ,
444+ startIdx : int ,
445+ adrpReg : str ,
446+ _processed : set [int ] = None
447+ ) -> set [int ]:
448+ """Find ADD instructions given an ADRP register.
449+
450+ This will recursively follow branches and stop
451+ when the ADRP range ends.
452+
453+ Args:
454+ startIdx: The instruction index to start at.
455+ Returns:
456+ A list of indices to the ADD instructions.
457+ """
458+
459+ # Keep track of start indexes that are already processed
460+ if _processed is None :
461+ _processed = {startIdx }
462+ pass
463+ else :
464+ if startIdx in _processed :
465+ return set ()
466+ else :
467+ _processed .add (startIdx )
468+ pass
469+
470+ addIdxs = set ()
471+ i = startIdx
472+
473+ while i < len (self ._textInstr ):
474+ address , mnemonic , opcodes = self ._textInstr [i ]
475+
476+ # check if the ADRP dest reg matches the base reg for the ADD
477+ if mnemonic == "add" and opcodes [1 ] == adrpReg :
478+ addIdxs .add (i )
479+ pass
480+
481+ # If there is an unconditional branch, and it points
482+ # within the text section, follow it. If it does not
483+ # point within the text section, end the ADRP range.
484+ if mnemonic == "b" :
485+ branchAddr = int (opcodes [0 ][1 :], 16 )
486+ idxDelta = int ((branchAddr - address ) / 4 )
487+ i += idxDelta
488+
489+ if i in _processed :
490+ break
491+ else :
492+ _processed .add (i )
493+
494+ if i < 0 or i >= len (self ._textInstr ):
495+ break
496+ continue
497+
498+ # If there is a conditional branch, follow it and continue
499+ if mnemonic [0 :2 ] == "b." :
500+ branchAddr = int (opcodes [0 ][1 :], 16 )
501+ idxDelta = int ((branchAddr - address ) / 4 )
502+ addIdxs .update (
503+ self ._findAddInstructions (i + idxDelta , adrpReg , _processed = _processed )
504+ )
505+ pass
506+ if mnemonic == "cbz" or mnemonic == "cbnz" :
507+ branchAddr = int (opcodes [1 ][1 :], 16 )
508+ idxDelta = int ((branchAddr - address ) / 4 )
509+ addIdxs .update (
510+ self ._findAddInstructions (i + idxDelta , adrpReg , _processed = _processed )
511+ )
512+ pass
513+ if mnemonic == "tbz" or mnemonic == "tbnz" :
514+ branchAddr = int (opcodes [2 ][1 :], 16 )
515+ idxDelta = int ((branchAddr - address ) / 4 )
516+ addIdxs .update (
517+ self ._findAddInstructions (i + idxDelta , adrpReg , _processed = _processed )
518+ )
519+ pass
520+
521+ # End the ADRP range if the function returns
522+ if mnemonic in (
523+ "ret" ,
524+ "retaa" ,
525+ "retab"
526+ ):
527+ break
528+
529+ # If we find an instruction modifying the register,
530+ # the adrp range probably ended.
531+ if adrpReg == opcodes [0 ]:
532+ break
533+
534+ # These instructions modify 2 registers.
535+ if mnemonic in (
536+ "ldaxp" ,
537+ "ldnp" ,
538+ "ldpsw" ,
539+ "ldxp" ,
540+ "stlxp" ,
541+ "stnp" ,
542+ "stp" ,
543+ "stxp" ,
544+ "ldp"
545+ ):
546+ if adrpReg == opcodes [1 ]:
547+ break
548+ pass
549+
550+ i += 1
551+ pass
552+
553+ return addIdxs
554+
555+
268556class _ObjCFixer (object ):
269557
270558 def __init__ (self , extractionCtx : ExtractionContext ) -> None :
@@ -330,7 +618,8 @@ def run(self):
330618 self ._finalizeFutureClasses ()
331619
332620 # self._fixSelectors_OLD()
333- _ObjCSelectorFixer (self ._extractionCtx , self ).run ()
621+ # _ObjCSelectorFixer(self._extractionCtx, self).run()
622+ _ObjCSelectorFixerV2 (self ._extractionCtx , self ).run ()
334623
335624 self ._checkSpaceConstraints ()
336625 self ._addExtraDataSeg ()
0 commit comments