11import struct
22import ctypes
3- from typing import List , Set , Dict , Tuple
43import capstone as cp
4+ from typing import (
5+ List ,
6+ Set ,
7+ Dict ,
8+ Tuple ,
9+ Generator
10+ )
511
612from DyldExtractor .extraction_context import ExtractionContext
713from DyldExtractor .converter import (
@@ -398,29 +404,19 @@ def run(self):
398404
399405 self ._createExtraSegment ()
400406
401- # Get __OBJC_RO from the libobjc.A.dylib image
407+ # Get the libobjc.A.dylib image
402408 for image in self ._dyldCtx .images :
403409 path = self ._dyldCtx .fileCtx .readString (image .pathFileOffset )
404410 if b"libobjc.A.dylib" in path :
405411 offset , ctx = self ._dyldCtx .convertAddr (image .address )
406- libobjcImage = MachOContext (ctx .fileCtx , offset )
407- if b"__OBJC_RO" in libobjcImage .segments :
408- self ._objcRoSeg = libobjcImage .segments [b"__OBJC_RO" ].seg
409- self ._objcRwSeg = libobjcImage .segments [b"__OBJC_RW" ].seg
410- else :
411- self ._logger .error ("libobjc does not contain __OBJC_RO" )
412- return
412+ self ._libobjcImage = MachOContext (ctx .fileCtx , offset )
413413 break
414414 pass
415415 else :
416416 self ._logger .error ("Unable to find libobjc.A.dylib" )
417417 return
418418
419- self ._objcRoRelativeNames = self ._getMethodNameStorage ()
420- if not self ._objcRoRelativeNames :
421- print ("Not using objc_ro" )
422- pass
423- # return
419+ self ._checkMethodNameStorage ()
424420
425421 # caches that map the original definition address
426422 # to its new processed address.
@@ -434,6 +430,7 @@ def run(self):
434430 self ._methodListCache : Dict [int , int ] = {}
435431 self ._stringCache : Dict [int , int ] = {}
436432 self ._intCache : Dict [int , int ] = {}
433+ self ._methodNameCache : Dict [int , int ] = {}
437434
438435 # connects a selrefs old target to its pointer address
439436 self ._selRefCache : Dict [int , int ] = {}
@@ -456,61 +453,110 @@ def run(self):
456453 self ._addExtraDataSeg ()
457454 pass
458455
459- def _getMethodNameStorage (self ) -> bool :
456+ def _checkMethodNameStorage (self ) -> None :
460457 """Check where method names are stored.
461458
462459 Starting around iOS 15, relative method names
463460 pointers are relative to the start of the __OBJC_RO of
464461 libobjc, instead of being relative to itself.
465462 This tries to detect which is being used.
466-
467- Returns:
468- A bool that determines if the method names
469- are relative to the __OBJC_RO.
470463 """
471464
472- # TODO: Maybe there is a better way to detect this
465+ self ._usesObjcRoRelativeNames = False
466+ self ._optMethodNamesAddr = None
473467
474- # Get a method list
475- methodListAddr = None
476- for seg in self ._machoCtx .segmentsI :
468+ # Get __objc_scoffs, __objc_classlist
469+ objcScoffs = None
470+ objcClasslist = None
471+ for seg in self ._libobjcImage .segmentsI :
477472 for sect in seg .sectsI :
478- if sect .segname == b"__objc_methlist" :
479- methodListAddr = sect .addr
480- break
473+ if sect .sectname == b"__objc_scoffs" :
474+ objcScoffs = sect
475+ pass
476+ elif sect .sectname == b"__objc_classlist" :
477+ objcClasslist = sect
478+ pass
481479 pass
482- if methodListAddr :
480+
481+ if objcScoffs and objcClasslist :
483482 break
484483 pass
485484
486- if methodListAddr is None :
487- self ._logger .warning ("Unable to determine the type of method name addressing" ) # noqa
488- return False
485+ if objcScoffs is None :
486+ # Older caches do not have this
487+ return
488+ elif objcClasslist is None :
489+ self ._logger .error ("libobjc does not have __objc_classlist" )
490+ return
489491
490- methodListDef = self ._slider .slideStruct (methodListAddr , objc_method_list_t )
491- if methodListDef .entsize == objc_method_large_t .SIZE :
492- # TODO: probably want to test at least 2 method lists
493- return False
492+ # Parse __objc_scoffs
493+ if objcScoffs .size == 0x20 :
494+ # Just 4 pointers, starting with methods start and methods end
495+ self ._optMethodNamesAddr = self ._slider .slideAddress (objcScoffs .addr )
496+ _optMethodNamesEnd = self ._slider .slideAddress (objcScoffs .addr + 8 )
497+ pass
498+ elif objcScoffs .size == 0x28 :
499+ # First the version number and then pointers
500+ verOff , ctx = self ._dyldCtx .convertAddr (objcScoffs .addr )
501+ version = ctx .fileCtx .readFormat ("<Q" , verOff )[0 ]
502+ if version != 2 :
503+ self ._logger .warning (f"Unknown objc opt version: { version } " )
504+ pass
494505
495- for i in range (methodListDef .count ):
496- methodAddr = (
497- methodListAddr
498- + objc_method_list_t .SIZE
499- + (i * methodListDef .entsize )
500- )
501- methodDef = self ._slider .slideStruct (methodAddr , objc_method_small_t )
506+ self ._optMethodNamesAddr = self ._slider .slideAddress (objcScoffs .addr + 8 )
507+ _optMethodNamesEnd = self ._slider .slideAddress (objcScoffs .addr + 16 )
508+ pass
509+ else :
510+ self ._logger .error ("Unable to parse objc scoffs" )
511+ return
502512
503- # test if the offset is negative or greater than __OBJC_RO's size
504- if methodDef .name <= 0 or methodDef .name > self ._objcRoSeg .vmsize :
505- return False
513+ optMethodNamesSize = _optMethodNamesEnd - self ._optMethodNamesAddr
514+
515+ # Create a generator to get method lists
516+ def getMethodLists () -> Generator [int , None , None ]:
517+ for ptrAddr in range (
518+ objcClasslist .addr ,
519+ objcClasslist .addr + objcClasslist .size ,
520+ 8
521+ ):
522+ classAddr = self ._slider .slideAddress (ptrAddr )
523+ classDef = self ._slider .slideStruct (classAddr , objc_class_t )
524+ classDataDef = self ._slider .slideStruct (
525+ classDef .data & ~ 0x3 ,
526+ objc_class_data_t
527+ )
528+
529+ if classDataDef .baseMethods :
530+ yield classDataDef .baseMethods
531+ pass
532+ pass
506533
507- # if the offset results in a string with non ascii characters
508- nameOff , ctx = self ._dyldCtx .convertAddr (methodAddr + methodDef .name )
509- name = ctx .fileCtx .readString (nameOff )
510- if not all (c < 128 for c in name ):
511- return True
534+ for methodListAddr in getMethodLists ():
535+ methodListDef = self ._slider .slideStruct (methodListAddr , objc_method_list_t )
512536
513- return False
537+ # Try to find an objc_method_small_t
538+ entsize = methodListDef .getEntsize ()
539+ if (
540+ entsize != objc_method_small_t .SIZE
541+ or not methodListDef .usesRelativeMethods ()
542+ ):
543+ methodListAddr += (
544+ objc_method_list_t .SIZE
545+ + (methodListDef .count * entsize )
546+ )
547+ continue
548+
549+ # Test the first method
550+ methodAddr = methodListAddr + objc_method_list_t .SIZE
551+ nameOff , ctx = self ._dyldCtx .convertAddr (methodAddr )
552+ name = ctx .fileCtx .readFormat ("<i" , nameOff )[0 ]
553+
554+ # TODO: Hopefully there is a better way to detect this.
555+ if name >= 0 and name < optMethodNamesSize :
556+ self ._usesObjcRoRelativeNames = True
557+ pass
558+ return
559+ return
514560
515561 def _createExtraSegment (self ) -> None :
516562 """Create an extra segment to store data in.
@@ -1108,14 +1154,6 @@ def _processMethodList(self, methodListAddr: int, noImp=False) -> int:
11081154 self ._logger .error (f"Large method list at { hex (methodListAddr )} , has an entsize that doesn't match the size of objc_method_large_t" ) # noqa
11091155 return 0
11101156
1111- if (
1112- methodListAddr >= self ._objcRoSeg .vmaddr
1113- and methodListAddr < self ._objcRoSeg .vmaddr + self ._objcRoSeg .vmsize
1114- ):
1115- pass
1116- else :
1117- self ._logger .debug ("method list outside" )
1118-
11191157 # fix relative pointers after we reserve a new address for the method list
11201158 # contains a list of tuples of field offsets and their target addresses
11211159 relativeFixups : list [tuple [int , int ]] = []
@@ -1131,17 +1169,20 @@ def _processMethodList(self, methodListAddr: int, noImp=False) -> int:
11311169 methodOff = objc_method_list_t .SIZE + (i * entsize )
11321170
11331171 if methodDef .name :
1134- nameAddr = methodAddr + methodDef .name
1135- if (newNameAddr := self ._processString (nameAddr )) is not None :
1136- methodDef .name = newNameAddr - methodAddr
1137-
1138- relativeFixups .append ((methodOff , newNameAddr ))
1172+ if self ._usesObjcRoRelativeNames :
1173+ baseAddr = self ._optMethodNamesAddr
1174+ pass
11391175 else :
1140- methodDef .name = 0
1141- # self._logger.warning(f"Unable to get string at {hex(nameAddr)}, for method def at {hex(methodAddr)}") # noqa
1176+ baseAddr = methodAddr
1177+ pass
1178+
1179+ nameAddr = baseAddr + methodDef .name
1180+ newNamePtr = self ._processMethodName (nameAddr )
1181+
1182+ # make the name ptr relative to itself
1183+ methodDef .name = newNamePtr - methodAddr
1184+ relativeFixups .append ((methodOff , newNamePtr ))
11421185 pass
1143- else :
1144- self ._logger .debug ("Null method name" )
11451186
11461187 if methodDef .types :
11471188 typesAddr = methodAddr + 4 + methodDef .types
@@ -1241,6 +1282,26 @@ def _processInt(self, intAddr: int, intSize: int) -> int:
12411282 self ._intCache [intAddr ] = newIntAddr
12421283 return newIntAddr
12431284
1285+ def _processMethodName (self , stringAddr : int ) -> int :
1286+ """Process a method name.
1287+
1288+ Returns:
1289+ A the address of the pointer that points
1290+ to the method string.
1291+ """
1292+
1293+ if stringAddr in self ._methodNameCache :
1294+ return self ._methodNameCache [stringAddr ]
1295+
1296+ # TODO: search selrefs first
1297+ newStringAddr = self ._processString (stringAddr )
1298+ ptrAddr = self ._extraDataHead
1299+
1300+ self ._addExtraData (struct .pack ("<Q" , newStringAddr ))
1301+
1302+ self ._methodNameCache [stringAddr ] = ptrAddr
1303+ return ptrAddr
1304+
12441305 def _finalizeFutureClasses (self ) -> None :
12451306 extraSegStart = self ._extraDataHead - len (self ._extraData )
12461307
0 commit comments