2424 objc_protocol_t
2525)
2626
27- from DyldExtractor .macho .macho_context import MachOContext
2827from DyldExtractor .macho .macho_structs import (
2928 LoadCommands ,
3029 linkedit_data_command ,
3130 mach_header_64 ,
3231 segment_command_64
3332)
3433
34+ from DyldExtractor .dyld .dyld_structs import dyld_cache_mapping_info
35+
3536
3637# Modify disasm_lite to accept an offset.
3738# This is used to speed up the disassembly, and should
@@ -252,11 +253,10 @@ def _findAddInstructions(
252253 self ,
253254 startIdx : int ,
254255 adrpReg : str ,
255- _processed : Set [int ] = None
256256 ) -> Set [int ]:
257257 """Find ADD instructions given an ADRP register.
258258
259- This will recursively follow branches and stop
259+ This will iteratively follow branches and stop
260260 when the ADRP range ends.
261261
262262 Args:
@@ -265,98 +265,92 @@ def _findAddInstructions(
265265 A list of indices to the ADD instructions.
266266 """
267267
268- # Keep track of start indexes that are already processed
269- if _processed is None :
270- _processed = {startIdx }
271- pass
272- else :
273- if startIdx in _processed :
274- return set ()
275- else :
276- _processed .add (startIdx )
277- pass
278-
279268 addIdxs = set ()
280- i = startIdx
281269
282- while i < len (self ._textInstr ) and i >= 0 :
283- address , mnemonic , opcodes = self ._textInstr [i ]
270+ # set of indices that have been or are being processed
271+ processedIdx = set ()
272+
273+ # list of indices that need to be processed
274+ startIndices = [startIdx ]
284275
285- # check if the ADRP dest reg matches the base reg for the ADD
286- if mnemonic == "add" and opcodes [1 ] == adrpReg :
287- addIdxs .add (i )
276+ while len (startIndices ):
277+ i = startIndices .pop ()
278+ if i in processedIdx :
279+ continue
280+ else :
281+ processedIdx .add (i )
288282 pass
289283
290- # If there is an unconditional branch, and it points
291- # within the text section, follow it. If it does not
292- # point within the text section, end the ADRP range.
293- if mnemonic == "b" :
294- branchAddr = int (opcodes [0 ][1 :], 16 )
295- idxDelta = int ((branchAddr - address ) / 4 )
296- i += idxDelta
284+ while i < len (self ._textInstr ) and i >= 0 :
285+ address , mnemonic , opcodes = self ._textInstr [i ]
297286
298- if i in _processed :
299- break
300- else :
301- _processed . add ( i )
287+ # check if the ADRP dest reg matches the base reg for the ADD
288+ if mnemonic == "add" and opcodes [ 1 ] == adrpReg :
289+ addIdxs . add ( i )
290+ pass
302291
303- if i < 0 or i >= len (self ._textInstr ):
304- break
305- continue
292+ # If there is an unconditional branch, and it points
293+ # within the text section, follow it. If it does not
294+ # point within the text section, end the ADRP range.
295+ if mnemonic == "b" :
296+ branchAddr = int (opcodes [0 ][1 :], 16 )
297+ idxDelta = int ((branchAddr - address ) / 4 )
298+ i += idxDelta
306299
307- # If there is a conditional branch, follow it and continue
308- if mnemonic [0 :2 ] == "b." :
309- branchAddr = int (opcodes [0 ][1 :], 16 )
310- idxDelta = int ((branchAddr - address ) / 4 )
311- addIdxs .update (
312- self ._findAddInstructions (i + idxDelta , adrpReg , _processed = _processed )
313- )
314- pass
315- elif mnemonic == "cbz" or mnemonic == "cbnz" :
316- branchAddr = int (opcodes [1 ][1 :], 16 )
317- idxDelta = int ((branchAddr - address ) / 4 )
318- addIdxs .update (
319- self ._findAddInstructions (i + idxDelta , adrpReg , _processed = _processed )
320- )
321- pass
322- elif mnemonic == "tbz" or mnemonic == "tbnz" :
323- branchAddr = int (opcodes [2 ][1 :], 16 )
324- idxDelta = int ((branchAddr - address ) / 4 )
325- addIdxs .update (
326- self ._findAddInstructions (i + idxDelta , adrpReg , _processed = _processed )
327- )
328- pass
300+ if i < 0 or i >= len (self ._textInstr ):
301+ break
329302
330- # End the ADRP range if the function returns
331- if mnemonic in (
332- "ret" ,
333- "retaa" ,
334- "retab"
335- ):
336- break
303+ startIndices .append (i )
304+ break
337305
338- # If we find an instruction modifying the register,
339- # the adrp range probably ended.
340- if adrpReg == opcodes [0 ]:
341- break
306+ # If there is a conditional branch, follow it and continue
307+ elif mnemonic [0 :2 ] == "b." :
308+ branchAddr = int (opcodes [0 ][1 :], 16 )
309+ idxDelta = int ((branchAddr - address ) / 4 )
310+ startIndices .append (i + idxDelta )
311+ pass
312+ elif mnemonic == "cbz" or mnemonic == "cbnz" :
313+ branchAddr = int (opcodes [1 ][1 :], 16 )
314+ idxDelta = int ((branchAddr - address ) / 4 )
315+ startIndices .append (i + idxDelta )
316+ pass
317+ elif mnemonic == "tbz" or mnemonic == "tbnz" :
318+ branchAddr = int (opcodes [2 ][1 :], 16 )
319+ idxDelta = int ((branchAddr - address ) / 4 )
320+ startIndices .append (i + idxDelta )
321+ pass
342322
343- # These instructions modify 2 registers.
344- if mnemonic in (
345- "ldaxp" ,
346- "ldnp" ,
347- "ldpsw" ,
348- "ldxp" ,
349- "stlxp" ,
350- "stnp" ,
351- "stp" ,
352- "stxp" ,
353- "ldp"
354- ):
355- if adrpReg == opcodes [1 ]:
323+ # End the ADRP range if the function returns
324+ if mnemonic in (
325+ "ret" ,
326+ "retaa" ,
327+ "retab"
328+ ):
329+ break
330+
331+ # If we find an instruction modifying the register,
332+ # the adrp range probably ended.
333+ if adrpReg == opcodes [0 ]:
356334 break
357- pass
358335
359- i += 1
336+ # These instructions modify 2 registers.
337+ if mnemonic in (
338+ "ldaxp" ,
339+ "ldnp" ,
340+ "ldpsw" ,
341+ "ldxp" ,
342+ "stlxp" ,
343+ "stnp" ,
344+ "stp" ,
345+ "stxp" ,
346+ "ldp"
347+ ):
348+ if adrpReg == opcodes [1 ]:
349+ break
350+ pass
351+
352+ i += 1
353+ pass
360354 pass
361355
362356 return addIdxs
@@ -475,11 +469,9 @@ def _createExtraSegment(self) -> None:
475469 raise _ObjCFixerError ("Unable to find space for the extra ObjC segment." )
476470
477471 # Get a starting address for the new segment
472+ leftSegOff = self ._dyldCtx .convertAddr (leftSeg .vmaddr )[0 ]
478473 newSegStartAddr = (leftSeg .vmaddr + leftSeg .vmsize + 0x1000 ) & ~ 0xFFF
479- newSegStartOff = (
480- self ._dyldCtx .convertAddr (leftSeg .vmaddr )[0 ] + leftSeg .vmsize
481- + 0x1000
482- ) & ~ 0xFFF
474+ newSegStartOff = (leftSegOff + leftSeg .vmsize + 0x1000 ) & ~ 0xFFF
483475
484476 # adjust max gap size to account for page alignment
485477 maxGapSize -= newSegStartAddr - (leftSeg .vmaddr + leftSeg .vmsize )
@@ -488,7 +480,7 @@ def _createExtraSegment(self) -> None:
488480 newSegment = segment_command_64 ()
489481 newSegment .cmd = LoadCommands .LC_SEGMENT_64
490482 newSegment .cmdsize = segment_command_64 .SIZE # no sections
491- newSegment .segname = b"__EXTRA_OBJC"
483+ newSegment .segname = self . _extractionCtx . EXTRA_SEGMENT_NAME
492484 newSegment .vmaddr = newSegStartAddr
493485 newSegment .fileoff = newSegStartOff
494486 newSegment .maxprot = 3 # read and write
@@ -1040,7 +1032,6 @@ def _processMethodList(self, methodListAddr: int, noImp=False) -> int:
10401032 # fix relative pointers after we reserve a new address for the method list
10411033 # contains a list of tuples of field offsets and their target addresses
10421034 relativeFixups : list [tuple [int , int ]] = []
1043-
10441035 for i in range (methodListDef .count ):
10451036 methodAddr = (
10461037 methodListAddr
@@ -1054,10 +1045,13 @@ def _processMethodList(self, methodListAddr: int, noImp=False) -> int:
10541045
10551046 if methodDef .name :
10561047 nameAddr = methodAddr + methodDef .name
1057- newNameAddr = self ._processString (nameAddr )
1058- methodDef .name = newNameAddr - methodAddr
1048+ if ( newNameAddr : = self ._processString (nameAddr )) is not None :
1049+ methodDef .name = newNameAddr - methodAddr
10591050
1060- relativeFixups .append ((methodOff , newNameAddr ))
1051+ relativeFixups .append ((methodOff , newNameAddr ))
1052+ else :
1053+ methodDef .name = 0
1054+ self ._logger .warning (f"Unable to get string for method at { hex (methodAddr )} " ) # noqa
10611055 pass
10621056
10631057 if methodDef .types :
@@ -1128,7 +1122,10 @@ def _processString(self, stringAddr: int) -> int:
11281122 else :
11291123 newStringAddr = self ._extraDataHead
11301124
1131- stringOff , ctx = self ._dyldCtx .convertAddr (stringAddr )
1125+ stringOff , ctx = self ._dyldCtx .convertAddr (stringAddr ) or (None , None )
1126+ if not stringOff :
1127+ return None
1128+
11321129 stringData = ctx .fileCtx .readString (stringOff )
11331130 self ._addExtraData (stringData )
11341131 pass
@@ -1259,9 +1256,10 @@ def _makeHeaderSpace(self, spaceNeeded: int) -> None:
12591256 pass
12601257
12611258 def _addExtraDataSeg (self ) -> None :
1262- # update the size on the new segment
1263- self ._extraSegment .vmsize = len (self ._extraData )
1264- self ._extraSegment .filesize = len (self ._extraData )
1259+ # update the size on the new segment and mappings
1260+ extraDataLen = len (self ._extraData )
1261+ self ._extraSegment .vmsize = extraDataLen
1262+ self ._extraSegment .filesize = extraDataLen
12651263
12661264 # insert the segment command right before the linkedit
12671265 moveStart = self ._machoCtx .segments [b"__LINKEDIT" ].seg ._fileOff_
@@ -1279,16 +1277,8 @@ def _addExtraDataSeg(self) -> None:
12791277
12801278 self ._machoCtx .fileCtx .writeBytes (moveStart , self ._extraSegment )
12811279
1282- # add the new data and the segment command
1283- file = self ._machoCtx .fileForAddr (self ._extraSegment .vmaddr )
1284- if not file :
1285- self ._logger .error ("Encountered edge case when adding extra data!" )
1286- return
1287-
1288- file .writeBytes (
1289- self ._extraSegment .fileoff ,
1290- self ._extraData
1291- )
1280+ # update the extraction context
1281+ self ._extractionCtx .extraSegmentData = self ._extraData
12921282
12931283 # update the header
12941284 self ._machoCtx .header .ncmds += 1
0 commit comments