bug fixes

arandomdev · arandomdev · commit e30bb025b53c · 2021-12-25T22:55:30.000-05:00
diff --git a/bin/dyldex b/bin/dyldex
@@ -160,9 +160,9 @@ def _extractImage(
 
 		extractionCtx = ExtractionContext(dyldCtx, machoCtx, statusBar, logger)
 
-		# slide_info.processSlideInfo(extractionCtx)
+		slide_info.processSlideInfo(extractionCtx)
 		linkedit_optimizer.optimizeLinkedit(extractionCtx)
-		# stub_fixer.fixStubs(extractionCtx)
+		stub_fixer.fixStubs(extractionCtx)
 		objc_fixer.fixObjC(extractionCtx)
 
 		writeProcedures = macho_offset.optimizeOffsets(extractionCtx)
diff --git a/src/DyldExtractor/converter/macho_offset.py b/src/DyldExtractor/converter/macho_offset.py
@@ -29,6 +29,19 @@ class WriteProcedure(object):
 	"""The file to read from."""
 
 
+class BytesFileContext(object):
+	"""Create a FileContext like object for bytes"""
+
+	def __init__(self, buffer: bytes) -> None:
+		super().__init__()
+		self._buffer = buffer
+		pass
+
+	def getBytes(self, offset: int, size: int) -> bytes:
+		return self._buffer[offset:offset + size]
+	pass
+
+
 def _updateLinkEdit(
 	machoCtx: MachOContext,
 	shiftDelta: int
@@ -90,12 +103,22 @@ def optimizeOffsets(extractionCtx: ExtractionContext) -> List[WriteProcedure]:
 	for segname, segment in machoCtx.segments.items():
 		shiftDelta = dataHead - segment.seg.fileoff
 
-		procedure = WriteProcedure(
-			segment.seg.fileoff + shiftDelta,
-			dyldCtx.convertAddr(segment.seg.vmaddr)[0],
-			segment.seg.filesize,
-			machoCtx.fileForAddr(segment.seg.vmaddr)
-		)
+		if segname == extractionCtx.EXTRA_SEGMENT_NAME:
+			procedure = WriteProcedure(
+				segment.seg.fileoff + shiftDelta,
+				0,
+				segment.seg.filesize,
+				BytesFileContext(extractionCtx.extraSegmentData)
+			)
+			pass
+		else:
+			procedure = WriteProcedure(
+				segment.seg.fileoff + shiftDelta,
+				dyldCtx.convertAddr(segment.seg.vmaddr)[0],
+				segment.seg.filesize,
+				machoCtx.fileForAddr(segment.seg.vmaddr)
+			)
+			pass
 		writeProcedures.append(procedure)
 
 		if segname == b"__LINKEDIT":
diff --git a/src/DyldExtractor/converter/objc_fixer.py b/src/DyldExtractor/converter/objc_fixer.py
@@ -24,14 +24,15 @@
 	objc_protocol_t
 )
 
-from DyldExtractor.macho.macho_context import MachOContext
 from DyldExtractor.macho.macho_structs import (
 	LoadCommands,
 	linkedit_data_command,
 	mach_header_64,
 	segment_command_64
 )
 
+from DyldExtractor.dyld.dyld_structs import dyld_cache_mapping_info
+
 
 # Change modify the disasm_lite to accept an offset
 # This is used to speed up the disassembly, and should
@@ -252,11 +253,10 @@ def _findAddInstructions(
 		self,
 		startIdx: int,
 		adrpReg: str,
-		_processed: Set[int] = None
 	) -> Set[int]:
 		"""Find ADD instructions given an ADRP register.
 
-		This will recursively follow branches and stop
+		This will iteratively follow branches and stop
 		when the ADRP range ends.
 
 		Args:
@@ -265,98 +265,92 @@ def _findAddInstructions(
 			A list of indices to the ADD instructions.
 		"""
 
-		# Keep track of start indexes that are already processed
-		if _processed is None:
-			_processed = {startIdx}
-			pass
-		else:
-			if startIdx in _processed:
-				return set()
-			else:
-				_processed.add(startIdx)
-			pass
-
 		addIdxs = set()
-		i = startIdx
 
-		while i < len(self._textInstr) and i >= 0:
-			address, mnemonic, opcodes = self._textInstr[i]
+		# set of indices that have been or are being processed
+		processedIdx = set()
+
+		# list of indices that need to be processed
+		startIndices = [startIdx]
 
-			# check if the ADRP dest reg matches the base reg for the ADD
-			if mnemonic == "add" and opcodes[1] == adrpReg:
-				addIdxs.add(i)
+		while len(startIndices):
+			i = startIndices.pop()
+			if i in processedIdx:
+				continue
+			else:
+				processedIdx.add(i)
 				pass
 
-			# If there is an unconditional branch, and it points
-			# within the text section, follow it. If it does not
-			# point within the text section, end the ADRP range.
-			if mnemonic == "b":
-				branchAddr = int(opcodes[0][1:], 16)
-				idxDelta = int((branchAddr - address) / 4)
-				i += idxDelta
+			while i < len(self._textInstr) and i >= 0:
+				address, mnemonic, opcodes = self._textInstr[i]
 
-				if i in _processed:
-					break
-				else:
-					_processed.add(i)
+				# check if the ADRP dest reg matches the base reg for the ADD
+				if mnemonic == "add" and opcodes[1] == adrpReg:
+					addIdxs.add(i)
+					pass
 
-				if i < 0 or i >= len(self._textInstr):
-					break
-				continue
+				# If there is an unconditional branch, and it points
+				# within the text section, follow it. If it does not
+				# point within the text section, end the ADRP range.
+				if mnemonic == "b":
+					branchAddr = int(opcodes[0][1:], 16)
+					idxDelta = int((branchAddr - address) / 4)
+					i += idxDelta
 
-			# If there is a conditional branch, follow it and continue
-			if mnemonic[0:2] == "b.":
-				branchAddr = int(opcodes[0][1:], 16)
-				idxDelta = int((branchAddr - address) / 4)
-				addIdxs.update(
-					self._findAddInstructions(i + idxDelta, adrpReg, _processed=_processed)
-				)
-				pass
-			elif mnemonic == "cbz" or mnemonic == "cbnz":
-				branchAddr = int(opcodes[1][1:], 16)
-				idxDelta = int((branchAddr - address) / 4)
-				addIdxs.update(
-					self._findAddInstructions(i + idxDelta, adrpReg, _processed=_processed)
-				)
-				pass
-			elif mnemonic == "tbz" or mnemonic == "tbnz":
-				branchAddr = int(opcodes[2][1:], 16)
-				idxDelta = int((branchAddr - address) / 4)
-				addIdxs.update(
-					self._findAddInstructions(i + idxDelta, adrpReg, _processed=_processed)
-				)
-				pass
+					if i < 0 or i >= len(self._textInstr):
+						break
 
-			# End the ADRP range if the function returns
-			if mnemonic in (
-				"ret",
-				"retaa",
-				"retab"
-			):
-				break
+					startIndices.append(i)
+					break
 
-			# If we find an instruction modifying the register,
-			# the adrp range probably ended.
-			if adrpReg == opcodes[0]:
-				break
+				# If there is a conditional branch, follow it and continue
+				elif mnemonic[0:2] == "b.":
+					branchAddr = int(opcodes[0][1:], 16)
+					idxDelta = int((branchAddr - address) / 4)
+					startIndices.append(i + idxDelta)
+					pass
+				elif mnemonic == "cbz" or mnemonic == "cbnz":
+					branchAddr = int(opcodes[1][1:], 16)
+					idxDelta = int((branchAddr - address) / 4)
+					startIndices.append(i + idxDelta)
+					pass
+				elif mnemonic == "tbz" or mnemonic == "tbnz":
+					branchAddr = int(opcodes[2][1:], 16)
+					idxDelta = int((branchAddr - address) / 4)
+					startIndices.append(i + idxDelta)
+					pass
 
-			# These instructions modify 2 registers.
-			if mnemonic in (
-				"ldaxp",
-				"ldnp",
-				"ldpsw",
-				"ldxp",
-				"stlxp",
-				"stnp",
-				"stp",
-				"stxp",
-				"ldp"
-			):
-				if adrpReg == opcodes[1]:
+				# End the ADRP range if the function returns
+				if mnemonic in (
+					"ret",
+					"retaa",
+					"retab"
+				):
+					break
+
+				# If we find an instruction modifying the register,
+				# the adrp range probably ended.
+				if adrpReg == opcodes[0]:
 					break
-				pass
 
-			i += 1
+				# These instructions modify 2 registers.
+				if mnemonic in (
+					"ldaxp",
+					"ldnp",
+					"ldpsw",
+					"ldxp",
+					"stlxp",
+					"stnp",
+					"stp",
+					"stxp",
+					"ldp"
+				):
+					if adrpReg == opcodes[1]:
+						break
+					pass
+
+				i += 1
+				pass
 			pass
 
 		return addIdxs
@@ -475,11 +469,9 @@ def _createExtraSegment(self) -> None:
 			raise _ObjCFixerError("Unable to find space for the extra ObjC segment.")
 
 		# Get a starting address for the new segment
+		leftSegOff = self._dyldCtx.convertAddr(leftSeg.vmaddr)[0]
 		newSegStartAddr = (leftSeg.vmaddr + leftSeg.vmsize + 0x1000) & ~0xFFF
-		newSegStartOff = (
-			self._dyldCtx.convertAddr(leftSeg.vmaddr)[0] + leftSeg.vmsize
-			+ 0x1000
-		) & ~0xFFF
+		newSegStartOff = (leftSegOff + leftSeg.vmsize + 0x1000) & ~0xFFF
 
 		# adjust max gap size to account for page alignment
 		maxGapSize -= newSegStartAddr - (leftSeg.vmaddr + leftSeg.vmsize)
@@ -488,7 +480,7 @@ def _createExtraSegment(self) -> None:
 		newSegment = segment_command_64()
 		newSegment.cmd = LoadCommands.LC_SEGMENT_64
 		newSegment.cmdsize = segment_command_64.SIZE  # no sections
-		newSegment.segname = b"__EXTRA_OBJC"
+		newSegment.segname = self._extractionCtx.EXTRA_SEGMENT_NAME
 		newSegment.vmaddr = newSegStartAddr
 		newSegment.fileoff = newSegStartOff
 		newSegment.maxprot = 3  # read and write
@@ -1040,7 +1032,6 @@ def _processMethodList(self, methodListAddr: int, noImp=False) -> int:
 		# fix relative pointers after we reserve a new address for the method list
 		# contains a list of tuples of field offsets and their target addresses
 		relativeFixups: list[tuple[int, int]] = []
-
 		for i in range(methodListDef.count):
 			methodAddr = (
 				methodListAddr
@@ -1054,10 +1045,13 @@ def _processMethodList(self, methodListAddr: int, noImp=False) -> int:
 
 				if methodDef.name:
 					nameAddr = methodAddr + methodDef.name
-					newNameAddr = self._processString(nameAddr)
-					methodDef.name = newNameAddr - methodAddr
+					if (newNameAddr := self._processString(nameAddr)) is not None:
+						methodDef.name = newNameAddr - methodAddr
 
-					relativeFixups.append((methodOff, newNameAddr))
+						relativeFixups.append((methodOff, newNameAddr))
+					else:
+						methodDef.name = 0
+						self._logger.warning(f"Unable to get string for method at {hex(methodAddr)}")  # noqa
 					pass
 
 				if methodDef.types:
@@ -1128,7 +1122,10 @@ def _processString(self, stringAddr: int) -> int:
 		else:
 			newStringAddr = self._extraDataHead
 
-			stringOff, ctx = self._dyldCtx.convertAddr(stringAddr)
+			stringOff, ctx = self._dyldCtx.convertAddr(stringAddr) or (None, None)
+			if not stringOff:
+				return None
+
 			stringData = ctx.fileCtx.readString(stringOff)
 			self._addExtraData(stringData)
 			pass
@@ -1259,9 +1256,10 @@ def _makeHeaderSpace(self, spaceNeeded: int) -> None:
 		pass
 
 	def _addExtraDataSeg(self) -> None:
-		# update the size on the new segment
-		self._extraSegment.vmsize = len(self._extraData)
-		self._extraSegment.filesize = len(self._extraData)
+		# update the size on the new segment and mappings
+		extraDataLen = len(self._extraData)
+		self._extraSegment.vmsize = extraDataLen
+		self._extraSegment.filesize = extraDataLen
 
 		# insert the segment command right before the linkedit
 		moveStart = self._machoCtx.segments[b"__LINKEDIT"].seg._fileOff_
@@ -1279,16 +1277,8 @@ def _addExtraDataSeg(self) -> None:
 
 		self._machoCtx.fileCtx.writeBytes(moveStart, self._extraSegment)
 
-		# add the new data and the segment command
-		file = self._machoCtx.fileForAddr(self._extraSegment.vmaddr)
-		if not file:
-			self._logger.error("Encountered edge case when adding extra data!")
-			return
-
-		file.writeBytes(
-			self._extraSegment.fileoff,
-			self._extraData
-		)
+		# update the extraction context
+		self._extractionCtx.extraSegmentData = self._extraData
 
 		# update the header
 		self._machoCtx.header.ncmds += 1
diff --git a/src/DyldExtractor/converter/slide_info.py b/src/DyldExtractor/converter/slide_info.py
@@ -22,7 +22,6 @@
 	dyld_cache_slide_pointer3
 )
 
-from DyldExtractor.macho.macho_context import MachOContext
 from DyldExtractor.macho.macho_structs import (
 	segment_command_64
 )
diff --git a/src/DyldExtractor/dyld/dyld_structs.py b/src/DyldExtractor/dyld/dyld_structs.py
@@ -4,9 +4,7 @@
 https://opensource.apple.com/source/dyld/dyld-832.7.3/dyld3/shared-cache/dyld_cache_format.h.auto.html
 """
 
-import struct
 import sys
-from enum import IntEnum
 from ctypes import (
 	c_char,
 	c_uint8,
diff --git a/src/DyldExtractor/dyld/dyld_trie.py b/src/DyldExtractor/dyld/dyld_trie.py
@@ -1,4 +1,3 @@
-import logging
 import dataclasses
 from mmap import mmap
 
diff --git a/src/DyldExtractor/extraction_context.py b/src/DyldExtractor/extraction_context.py
@@ -26,6 +26,11 @@ class ExtractionContext(object):
 	"""
 	hasRedactedIndirect: bool = False
 
+	# The name of the extra data segment,
+	# and an out-of-file location to store that segment's data.
+	EXTRA_SEGMENT_NAME = b"__EXTRA_OBJC"
+	extraSegmentData: bytes
+
 	def __init__(
 		self,
 		dyldCtx: DyldContext,
diff --git a/src/DyldExtractor/macho/segment_context.py b/src/DyldExtractor/macho/segment_context.py
@@ -1,7 +1,6 @@
 from mmap import mmap
 from typing import List, Dict
 
-from DyldExtractor.structure import Structure
 
 from DyldExtractor.macho.macho_structs import (
 	segment_command_64,
diff --git a/src/DyldExtractor/objc/objc_structs.py b/src/DyldExtractor/objc/objc_structs.py
diff --git a/tests/run_all_images_multiprocess.py b/tests/run_all_images_multiprocess.py

Original file line number	Diff line number	Diff line change
`@@ -22,7 +22,6 @@`
`22`	`22`	`dyld_cache_slide_pointer3`
`23`	`23`	`)`
`24`	`24`
`25`		`-from DyldExtractor.macho.macho_context import MachOContext`
`26`	`25`	`from DyldExtractor.macho.macho_structs import (`
`27`	`26`	`segment_command_64`
`28`	`27`	`)`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-import logging`
`2`	`1`	`import dataclasses`
`3`	`2`	`from mmap import mmap`
`4`	`3`