From 3ba0297e18c769072c0d7e8a87d45a71a74d8d50 Mon Sep 17 00:00:00 2001 From: samjviana Date: Thu, 18 Sep 2025 22:20:42 -0300 Subject: [PATCH 01/12] feat: add fallback to hash the game's binary using the bytes from the running process --- pymhf/core/hashing.py | 364 ++++++++++++++++++++++++++++++++++++++++++ pymhf/core/utils.py | 16 -- pymhf/main.py | 10 +- 3 files changed, 371 insertions(+), 19 deletions(-) create mode 100644 pymhf/core/hashing.py diff --git a/pymhf/core/hashing.py b/pymhf/core/hashing.py new file mode 100644 index 0000000..1acf711 --- /dev/null +++ b/pymhf/core/hashing.py @@ -0,0 +1,364 @@ +import ctypes +import ctypes.wintypes as wintypes +import hashlib +from io import BufferedReader +from os.path import basename, normcase +from typing import TYPE_CHECKING, Any, TypeAlias + +import psutil + +# Just doing this for type hinting purposes to avoid using "Any" for the ctypes objects +if TYPE_CHECKING: + from ctypes import _CData, _Pointer, _SimpleCData + from typing import TypeAlias + + CDataLike: TypeAlias = ( + _CData | _SimpleCData | _Pointer[Any] | ctypes.Structure | ctypes.Union | ctypes.Array[Any] + ) +else: + CDataLike = Any + +kernel32 = ctypes.WinDLL("kernel32", use_last_error=True) +psapi = ctypes.WinDLL("psapi", use_last_error=True) + +PROCESS_VM_READ = 0x0010 +PROCESS_QUERY_INFORMATION = 0x0400 +PROCESS_QUERY_LIMITED_INFORMATION = 0x1000 + +LIST_MODULES_ALL = 0x01 | 0x02 # LIST_MODULES_32BIT | LIST_MODULES_64BIT + +IMAGE_DOS_SIGNATURE = 0x5A4D +IMAGE_NT_SIGNATURE = 0x00004550 + +IMAGE_SIZEOF_SHORT_NAME = 8 + +IMAGE_SCN_MEM_WRITE = 0x80000000 + + +# ctypes/wintypes/kernel32/psapi struct definitions +class MODULEINFO(ctypes.Structure): + _fields_ = [ + ("lpBaseOfDll", wintypes.LPVOID), + ("SizeOfImage", wintypes.DWORD), + ("EntryPoint", wintypes.LPVOID), + ] + + +class IMAGE_DOS_HEADER(ctypes.Structure): + _fields_ = [ + ("e_magic", wintypes.WORD), + ("e_cblp", wintypes.WORD), + ("e_cp", wintypes.WORD), + ("e_crlc", wintypes.WORD), + 
("e_cparhdr", wintypes.WORD), + ("e_minalloc", wintypes.WORD), + ("e_maxalloc", wintypes.WORD), + ("e_ss", wintypes.WORD), + ("e_sp", wintypes.WORD), + ("e_csum", wintypes.WORD), + ("e_ip", wintypes.WORD), + ("e_cs", wintypes.WORD), + ("e_lfarlc", wintypes.WORD), + ("e_ovno", wintypes.WORD), + ("e_res", wintypes.WORD * 4), + ("e_oemid", wintypes.WORD), + ("e_oeminfo", wintypes.WORD), + ("e_res2", wintypes.WORD * 10), + ("e_lfanew", wintypes.LONG), + ] + + +class IMAGE_FILE_HEADER(ctypes.Structure): + _fields_ = [ + ("Machine", wintypes.WORD), + ("NumberOfSections", wintypes.WORD), + ("TimeDateStamp", wintypes.DWORD), + ("PointerToSymbolTable", wintypes.DWORD), + ("NumberOfSymbols", wintypes.DWORD), + ("SizeOfOptionalHeader", wintypes.WORD), + ("Characteristics", wintypes.WORD), + ] + + +class IMAGE_SECTION_HEADER(ctypes.Structure): + class _Misc(ctypes.Union): + _fields_ = [("PhysicalAddress", wintypes.DWORD), ("VirtualSize", wintypes.DWORD)] + + _anonymous_ = ("Misc",) + _fields_ = [ + ("Name", wintypes.BYTE * IMAGE_SIZEOF_SHORT_NAME), + ("Misc", _Misc), + ("VirtualAddress", wintypes.DWORD), + ("SizeOfRawData", wintypes.DWORD), + ("PointerToRawData", wintypes.DWORD), + ("PointerToRelocations", wintypes.DWORD), + ("PointerToLinenumbers", wintypes.DWORD), + ("NumberOfRelocations", wintypes.WORD), + ("NumberOfLinenumbers", wintypes.WORD), + ("Characteristics", wintypes.DWORD), + ] + + +class SYSTEM_INFO(ctypes.Structure): + class _DUMMYUNIONNAME(ctypes.Union): + class _DUMMYSTRUCTNAME(ctypes.Structure): + _fields_ = [ + ("wProcessorArchitecture", wintypes.WORD), + ("wReserved", wintypes.WORD), + ] + + _fields_ = [("dwOemId", wintypes.DWORD), ("s", _DUMMYSTRUCTNAME)] + + _anonymous_ = ("u",) + _fields_ = [ + ("u", _DUMMYUNIONNAME), + ("dwPageSize", wintypes.DWORD), + ("lpMinimumApplicationAddress", wintypes.LPVOID), + ("lpMaximumApplicationAddress", wintypes.LPVOID), + ("dwActiveProcessorMask", ctypes.POINTER(wintypes.DWORD)), + ("dwNumberOfProcessors", 
wintypes.DWORD), + ("dwProcessorType", wintypes.DWORD), + ("dwAllocationGranularity", wintypes.DWORD), + ("wProcessorLevel", wintypes.WORD), + ("wProcessorRevision", wintypes.WORD), + ] + + +kernel32.OpenProcess.argtypes = [wintypes.DWORD, wintypes.BOOL, wintypes.DWORD] +kernel32.OpenProcess.restype = wintypes.HANDLE + +kernel32.ReadProcessMemory.argtypes = [ + wintypes.HANDLE, + wintypes.LPCVOID, + wintypes.LPVOID, + ctypes.c_size_t, + ctypes.POINTER(ctypes.c_size_t), +] +kernel32.ReadProcessMemory.restype = wintypes.BOOL + +kernel32.GetSystemInfo.argtypes = [ctypes.POINTER(SYSTEM_INFO)] +kernel32.GetSystemInfo.restype = None + + +psapi.EnumProcessModulesEx.argtypes = [ + wintypes.HANDLE, + ctypes.POINTER(wintypes.HMODULE), + wintypes.DWORD, + wintypes.LPDWORD, + wintypes.DWORD, +] +psapi.EnumProcessModulesEx.restype = wintypes.BOOL + +psapi.GetModuleFileNameExW.argtypes = [ + wintypes.HANDLE, + wintypes.HMODULE, + wintypes.LPWSTR, + wintypes.DWORD, +] +psapi.GetModuleFileNameExW.restype = wintypes.DWORD + +psapi.GetModuleInformation.argtypes = [ + wintypes.HANDLE, + wintypes.HMODULE, + ctypes.POINTER(MODULEINFO), + wintypes.DWORD, +] +psapi.GetModuleInformation.restype = wintypes.BOOL + + +def hash_bytes_from_file(fileobj: BufferedReader, _bufsize: int = 2**18) -> str: + # Essentially implement hashlib.file_digest since it's python 3.11+ + # cf. https://github.com/python/cpython/blob/main/Lib/hashlib.py#L195 + digestobj = hashlib.sha1() + buf = bytearray(_bufsize) # Reusable buffer to reduce allocations. 
+ view = memoryview(buf) + while True: + size = fileobj.readinto(buf) + if size == 0: + break # EOF + digestobj.update(view[:size]) + return digestobj.hexdigest() + + +def _read_process_memory( + process_handle: wintypes.HANDLE, + address: int, + out_obj: CDataLike, + size: int | None = None, + out_bytes_read: ctypes.c_size_t = ctypes.c_size_t(), + raise_on_err: bool = True, +) -> wintypes.BOOL: + if size is None: + size = ctypes.sizeof(out_obj) + res = kernel32.ReadProcessMemory( + process_handle, + ctypes.c_void_p(address), + ctypes.byref(out_obj), + size, + ctypes.byref(out_bytes_read), + ) + if raise_on_err and not res or out_bytes_read.value != size: + raise OSError(f"Failed to read memory at 0x{address:X}: {ctypes.get_last_error()}") + + return res + + +def _get_page_size() -> int: + sys_info = SYSTEM_INFO() + kernel32.GetSystemInfo(ctypes.byref(sys_info)) + return sys_info.dwPageSize + + +def hash_bytes_from_memory(binary_path: str, _bufsize: int = 2**18) -> str: + # Compute SHA-1 hash for the static parts of the given binary if it is currently running. 
+ # "Static parts" here refers to areas without "WRITE" permissions (e.g.: .text, .rdata, etc) + pid = None + normalized_path = normcase(binary_path) + exe_name = basename(normalized_path).lower() + + # Find the PID of the process with the given binary path + for process in psutil.process_iter(["pid", "name", "exe"]): + try: + exe = process.info.get("exe", "") + name = (process.info.get("name", "")).lower() + if exe and normcase(exe) == normalized_path and name == exe_name: + pid = process.info["pid"] + break + except (psutil.NoSuchProcess, psutil.AccessDenied): + continue + + if not pid: + raise ProcessLookupError(f"Could not find running process for {binary_path}") + + # Open the process with some basic read/query permissions + access = PROCESS_VM_READ | PROCESS_QUERY_INFORMATION | PROCESS_QUERY_LIMITED_INFORMATION + process_handle = kernel32.OpenProcess(access, False, pid) + if not process_handle: + raise OSError(f"Failed to open process {pid}: {ctypes.get_last_error()}") + + # Find the main module of the opened process + try: # This try/finally ensures we always close the process handle + modules = (wintypes.HMODULE * 256)() + bytes_needed = wintypes.DWORD() + res = psapi.EnumProcessModulesEx( + process_handle, modules, ctypes.sizeof(modules), ctypes.byref(bytes_needed), LIST_MODULES_ALL + ) + if not res: + raise OSError(f"Failed to enumerate process modules for {pid}: {ctypes.get_last_error()}") + + num_modules = min(bytes_needed.value // ctypes.sizeof(wintypes.HMODULE), 256) + main_module = None + for i in range(num_modules): + buffer = ctypes.create_unicode_buffer(1024) + psapi.GetModuleFileNameExW(process_handle, modules[i], buffer, ctypes.sizeof(buffer)) + module_path = buffer.value + if normcase(module_path) == normalized_path or basename(module_path).lower() == exe_name: + main_module = modules[i] + break + if not main_module: + main_module = modules[0] # Usually the first module is the main module + # If you want to be strict, maybe raise an error or 
return here instead + # raise OSError(f"Could not find main module for process {pid}") + + # Read module information + modinfo = MODULEINFO() + res = psapi.GetModuleInformation( + process_handle, main_module, ctypes.byref(modinfo), ctypes.sizeof(modinfo) + ) + if not res: + raise OSError(f"Failed to get module information for {pid}: {ctypes.get_last_error()}") + + base_address = int(modinfo.lpBaseOfDll) + module_size = int(modinfo.SizeOfImage) + + # Parse PE headers to find sections + dos_header = IMAGE_DOS_HEADER() + _read_process_memory(process_handle, base_address, dos_header) + if dos_header.e_magic != IMAGE_DOS_SIGNATURE: + raise ValueError(f"Invalid DOS header magic for process {pid}") + + signature = wintypes.DWORD() + _read_process_memory(process_handle, base_address + dos_header.e_lfanew, signature) + if signature.value != IMAGE_NT_SIGNATURE: + raise ValueError(f"Invalid PE header signature for process {pid}") + + header = IMAGE_FILE_HEADER() + _read_process_memory( + process_handle, base_address + dos_header.e_lfanew + ctypes.sizeof(wintypes.DWORD), header + ) + + num_sections = header.NumberOfSections + opt_header_size = header.SizeOfOptionalHeader + section_base = ( + base_address + + dos_header.e_lfanew + + ctypes.sizeof(wintypes.DWORD) + + ctypes.sizeof(IMAGE_FILE_HEADER) + + opt_header_size + ) + + # Build a list with all sections to be hashed + sections = [] + for i in range(num_sections): + section_header = IMAGE_SECTION_HEADER() + _read_process_memory( + process_handle, section_base + i * ctypes.sizeof(IMAGE_SECTION_HEADER), section_header + ) + + characteristics = section_header.Characteristics + if characteristics & IMAGE_SCN_MEM_WRITE: + continue + + virtual_address = int(section_header.VirtualAddress) + virtual_size = int(section_header.Misc.VirtualSize) or int(section_header.SizeOfRawData) + if virtual_address == 0 or virtual_size == 0: + continue + + end_address = min(virtual_address + virtual_size, module_size) + if end_address <= 
virtual_address: + continue + + section = ( + virtual_address, + end_address - virtual_address, + bytes(bytearray(section_header.Name)).rstrip(b"\x00").decode(errors="ignore"), + ) + sections.append(section) + + page_size = _get_page_size() + if not page_size: + page_size = 4096 # Usually Windows page size is 4KiB + + # Apply the hashing on all sections after sorting by address to ensure consistency + if not sections: + raise ValueError(f"No valid sections found to hash for process {pid}") + + sections.sort(key=lambda s: s[0]) + + digest = hashlib.sha1() + buffer = (ctypes.c_ubyte * _bufsize)() + bytes_read = ctypes.c_size_t() + for rva, size, name in sections: + offset = 0 + while offset < size: + to_read = min(_bufsize, size - offset) + res = _read_process_memory( + process_handle, + base_address + rva + offset, + buffer, + to_read, + bytes_read, + raise_on_err=False, + ) + if not res or bytes_read.value == 0: + offset += page_size + continue + + mem_view = memoryview(buffer)[: bytes_read.value] + digest.update(mem_view) + offset += bytes_read.value + + return digest.hexdigest() + + finally: + kernel32.CloseHandle(process_handle) diff --git a/pymhf/core/utils.py b/pymhf/core/utils.py index 99713fa..26df176 100644 --- a/pymhf/core/utils.py +++ b/pymhf/core/utils.py @@ -1,10 +1,8 @@ -import hashlib import logging from collections.abc import Callable from concurrent.futures import ThreadPoolExecutor from ctypes import byref, c_ulong, create_unicode_buffer, windll from functools import wraps -from io import BufferedReader from typing import Optional import psutil @@ -124,20 +122,6 @@ def does_pid_have_focus(pid: int) -> bool: return pid == get_foreground_pid() -def hash_bytes(fileobj: BufferedReader, _bufsize: int = 2**18) -> str: - # Essentially implement hashlib.file_digest since it's python 3.11+ - # cf. https://github.com/python/cpython/blob/main/Lib/hashlib.py#L195 - digestobj = hashlib.sha1() - buf = bytearray(_bufsize) # Reusable buffer to reduce allocations. 
- view = memoryview(buf) - while True: - size = fileobj.readinto(buf) - if size == 0: - break # EOF - digestobj.update(view[:size]) - return digestobj.hexdigest() - - # TODO: Do something about this... # class AutosavingConfig(ConfigParser): # def __init__(self, *args, **kwargs): diff --git a/pymhf/main.py b/pymhf/main.py index 21b9023..194a7c3 100644 --- a/pymhf/main.py +++ b/pymhf/main.py @@ -22,7 +22,7 @@ from pymhf.core.log_handling import open_log_console from pymhf.core.process import start_process from pymhf.core.protocols import ESCAPE_SEQUENCE, TerminalProtocol -from pymhf.core.utils import hash_bytes +from pymhf.core.hashing import hash_bytes_from_file, hash_bytes_from_memory from pymhf.utils.config import canonicalize_setting from pymhf.utils.parse_toml import read_pymhf_settings from pymhf.utils.winapi import get_exe_path_from_pid @@ -319,8 +319,12 @@ def kill_injected_code(loop: asyncio.AbstractEventLoop): # Have a small nap just to give it some time. time.sleep(0.5) if binary_path: - with open(binary_path, "rb") as f: - binary_hash = hash_bytes(f) + try: + with open(binary_path, "rb") as f: + binary_hash = hash_bytes_from_file(f) + except PermissionError: + print(f"Cannot open {binary_path!r} to hash it. 
Trying to read from memory...") + binary_hash = hash_bytes_from_memory(binary_path) print(f"Exe hash is: {binary_hash}") else: binary_hash = 0 From 79a85f614351ad2ee696de0c94a78e2e52a9b431 Mon Sep 17 00:00:00 2001 From: samjviana Date: Sun, 21 Sep 2025 09:12:16 -0300 Subject: [PATCH 02/12] changed hashing to use `pymem` as input --- pymhf/core/hashing.py | 482 ++++++++++++++++-------------------------- pymhf/main.py | 2 +- pymhf/utils/winapi.py | 102 ++++++++- 3 files changed, 281 insertions(+), 305 deletions(-) diff --git a/pymhf/core/hashing.py b/pymhf/core/hashing.py index 1acf711..03ba95e 100644 --- a/pymhf/core/hashing.py +++ b/pymhf/core/hashing.py @@ -1,16 +1,38 @@ import ctypes import ctypes.wintypes as wintypes import hashlib +import os from io import BufferedReader -from os.path import basename, normcase from typing import TYPE_CHECKING, Any, TypeAlias import psutil +import pymem +from pymem.ressources.structure import ( + MEMORY_BASIC_INFORMATION, + MEMORY_BASIC_INFORMATION32, + MEMORY_BASIC_INFORMATION64, + MEMORY_PROTECTION, + MEMORY_STATE, + MEMORY_TYPES, + MODULEINFO, + SYSTEM_INFO, +) + +from pymhf.utils.winapi import ( + IMAGE_DOS_HEADER, + IMAGE_DOS_SIGNATURE, + IMAGE_FILE_HEADER, + IMAGE_NT_SIGNATURE, + IMAGE_SCN_MEM_EXECUTE, + IMAGE_SCN_MEM_WRITE, + IMAGE_SECTION_HEADER, + GetSystemInfo, + ReadProcessMemory, + VirtualQueryEx, +) -# Just doing this for type hinting purposes to avoid using "Any" for the ctypes objects if TYPE_CHECKING: from ctypes import _CData, _Pointer, _SimpleCData - from typing import TypeAlias CDataLike: TypeAlias = ( _CData | _SimpleCData | _Pointer[Any] | ctypes.Structure | ctypes.Union | ctypes.Array[Any] @@ -18,166 +40,23 @@ else: CDataLike = Any -kernel32 = ctypes.WinDLL("kernel32", use_last_error=True) -psapi = ctypes.WinDLL("psapi", use_last_error=True) - -PROCESS_VM_READ = 0x0010 -PROCESS_QUERY_INFORMATION = 0x0400 -PROCESS_QUERY_LIMITED_INFORMATION = 0x1000 - -LIST_MODULES_ALL = 0x01 | 0x02 # LIST_MODULES_32BIT | 
LIST_MODULES_64BIT - -IMAGE_DOS_SIGNATURE = 0x5A4D -IMAGE_NT_SIGNATURE = 0x00004550 - -IMAGE_SIZEOF_SHORT_NAME = 8 - -IMAGE_SCN_MEM_WRITE = 0x80000000 - - -# ctypes/wintypes/kernel32/psapi struct definitions -class MODULEINFO(ctypes.Structure): - _fields_ = [ - ("lpBaseOfDll", wintypes.LPVOID), - ("SizeOfImage", wintypes.DWORD), - ("EntryPoint", wintypes.LPVOID), - ] - - -class IMAGE_DOS_HEADER(ctypes.Structure): - _fields_ = [ - ("e_magic", wintypes.WORD), - ("e_cblp", wintypes.WORD), - ("e_cp", wintypes.WORD), - ("e_crlc", wintypes.WORD), - ("e_cparhdr", wintypes.WORD), - ("e_minalloc", wintypes.WORD), - ("e_maxalloc", wintypes.WORD), - ("e_ss", wintypes.WORD), - ("e_sp", wintypes.WORD), - ("e_csum", wintypes.WORD), - ("e_ip", wintypes.WORD), - ("e_cs", wintypes.WORD), - ("e_lfarlc", wintypes.WORD), - ("e_ovno", wintypes.WORD), - ("e_res", wintypes.WORD * 4), - ("e_oemid", wintypes.WORD), - ("e_oeminfo", wintypes.WORD), - ("e_res2", wintypes.WORD * 10), - ("e_lfanew", wintypes.LONG), - ] - - -class IMAGE_FILE_HEADER(ctypes.Structure): - _fields_ = [ - ("Machine", wintypes.WORD), - ("NumberOfSections", wintypes.WORD), - ("TimeDateStamp", wintypes.DWORD), - ("PointerToSymbolTable", wintypes.DWORD), - ("NumberOfSymbols", wintypes.DWORD), - ("SizeOfOptionalHeader", wintypes.WORD), - ("Characteristics", wintypes.WORD), - ] - - -class IMAGE_SECTION_HEADER(ctypes.Structure): - class _Misc(ctypes.Union): - _fields_ = [("PhysicalAddress", wintypes.DWORD), ("VirtualSize", wintypes.DWORD)] - - _anonymous_ = ("Misc",) - _fields_ = [ - ("Name", wintypes.BYTE * IMAGE_SIZEOF_SHORT_NAME), - ("Misc", _Misc), - ("VirtualAddress", wintypes.DWORD), - ("SizeOfRawData", wintypes.DWORD), - ("PointerToRawData", wintypes.DWORD), - ("PointerToRelocations", wintypes.DWORD), - ("PointerToLinenumbers", wintypes.DWORD), - ("NumberOfRelocations", wintypes.WORD), - ("NumberOfLinenumbers", wintypes.WORD), - ("Characteristics", wintypes.DWORD), - ] - - -class SYSTEM_INFO(ctypes.Structure): - 
class _DUMMYUNIONNAME(ctypes.Union): - class _DUMMYSTRUCTNAME(ctypes.Structure): - _fields_ = [ - ("wProcessorArchitecture", wintypes.WORD), - ("wReserved", wintypes.WORD), - ] - - _fields_ = [("dwOemId", wintypes.DWORD), ("s", _DUMMYSTRUCTNAME)] - - _anonymous_ = ("u",) - _fields_ = [ - ("u", _DUMMYUNIONNAME), - ("dwPageSize", wintypes.DWORD), - ("lpMinimumApplicationAddress", wintypes.LPVOID), - ("lpMaximumApplicationAddress", wintypes.LPVOID), - ("dwActiveProcessorMask", ctypes.POINTER(wintypes.DWORD)), - ("dwNumberOfProcessors", wintypes.DWORD), - ("dwProcessorType", wintypes.DWORD), - ("dwAllocationGranularity", wintypes.DWORD), - ("wProcessorLevel", wintypes.WORD), - ("wProcessorRevision", wintypes.WORD), - ] - - -kernel32.OpenProcess.argtypes = [wintypes.DWORD, wintypes.BOOL, wintypes.DWORD] -kernel32.OpenProcess.restype = wintypes.HANDLE - -kernel32.ReadProcessMemory.argtypes = [ - wintypes.HANDLE, - wintypes.LPCVOID, - wintypes.LPVOID, - ctypes.c_size_t, - ctypes.POINTER(ctypes.c_size_t), -] -kernel32.ReadProcessMemory.restype = wintypes.BOOL - -kernel32.GetSystemInfo.argtypes = [ctypes.POINTER(SYSTEM_INFO)] -kernel32.GetSystemInfo.restype = None - - -psapi.EnumProcessModulesEx.argtypes = [ - wintypes.HANDLE, - ctypes.POINTER(wintypes.HMODULE), - wintypes.DWORD, - wintypes.LPDWORD, - wintypes.DWORD, -] -psapi.EnumProcessModulesEx.restype = wintypes.BOOL - -psapi.GetModuleFileNameExW.argtypes = [ - wintypes.HANDLE, - wintypes.HMODULE, - wintypes.LPWSTR, - wintypes.DWORD, -] -psapi.GetModuleFileNameExW.restype = wintypes.DWORD - -psapi.GetModuleInformation.argtypes = [ - wintypes.HANDLE, - wintypes.HMODULE, - ctypes.POINTER(MODULEINFO), - wintypes.DWORD, -] -psapi.GetModuleInformation.restype = wintypes.BOOL - -def hash_bytes_from_file(fileobj: BufferedReader, _bufsize: int = 2**18) -> str: - # Essentially implement hashlib.file_digest since it's python 3.11+ - # cf. 
https://github.com/python/cpython/blob/main/Lib/hashlib.py#L195 - digestobj = hashlib.sha1() - buf = bytearray(_bufsize) # Reusable buffer to reduce allocations. - view = memoryview(buf) - while True: - size = fileobj.readinto(buf) - if size == 0: - break # EOF - digestobj.update(view[:size]) - return digestobj.hexdigest() +def _is_hashable_page(mbi: MEMORY_BASIC_INFORMATION32 | MEMORY_BASIC_INFORMATION64) -> bool: + if mbi.State != MEMORY_STATE.MEM_COMMIT: + return False + if mbi.Type != MEMORY_TYPES.MEM_IMAGE: + return False + if mbi.Protect & ( + MEMORY_PROTECTION.PAGE_GUARD + | MEMORY_PROTECTION.PAGE_WRITECOPY + | MEMORY_PROTECTION.PAGE_EXECUTE_WRITECOPY + ): + return False + if mbi.Protect & (MEMORY_PROTECTION.PAGE_READWRITE | MEMORY_PROTECTION.PAGE_EXECUTE_READWRITE): + return False + if not (mbi.Protect & (MEMORY_PROTECTION.PAGE_EXECUTE | MEMORY_PROTECTION.PAGE_EXECUTE_READ)): + return False + return True def _read_process_memory( @@ -190,175 +69,180 @@ def _read_process_memory( ) -> wintypes.BOOL: if size is None: size = ctypes.sizeof(out_obj) - res = kernel32.ReadProcessMemory( + res = ReadProcessMemory( process_handle, ctypes.c_void_p(address), ctypes.byref(out_obj), size, ctypes.byref(out_bytes_read), ) - if raise_on_err and not res or out_bytes_read.value != size: + if raise_on_err and (not res or out_bytes_read.value != size): raise OSError(f"Failed to read memory at 0x{address:X}: {ctypes.get_last_error()}") - return res def _get_page_size() -> int: sys_info = SYSTEM_INFO() - kernel32.GetSystemInfo(ctypes.byref(sys_info)) - return sys_info.dwPageSize - - -def hash_bytes_from_memory(binary_path: str, _bufsize: int = 2**18) -> str: - # Compute SHA-1 hash for the static parts of the given binary if it is currently running. 
- # "Static parts" here refers to areas without "WRITE" permissions (e.g.: .text, .rdata, etc) - pid = None - normalized_path = normcase(binary_path) - exe_name = basename(normalized_path).lower() - - # Find the PID of the process with the given binary path - for process in psutil.process_iter(["pid", "name", "exe"]): - try: - exe = process.info.get("exe", "") - name = (process.info.get("name", "")).lower() - if exe and normcase(exe) == normalized_path and name == exe_name: - pid = process.info["pid"] - break - except (psutil.NoSuchProcess, psutil.AccessDenied): + GetSystemInfo(ctypes.byref(sys_info)) + return sys_info.dwPageSize or 4096 + + +def _get_main_module(pm_binary: pymem.Pymem) -> MODULEINFO: + binary_path = psutil.Process(pm_binary.process_id).exe().lower() + binary_exe = os.path.basename(binary_path).lower() + + main_module = None + modules = list(pm_binary.list_modules()) + for module in modules: + if module.filename.lower() == binary_path or module.name.lower() == binary_exe: + main_module = module + break + if not main_module: + main_module = modules[0] # Usually the first module is the main module + # Maybe raising an error or returning `None` here instead would be safer + # raise OSError(f"Could not find main module for process {pid}") + + return main_module + + +def _get_sections_info(process_handle: wintypes.HANDLE, address: int) -> tuple[int, int]: + dos_header = IMAGE_DOS_HEADER() + _read_process_memory(process_handle, address, dos_header) + if dos_header.e_magic != IMAGE_DOS_SIGNATURE: + raise ValueError(f"Invalid DOS header magic for address 0x{address:X}") + + address += dos_header.e_lfanew + signature = wintypes.DWORD() + _read_process_memory(process_handle, address, signature) + if signature.value != IMAGE_NT_SIGNATURE: + raise ValueError(f"Invalid PE header signature for address 0x{address:X}") + + address += ctypes.sizeof(wintypes.DWORD) + file_header = IMAGE_FILE_HEADER() + _read_process_memory(process_handle, address, file_header) + + 
num_sections = int(file_header.NumberOfSections) + opt_header_size = int(file_header.SizeOfOptionalHeader) + sections_base = address + ctypes.sizeof(IMAGE_FILE_HEADER) + opt_header_size + + return sections_base, num_sections + + +def _get_read_only_sections( + process_handle: wintypes.HANDLE, + sections_base: int, + num_sections: int, + max_module_size: int, +): + sections = [] + for i in range(num_sections): + section_address = sections_base + i * ctypes.sizeof(IMAGE_SECTION_HEADER) + section_header = IMAGE_SECTION_HEADER() + _read_process_memory(process_handle, section_address, section_header) + + characteristics = section_header.Characteristics + if not (characteristics & IMAGE_SCN_MEM_EXECUTE) or (characteristics & IMAGE_SCN_MEM_WRITE): continue - if not pid: - raise ProcessLookupError(f"Could not find running process for {binary_path}") - - # Open the process with some basic read/query permissions - access = PROCESS_VM_READ | PROCESS_QUERY_INFORMATION | PROCESS_QUERY_LIMITED_INFORMATION - process_handle = kernel32.OpenProcess(access, False, pid) - if not process_handle: - raise OSError(f"Failed to open process {pid}: {ctypes.get_last_error()}") - - # Find the main module of the opened process - try: # This try/finally ensures we always close the process handle - modules = (wintypes.HMODULE * 256)() - bytes_needed = wintypes.DWORD() - res = psapi.EnumProcessModulesEx( - process_handle, modules, ctypes.sizeof(modules), ctypes.byref(bytes_needed), LIST_MODULES_ALL - ) - if not res: - raise OSError(f"Failed to enumerate process modules for {pid}: {ctypes.get_last_error()}") - - num_modules = min(bytes_needed.value // ctypes.sizeof(wintypes.HMODULE), 256) - main_module = None - for i in range(num_modules): - buffer = ctypes.create_unicode_buffer(1024) - psapi.GetModuleFileNameExW(process_handle, modules[i], buffer, ctypes.sizeof(buffer)) - module_path = buffer.value - if normcase(module_path) == normalized_path or basename(module_path).lower() == exe_name: - 
main_module = modules[i] - break - if not main_module: - main_module = modules[0] # Usually the first module is the main module - # If you want to be strict, maybe raise an error or return here instead - # raise OSError(f"Could not find main module for process {pid}") - - # Read module information - modinfo = MODULEINFO() - res = psapi.GetModuleInformation( - process_handle, main_module, ctypes.byref(modinfo), ctypes.sizeof(modinfo) - ) - if not res: - raise OSError(f"Failed to get module information for {pid}: {ctypes.get_last_error()}") - - base_address = int(modinfo.lpBaseOfDll) - module_size = int(modinfo.SizeOfImage) - - # Parse PE headers to find sections - dos_header = IMAGE_DOS_HEADER() - _read_process_memory(process_handle, base_address, dos_header) - if dos_header.e_magic != IMAGE_DOS_SIGNATURE: - raise ValueError(f"Invalid DOS header magic for process {pid}") - - signature = wintypes.DWORD() - _read_process_memory(process_handle, base_address + dos_header.e_lfanew, signature) - if signature.value != IMAGE_NT_SIGNATURE: - raise ValueError(f"Invalid PE header signature for process {pid}") - - header = IMAGE_FILE_HEADER() - _read_process_memory( - process_handle, base_address + dos_header.e_lfanew + ctypes.sizeof(wintypes.DWORD), header - ) + virtual_addr = int(section_header.VirtualAddress) + virtual_size = int(section_header.Misc.VirtualSize) or int(section_header.SizeOfRawData) + if virtual_addr == 0 or virtual_size == 0: + continue + + end_addr = min(virtual_addr + virtual_size, max_module_size) + if end_addr <= virtual_addr: + continue - num_sections = header.NumberOfSections - opt_header_size = header.SizeOfOptionalHeader - section_base = ( - base_address - + dos_header.e_lfanew - + ctypes.sizeof(wintypes.DWORD) - + ctypes.sizeof(IMAGE_FILE_HEADER) - + opt_header_size + section = ( + virtual_addr, + end_addr - virtual_addr, + bytes(bytearray(section_header.Name)).rstrip(b"\x00").decode(errors="ignore"), ) + sections.append(section) - # Build a list 
with all sections to be hashed - sections = [] - for i in range(num_sections): - section_header = IMAGE_SECTION_HEADER() - _read_process_memory( - process_handle, section_base + i * ctypes.sizeof(IMAGE_SECTION_HEADER), section_header - ) + return sections + + +def hash_bytes_from_file(fileobj: BufferedReader, _bufsize: int = 2**18) -> str: + # Essentially implement hashlib.file_digest since it's python 3.11+ + # cf. https://github.com/python/cpython/blob/main/Lib/hashlib.py#L195 + digestobj = hashlib.sha1() + buf = bytearray(_bufsize) # Reusable buffer to reduce allocations. + view = memoryview(buf) + while True: + size = fileobj.readinto(buf) + if size == 0: + break # EOF + digestobj.update(view[:size]) + return digestobj.hexdigest() - characteristics = section_header.Characteristics - if characteristics & IMAGE_SCN_MEM_WRITE: - continue - virtual_address = int(section_header.VirtualAddress) - virtual_size = int(section_header.Misc.VirtualSize) or int(section_header.SizeOfRawData) - if virtual_address == 0 or virtual_size == 0: +def hash_bytes_from_memory(pm_binary: pymem.Pymem, _bufsize: int = 2**18) -> str: + process_handle = pm_binary.process_handle + pid = pm_binary.process_id + if not process_handle or not pid: + raise ValueError("Pymem instance does not have a valid process handle") + + main_module = _get_main_module(pm_binary) + if not main_module: + raise OSError(f"Could not find main module for process {pid}") + + base_address = main_module.lpBaseOfDll + module_size = main_module.SizeOfImage + if not base_address or not module_size: + raise OSError("Failed to resolve main module base/size") + + sections_base, num_sections = _get_sections_info(process_handle, base_address) + sections = _get_read_only_sections(process_handle, sections_base, num_sections, module_size) + if not sections: + raise ValueError("No read-only sections found in the main module") + sections.sort(key=lambda s: s[0]) + + page_size = _get_page_size() + digest = hashlib.sha1() + buffer = 
(ctypes.c_ubyte * _bufsize)() + bytes_read = ctypes.c_size_t() + for rva, size, name in sections: + start = base_address + rva + end = start + size + address = start + + while address < end: + page = MEMORY_BASIC_INFORMATION() + if not VirtualQueryEx( + process_handle, + ctypes.c_void_p(address), + ctypes.byref(page), + ctypes.sizeof(page), + ): + address += page_size continue - end_address = min(virtual_address + virtual_size, module_size) - if end_address <= virtual_address: + region_end = min(end, address + page.RegionSize) + if not _is_hashable_page(page): + address = region_end continue - section = ( - virtual_address, - end_address - virtual_address, - bytes(bytearray(section_header.Name)).rstrip(b"\x00").decode(errors="ignore"), - ) - sections.append(section) - - page_size = _get_page_size() - if not page_size: - page_size = 4096 # Usually Windows page size is 4KiB - - # Apply the hashing on all sections after sorting by address to ensure consistency - if not sections: - raise ValueError(f"No valid sections found to hash for process {pid}") - - sections.sort(key=lambda s: s[0]) - - digest = hashlib.sha1() - buffer = (ctypes.c_ubyte * _bufsize)() - bytes_read = ctypes.c_size_t() - for rva, size, name in sections: - offset = 0 - while offset < size: - to_read = min(_bufsize, size - offset) + current = address + while current < region_end: + to_read = min(_bufsize, region_end - current) res = _read_process_memory( process_handle, - base_address + rva + offset, + current, buffer, to_read, bytes_read, raise_on_err=False, ) if not res or bytes_read.value == 0: - offset += page_size + current = (current + page_size) & ~(page_size - 1) + if current < address: + current = address + page_size continue - mem_view = memoryview(buffer)[: bytes_read.value] - digest.update(mem_view) - offset += bytes_read.value + digest.update(memoryview(buffer)[: bytes_read.value]) + current += bytes_read.value - return digest.hexdigest() + address = region_end - finally: - 
kernel32.CloseHandle(process_handle) + return digest.hexdigest() diff --git a/pymhf/main.py b/pymhf/main.py index 194a7c3..e8a1a7b 100644 --- a/pymhf/main.py +++ b/pymhf/main.py @@ -324,7 +324,7 @@ def kill_injected_code(loop: asyncio.AbstractEventLoop): binary_hash = hash_bytes_from_file(f) except PermissionError: print(f"Cannot open {binary_path!r} to hash it. Trying to read from memory...") - binary_hash = hash_bytes_from_memory(binary_path) + binary_hash = hash_bytes_from_memory(pm_binary) print(f"Exe hash is: {binary_hash}") else: binary_hash = 0 diff --git a/pymhf/utils/winapi.py b/pymhf/utils/winapi.py index 0bb931c..510ed39 100644 --- a/pymhf/utils/winapi.py +++ b/pymhf/utils/winapi.py @@ -7,6 +7,80 @@ import pymem import pymem.ressources.structure +MAX_EXE_NAME_SIZE = 1024 +WS_EX_LAYERED = 0x00080000 # layered window +GWL_EXSTYLE = -20 # "extended window style" + +LWA_COLORKEY = 0x00000001 +LWA_ALPHA = 0x00000002 + +IMAGE_DOS_SIGNATURE = 0x5A4D +IMAGE_NT_SIGNATURE = 0x00004550 + +IMAGE_SIZEOF_SHORT_NAME = 8 + +IMAGE_SCN_MEM_WRITE = 0x80000000 +IMAGE_SCN_MEM_EXECUTE = 0x20000000 + + +class IMAGE_DOS_HEADER(ctypes.Structure): + _fields_ = [ + ("e_magic", wintypes.WORD), + ("e_cblp", wintypes.WORD), + ("e_cp", wintypes.WORD), + ("e_crlc", wintypes.WORD), + ("e_cparhdr", wintypes.WORD), + ("e_minalloc", wintypes.WORD), + ("e_maxalloc", wintypes.WORD), + ("e_ss", wintypes.WORD), + ("e_sp", wintypes.WORD), + ("e_csum", wintypes.WORD), + ("e_ip", wintypes.WORD), + ("e_cs", wintypes.WORD), + ("e_lfarlc", wintypes.WORD), + ("e_ovno", wintypes.WORD), + ("e_res", wintypes.WORD * 4), + ("e_oemid", wintypes.WORD), + ("e_oeminfo", wintypes.WORD), + ("e_res2", wintypes.WORD * 10), + ("e_lfanew", wintypes.LONG), + ] + + +class IMAGE_FILE_HEADER(ctypes.Structure): + _fields_ = [ + ("Machine", wintypes.WORD), + ("NumberOfSections", wintypes.WORD), + ("TimeDateStamp", wintypes.DWORD), + ("PointerToSymbolTable", wintypes.DWORD), + ("NumberOfSymbols", wintypes.DWORD), + 
("SizeOfOptionalHeader", wintypes.WORD), + ("Characteristics", wintypes.WORD), + ] + + +class IMAGE_SECTION_HEADER(ctypes.Structure): + class _Misc(ctypes.Union): + _fields_ = [ + ("PhysicalAddress", wintypes.DWORD), + ("VirtualSize", wintypes.DWORD), + ] + + _anonymous_ = ("Misc",) + _fields_ = [ + ("Name", wintypes.BYTE * IMAGE_SIZEOF_SHORT_NAME), + ("Misc", _Misc), + ("VirtualAddress", wintypes.DWORD), + ("SizeOfRawData", wintypes.DWORD), + ("PointerToRawData", wintypes.DWORD), + ("PointerToRelocations", wintypes.DWORD), + ("PointerToLinenumbers", wintypes.DWORD), + ("NumberOfRelocations", wintypes.WORD), + ("NumberOfLinenumbers", wintypes.WORD), + ("Characteristics", wintypes.DWORD), + ] + + GetModuleFileNameExA = ctypes.windll.psapi.GetModuleFileNameExA GetModuleFileNameExA.restype = wintypes.DWORD GetModuleFileNameExA.argtypes = [ @@ -58,12 +132,30 @@ VirtualQuery.restype = ctypes.c_size_t -MAX_EXE_NAME_SIZE = 1024 -WS_EX_LAYERED = 0x00080000 # layered window -GWL_EXSTYLE = -20 # "extended window style" +ReadProcessMemory = ctypes.windll.kernel32.ReadProcessMemory +ReadProcessMemory.argtypes = [ + wintypes.HANDLE, + wintypes.LPCVOID, + wintypes.LPVOID, + ctypes.c_size_t, + ctypes.POINTER(ctypes.c_size_t), +] +ReadProcessMemory.restype = wintypes.BOOL -LWA_COLORKEY = 0x00000001 -LWA_ALPHA = 0x00000002 + +GetSystemInfo = ctypes.windll.kernel32.GetSystemInfo +GetSystemInfo.argtypes = [ctypes.POINTER(pymem.ressources.structure.SYSTEM_INFO)] +GetSystemInfo.restype = None + + +VirtualQueryEx = ctypes.windll.kernel32.VirtualQueryEx +VirtualQueryEx.argtypes = [ + wintypes.HANDLE, + wintypes.LPCVOID, + ctypes.POINTER(pymem.ressources.structure.MEMORY_BASIC_INFORMATION), + ctypes.c_size_t, +] +VirtualQueryEx.restype = ctypes.c_size_t def get_exe_path_from_pid(proc: pymem.Pymem) -> str: From b04e56ed96fcf2d6f7e3ce0807f736dd1270d1b9 Mon Sep 17 00:00:00 2001 From: samjviana Date: Sun, 21 Sep 2025 09:59:32 -0300 Subject: [PATCH 03/12] stopped using `ReadProcessMemory` in 
favor of `Pymem.read_bytes` --- pymhf/core/hashing.py | 51 +++++++++++++++++++++++-------------------- pymhf/utils/winapi.py | 11 ---------- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/pymhf/core/hashing.py b/pymhf/core/hashing.py index 03ba95e..40bf668 100644 --- a/pymhf/core/hashing.py +++ b/pymhf/core/hashing.py @@ -27,7 +27,6 @@ IMAGE_SCN_MEM_WRITE, IMAGE_SECTION_HEADER, GetSystemInfo, - ReadProcessMemory, VirtualQueryEx, ) @@ -59,26 +58,30 @@ def _is_hashable_page(mbi: MEMORY_BASIC_INFORMATION32 | MEMORY_BASIC_INFORMATION return True -def _read_process_memory( - process_handle: wintypes.HANDLE, +def _read_bytes_into( + pm_binary: pymem.Pymem, address: int, out_obj: CDataLike, size: int | None = None, out_bytes_read: ctypes.c_size_t = ctypes.c_size_t(), raise_on_err: bool = True, -) -> wintypes.BOOL: +) -> bool: if size is None: size = ctypes.sizeof(out_obj) - res = ReadProcessMemory( - process_handle, - ctypes.c_void_p(address), - ctypes.byref(out_obj), - size, - ctypes.byref(out_bytes_read), - ) - if raise_on_err and (not res or out_bytes_read.value != size): - raise OSError(f"Failed to read memory at 0x{address:X}: {ctypes.get_last_error()}") - return res + + try: + data = pm_binary.read_bytes(address, size) + + buffer = (ctypes.c_char * len(data)).from_buffer_copy(data) + ctypes.memmove(ctypes.byref(out_obj), buffer, len(data)) + + out_bytes_read.value = len(data) + return True + except Exception as e: + out_bytes_read.value = 0 + if raise_on_err: + raise OSError(f"Failed to read memory at 0x{address:X}") from e + return False def _get_page_size() -> int: @@ -105,21 +108,21 @@ def _get_main_module(pm_binary: pymem.Pymem) -> MODULEINFO: return main_module -def _get_sections_info(process_handle: wintypes.HANDLE, address: int) -> tuple[int, int]: +def _get_sections_info(pm_binary: pymem.Pymem, address: int) -> tuple[int, int]: dos_header = IMAGE_DOS_HEADER() - _read_process_memory(process_handle, address, dos_header) + 
_read_bytes_into(pm_binary, address, dos_header) if dos_header.e_magic != IMAGE_DOS_SIGNATURE: raise ValueError(f"Invalid DOS header magic for address 0x{address:X}") address += dos_header.e_lfanew signature = wintypes.DWORD() - _read_process_memory(process_handle, address, signature) + _read_bytes_into(pm_binary, address, signature) if signature.value != IMAGE_NT_SIGNATURE: raise ValueError(f"Invalid PE header signature for address 0x{address:X}") address += ctypes.sizeof(wintypes.DWORD) file_header = IMAGE_FILE_HEADER() - _read_process_memory(process_handle, address, file_header) + _read_bytes_into(pm_binary, address, file_header) num_sections = int(file_header.NumberOfSections) opt_header_size = int(file_header.SizeOfOptionalHeader) @@ -129,7 +132,7 @@ def _get_sections_info(process_handle: wintypes.HANDLE, address: int) -> tuple[i def _get_read_only_sections( - process_handle: wintypes.HANDLE, + pm_binary: pymem.Pymem, sections_base: int, num_sections: int, max_module_size: int, @@ -138,7 +141,7 @@ def _get_read_only_sections( for i in range(num_sections): section_address = sections_base + i * ctypes.sizeof(IMAGE_SECTION_HEADER) section_header = IMAGE_SECTION_HEADER() - _read_process_memory(process_handle, section_address, section_header) + _read_bytes_into(pm_binary, section_address, section_header) characteristics = section_header.Characteristics if not (characteristics & IMAGE_SCN_MEM_EXECUTE) or (characteristics & IMAGE_SCN_MEM_WRITE): @@ -192,8 +195,8 @@ def hash_bytes_from_memory(pm_binary: pymem.Pymem, _bufsize: int = 2**18) -> str if not base_address or not module_size: raise OSError("Failed to resolve main module base/size") - sections_base, num_sections = _get_sections_info(process_handle, base_address) - sections = _get_read_only_sections(process_handle, sections_base, num_sections, module_size) + sections_base, num_sections = _get_sections_info(pm_binary, base_address) + sections = _get_read_only_sections(pm_binary, sections_base, num_sections, 
module_size) if not sections: raise ValueError("No read-only sections found in the main module") sections.sort(key=lambda s: s[0]) @@ -226,8 +229,8 @@ def hash_bytes_from_memory(pm_binary: pymem.Pymem, _bufsize: int = 2**18) -> str current = address while current < region_end: to_read = min(_bufsize, region_end - current) - res = _read_process_memory( - process_handle, + res = _read_bytes_into( + pm_binary, current, buffer, to_read, diff --git a/pymhf/utils/winapi.py b/pymhf/utils/winapi.py index 510ed39..cba030e 100644 --- a/pymhf/utils/winapi.py +++ b/pymhf/utils/winapi.py @@ -132,17 +132,6 @@ class _Misc(ctypes.Union): VirtualQuery.restype = ctypes.c_size_t -ReadProcessMemory = ctypes.windll.kernel32.ReadProcessMemory -ReadProcessMemory.argtypes = [ - wintypes.HANDLE, - wintypes.LPCVOID, - wintypes.LPVOID, - ctypes.c_size_t, - ctypes.POINTER(ctypes.c_size_t), -] -ReadProcessMemory.restype = wintypes.BOOL - - GetSystemInfo = ctypes.windll.kernel32.GetSystemInfo GetSystemInfo.argtypes = [ctypes.POINTER(pymem.ressources.structure.SYSTEM_INFO)] GetSystemInfo.restype = None From 9d7cef0e3edc7e2a5dd82d8e7c94f9b18c03f4cb Mon Sep 17 00:00:00 2001 From: samjviana Date: Sun, 21 Sep 2025 10:10:14 -0300 Subject: [PATCH 04/12] added a few inline docs --- pymhf/core/hashing.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pymhf/core/hashing.py b/pymhf/core/hashing.py index 40bf668..30e4baf 100644 --- a/pymhf/core/hashing.py +++ b/pymhf/core/hashing.py @@ -41,6 +41,8 @@ def _is_hashable_page(mbi: MEMORY_BASIC_INFORMATION32 | MEMORY_BASIC_INFORMATION64) -> bool: + """Check if a memory page is suitable for hashing. 
The page must not change during runtime and/or + between runs.""" if mbi.State != MEMORY_STATE.MEM_COMMIT: return False if mbi.Type != MEMORY_TYPES.MEM_IMAGE: @@ -66,6 +68,7 @@ def _read_bytes_into( out_bytes_read: ctypes.c_size_t = ctypes.c_size_t(), raise_on_err: bool = True, ) -> bool: + """Read bytes from the process memory into a `ctypes` object.""" if size is None: size = ctypes.sizeof(out_obj) @@ -85,6 +88,7 @@ def _read_bytes_into( def _get_page_size() -> int: + """Get the system page size. Defaults to 4096 if it cannot be determined.""" sys_info = SYSTEM_INFO() GetSystemInfo(ctypes.byref(sys_info)) return sys_info.dwPageSize or 4096 @@ -109,6 +113,7 @@ def _get_main_module(pm_binary: pymem.Pymem) -> MODULEINFO: def _get_sections_info(pm_binary: pymem.Pymem, address: int) -> tuple[int, int]: + """Get the base address and number of sections in the PE file at the given address.""" dos_header = IMAGE_DOS_HEADER() _read_bytes_into(pm_binary, address, dos_header) if dos_header.e_magic != IMAGE_DOS_SIGNATURE: @@ -137,6 +142,7 @@ def _get_read_only_sections( num_sections: int, max_module_size: int, ): + """Get a list of read-only sections in the PE file at the given address.""" sections = [] for i in range(num_sections): section_address = sections_base + i * ctypes.sizeof(IMAGE_SECTION_HEADER) @@ -181,6 +187,9 @@ def hash_bytes_from_file(fileobj: BufferedReader, _bufsize: int = 2**18) -> str: def hash_bytes_from_memory(pm_binary: pymem.Pymem, _bufsize: int = 2**18) -> str: + """Hash the bytes of the main module of the given `pymem.Pymem` instance. 
+ In order to ensure that the hash is stable across runs, this only read from sections that are not expected + to change between runs.""" process_handle = pm_binary.process_handle pid = pm_binary.process_id if not process_handle or not pid: From 45933dbc9a415c4fdabe3ad83be6e6b322cd8168 Mon Sep 17 00:00:00 2001 From: samjviana Date: Sun, 21 Sep 2025 17:31:10 -0300 Subject: [PATCH 05/12] changed a import to `typing_extensions` instead of `typing` --- pymhf/core/hashing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymhf/core/hashing.py b/pymhf/core/hashing.py index 30e4baf..6217cc1 100644 --- a/pymhf/core/hashing.py +++ b/pymhf/core/hashing.py @@ -3,7 +3,6 @@ import hashlib import os from io import BufferedReader -from typing import TYPE_CHECKING, Any, TypeAlias import psutil import pymem @@ -17,6 +16,7 @@ MODULEINFO, SYSTEM_INFO, ) +from typing_extensions import TYPE_CHECKING, Any, TypeAlias from pymhf.utils.winapi import ( IMAGE_DOS_HEADER, From 694253cab5470eaaa35592b9d3e7b12de0a727ab Mon Sep 17 00:00:00 2001 From: samjviana Date: Sun, 21 Sep 2025 22:41:17 -0300 Subject: [PATCH 06/12] fixed some python 3.9 incompatibilities --- pymhf/core/hashing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pymhf/core/hashing.py b/pymhf/core/hashing.py index 6217cc1..b8ee71b 100644 --- a/pymhf/core/hashing.py +++ b/pymhf/core/hashing.py @@ -16,7 +16,7 @@ MODULEINFO, SYSTEM_INFO, ) -from typing_extensions import TYPE_CHECKING, Any, TypeAlias +from typing_extensions import TYPE_CHECKING, Any, TypeAlias, Union from pymhf.utils.winapi import ( IMAGE_DOS_HEADER, @@ -40,7 +40,7 @@ CDataLike = Any -def _is_hashable_page(mbi: MEMORY_BASIC_INFORMATION32 | MEMORY_BASIC_INFORMATION64) -> bool: +def _is_hashable_page(mbi: Union[MEMORY_BASIC_INFORMATION32, MEMORY_BASIC_INFORMATION64]) -> bool: """Check if a memory page is suitable for hashing. 
The page must not change during runtime and/or between runs.""" if mbi.State != MEMORY_STATE.MEM_COMMIT: @@ -64,7 +64,7 @@ def _read_bytes_into( pm_binary: pymem.Pymem, address: int, out_obj: CDataLike, - size: int | None = None, + size: Union[int, None] = None, out_bytes_read: ctypes.c_size_t = ctypes.c_size_t(), raise_on_err: bool = True, ) -> bool: From 4615ada58eb9728a35103cfb49427e8e9b7f6dcb Mon Sep 17 00:00:00 2001 From: samjviana Date: Sun, 21 Sep 2025 22:49:10 -0300 Subject: [PATCH 07/12] ci: trigger From 505efe13b0640690b41ef0ee76d3827c5afadda6 Mon Sep 17 00:00:00 2001 From: samjviana Date: Sun, 28 Sep 2025 17:44:02 -0300 Subject: [PATCH 08/12] changed pipeline to use `Pymem`'s `read_ctype` and `read_bytes` instead of `_read_bytes_into` --- pymhf/core/hashing.py | 69 +++++++------------------------------------ 1 file changed, 10 insertions(+), 59 deletions(-) diff --git a/pymhf/core/hashing.py b/pymhf/core/hashing.py index b8ee71b..8402338 100644 --- a/pymhf/core/hashing.py +++ b/pymhf/core/hashing.py @@ -16,7 +16,7 @@ MODULEINFO, SYSTEM_INFO, ) -from typing_extensions import TYPE_CHECKING, Any, TypeAlias, Union +from typing_extensions import Union, cast from pymhf.utils.winapi import ( IMAGE_DOS_HEADER, @@ -30,15 +30,6 @@ VirtualQueryEx, ) -if TYPE_CHECKING: - from ctypes import _CData, _Pointer, _SimpleCData - - CDataLike: TypeAlias = ( - _CData | _SimpleCData | _Pointer[Any] | ctypes.Structure | ctypes.Union | ctypes.Array[Any] - ) -else: - CDataLike = Any - def _is_hashable_page(mbi: Union[MEMORY_BASIC_INFORMATION32, MEMORY_BASIC_INFORMATION64]) -> bool: """Check if a memory page is suitable for hashing.
The page must not change during runtime and/or @@ -60,33 +51,6 @@ def _is_hashable_page(mbi: Union[MEMORY_BASIC_INFORMATION32, MEMORY_BASIC_INFORM return True -def _read_bytes_into( - pm_binary: pymem.Pymem, - address: int, - out_obj: CDataLike, - size: Union[int, None] = None, - out_bytes_read: ctypes.c_size_t = ctypes.c_size_t(), - raise_on_err: bool = True, -) -> bool: - """Read bytes from the process memory into a `ctypes` object.""" - if size is None: - size = ctypes.sizeof(out_obj) - - try: - data = pm_binary.read_bytes(address, size) - - buffer = (ctypes.c_char * len(data)).from_buffer_copy(data) - ctypes.memmove(ctypes.byref(out_obj), buffer, len(data)) - - out_bytes_read.value = len(data) - return True - except Exception as e: - out_bytes_read.value = 0 - if raise_on_err: - raise OSError(f"Failed to read memory at 0x{address:X}") from e - return False - - def _get_page_size() -> int: """Get the system page size. Defaults to 4096 if it cannot be determined.""" sys_info = SYSTEM_INFO() @@ -111,23 +75,19 @@ def _get_main_module(pm_binary: pymem.Pymem) -> MODULEINFO: return main_module - def _get_sections_info(pm_binary: pymem.Pymem, address: int) -> tuple[int, int]: """Get the base address and number of sections in the PE file at the given address.""" - dos_header = IMAGE_DOS_HEADER() - _read_bytes_into(pm_binary, address, dos_header) + dos_header = cast(IMAGE_DOS_HEADER, pm_binary.read_ctype(address, IMAGE_DOS_HEADER())) if dos_header.e_magic != IMAGE_DOS_SIGNATURE: raise ValueError(f"Invalid DOS header magic for address 0x{address:X}") address += dos_header.e_lfanew - signature = wintypes.DWORD() - _read_bytes_into(pm_binary, address, signature) - if signature.value != IMAGE_NT_SIGNATURE: + signature = pm_binary.read_ctype(address, wintypes.DWORD()) + if signature != IMAGE_NT_SIGNATURE: raise ValueError(f"Invalid PE header signature for address 0x{address:X}") address += ctypes.sizeof(wintypes.DWORD) - file_header = IMAGE_FILE_HEADER() - 
_read_bytes_into(pm_binary, address, file_header) + file_header = cast(IMAGE_FILE_HEADER, pm_binary.read_ctype(address, IMAGE_FILE_HEADER())) num_sections = int(file_header.NumberOfSections) opt_header_size = int(file_header.SizeOfOptionalHeader) @@ -146,8 +106,7 @@ def _get_read_only_sections( sections = [] for i in range(num_sections): section_address = sections_base + i * ctypes.sizeof(IMAGE_SECTION_HEADER) - section_header = IMAGE_SECTION_HEADER() - _read_bytes_into(pm_binary, section_address, section_header) + section_header = cast(IMAGE_SECTION_HEADER, pm_binary.read_ctype(section_address, IMAGE_SECTION_HEADER())) characteristics = section_header.Characteristics if not (characteristics & IMAGE_SCN_MEM_EXECUTE) or (characteristics & IMAGE_SCN_MEM_WRITE): @@ -213,7 +172,6 @@ def hash_bytes_from_memory(pm_binary: pymem.Pymem, _bufsize: int = 2**18) -> str page_size = _get_page_size() digest = hashlib.sha1() buffer = (ctypes.c_ubyte * _bufsize)() - bytes_read = ctypes.c_size_t() for rva, size, name in sections: start = base_address + rva end = start + size @@ -238,22 +196,15 @@ def hash_bytes_from_memory(pm_binary: pymem.Pymem, _bufsize: int = 2**18) -> str current = address while current < region_end: to_read = min(_bufsize, region_end - current) - res = _read_bytes_into( - pm_binary, - current, - buffer, - to_read, - bytes_read, - raise_on_err=False, - ) - if not res or bytes_read.value == 0: + buffer = pm_binary.read_bytes(current, to_read) + if len(buffer) == 0: current = (current + page_size) & ~(page_size - 1) if current < address: current = address + page_size continue - digest.update(memoryview(buffer)[: bytes_read.value]) - current += bytes_read.value + digest.update(memoryview(buffer)[:len(buffer)]) + current += len(buffer) address = region_end From 0770b0f9203e01efc100f69d39867875a7cabee0 Mon Sep 17 00:00:00 2001 From: samjviana Date: Sun, 28 Sep 2025 17:45:58 -0300 Subject: [PATCH 09/12] just organized some imports --- pymhf/main.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/pymhf/main.py b/pymhf/main.py index e8a1a7b..3bada47 100644 --- a/pymhf/main.py +++ b/pymhf/main.py @@ -18,11 +18,11 @@ import pyrun_injected.dllinject as dllinject from pymhf.core._types import LoadTypeEnum, pymhfConfig +from pymhf.core.hashing import hash_bytes_from_file, hash_bytes_from_memory from pymhf.core.importing import parse_file_for_mod from pymhf.core.log_handling import open_log_console from pymhf.core.process import start_process from pymhf.core.protocols import ESCAPE_SEQUENCE, TerminalProtocol -from pymhf.core.hashing import hash_bytes_from_file, hash_bytes_from_memory from pymhf.utils.config import canonicalize_setting from pymhf.utils.parse_toml import read_pymhf_settings from pymhf.utils.winapi import get_exe_path_from_pid From d99b42a1d5d18c5150ab2c8612042e4e0e53e5da Mon Sep 17 00:00:00 2001 From: samjviana Date: Sun, 28 Sep 2025 17:48:33 -0300 Subject: [PATCH 10/12] fixed some linting problems --- pymhf/core/hashing.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pymhf/core/hashing.py b/pymhf/core/hashing.py index 8402338..9b031e9 100644 --- a/pymhf/core/hashing.py +++ b/pymhf/core/hashing.py @@ -75,9 +75,10 @@ def _get_main_module(pm_binary: pymem.Pymem) -> MODULEINFO: return main_module + def _get_sections_info(pm_binary: pymem.Pymem, address: int) -> tuple[int, int]: """Get the base address and number of sections in the PE file at the given address.""" - dos_header = cast(IMAGE_DOS_HEADER, pm_binary.read_ctype(address, IMAGE_DOS_HEADER())) + dos_header = cast(IMAGE_DOS_HEADER, pm_binary.read_ctype(address, IMAGE_DOS_HEADER())) if dos_header.e_magic != IMAGE_DOS_SIGNATURE: raise ValueError(f"Invalid DOS header magic for address 0x{address:X}") @@ -106,7 +107,9 @@ def _get_read_only_sections( sections = [] for i in range(num_sections): section_address = sections_base + i * ctypes.sizeof(IMAGE_SECTION_HEADER) - section_header = cast(IMAGE_SECTION_HEADER, 
pm_binary.read_ctype(section_address, IMAGE_SECTION_HEADER())) + section_header = cast( + IMAGE_SECTION_HEADER, pm_binary.read_ctype(section_address, IMAGE_SECTION_HEADER()) + ) characteristics = section_header.Characteristics if not (characteristics & IMAGE_SCN_MEM_EXECUTE) or (characteristics & IMAGE_SCN_MEM_WRITE): @@ -203,7 +206,7 @@ def hash_bytes_from_memory(pm_binary: pymem.Pymem, _bufsize: int = 2**18) -> str current = address + page_size continue - digest.update(memoryview(buffer)[:len(buffer)]) + digest.update(memoryview(buffer)[: len(buffer)]) current += len(buffer) address = region_end From a8351143752e5e646de69fa55915c30a34e76888 Mon Sep 17 00:00:00 2001 From: samjviana Date: Mon, 29 Sep 2025 19:50:40 -0300 Subject: [PATCH 11/12] updated the changelog --- docs/docs/change_log.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/docs/change_log.rst b/docs/docs/change_log.rst index 4e15066..68d5351 100644 --- a/docs/docs/change_log.rst +++ b/docs/docs/change_log.rst @@ -5,6 +5,7 @@ Current (0.1.17.dev) -------------------- - Further improved partial structs to allow nesting references to themselves as a type (must be "indirect", ie. the type of a pointer, or dynamic array for example). +- Added a fallback method to calculate the binary hash in case opening the file fails. Thanks to [@sparrow](https://github.com/samjviana) for impleenting this. 0.1.16 (16/08/2025) ------------------- From 94b52627bf2f81c22bb83109f4e980affeadb460 Mon Sep 17 00:00:00 2001 From: samjviana Date: Mon, 29 Sep 2025 20:00:42 -0300 Subject: [PATCH 12/12] typo fix --- docs/docs/change_log.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/change_log.rst b/docs/docs/change_log.rst index 68d5351..7dfb8ab 100644 --- a/docs/docs/change_log.rst +++ b/docs/docs/change_log.rst @@ -5,7 +5,7 @@ Current (0.1.17.dev) -------------------- - Further improved partial structs to allow nesting references to themselves as a type (must be "indirect", ie. 
the type of a pointer, or dynamic array for example). -- Added a fallback method to calculate the binary hash in case opening the file fails. Thanks to [@sparrow](https://github.com/samjviana) for impleenting this. +- Added a fallback method to calculate the binary hash in case opening the file fails. Thanks to [@sparrow](https://github.com/samjviana) for implementing this. 0.1.16 (16/08/2025) -------------------