Skip to content

Commit 324cd73

Browse files
authored
Merge pull request #85 from samjviana/master
Add "ctypes-based" fallback to the hashing by reading the non-writable sections of the running game
2 parents 0828f59 + 94b5262 commit 324cd73

File tree

5 files changed

+308
-24
lines changed

5 files changed

+308
-24
lines changed

docs/docs/change_log.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Current (0.1.17.dev)
55
--------------------
66

77
- Further improved partial structs to allow nesting references to themselves as a type (must be "indirect", ie. the type of a pointer, or dynamic array for example).
8+
- Added a fallback method to calculate the binary hash in case opening the file fails. Thanks to `@sparrow <https://github.com/samjviana>`_ for implementing this.
89

910
0.1.16 (16/08/2025)
1011
-------------------

pymhf/core/hashing.py

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
import ctypes
2+
import ctypes.wintypes as wintypes
3+
import hashlib
4+
import os
5+
from io import BufferedReader
6+
7+
import psutil
8+
import pymem
9+
from pymem.ressources.structure import (
10+
MEMORY_BASIC_INFORMATION,
11+
MEMORY_BASIC_INFORMATION32,
12+
MEMORY_BASIC_INFORMATION64,
13+
MEMORY_PROTECTION,
14+
MEMORY_STATE,
15+
MEMORY_TYPES,
16+
MODULEINFO,
17+
SYSTEM_INFO,
18+
)
19+
from typing_extensions import Union, cast
20+
21+
from pymhf.utils.winapi import (
22+
IMAGE_DOS_HEADER,
23+
IMAGE_DOS_SIGNATURE,
24+
IMAGE_FILE_HEADER,
25+
IMAGE_NT_SIGNATURE,
26+
IMAGE_SCN_MEM_EXECUTE,
27+
IMAGE_SCN_MEM_WRITE,
28+
IMAGE_SECTION_HEADER,
29+
GetSystemInfo,
30+
VirtualQueryEx,
31+
)
32+
33+
34+
def _is_hashable_page(mbi: Union[MEMORY_BASIC_INFORMATION32, MEMORY_BASIC_INFORMATION64]) -> bool:
    """Decide whether the memory region described by ``mbi`` may contribute to the hash.

    Only regions whose contents are expected to stay the same during runtime and between runs are
    accepted: the region must be committed, image-backed, executable, and must not carry any of the
    guard, write-copy or read-write protection flags."""
    if mbi.State != MEMORY_STATE.MEM_COMMIT or mbi.Type != MEMORY_TYPES.MEM_IMAGE:
        return False

    protect = mbi.Protect
    # Any one of these flags disqualifies the region.
    rejected = (
        MEMORY_PROTECTION.PAGE_GUARD
        | MEMORY_PROTECTION.PAGE_WRITECOPY
        | MEMORY_PROTECTION.PAGE_EXECUTE_WRITECOPY
        | MEMORY_PROTECTION.PAGE_READWRITE
        | MEMORY_PROTECTION.PAGE_EXECUTE_READWRITE
    )
    # At least one of these flags must be present.
    required = MEMORY_PROTECTION.PAGE_EXECUTE | MEMORY_PROTECTION.PAGE_EXECUTE_READ
    return not (protect & rejected) and bool(protect & required)
52+
53+
54+
def _get_page_size() -> int:
    """Return the page size reported by ``GetSystemInfo``, or 4096 when the reported value is zero."""
    info = SYSTEM_INFO()
    GetSystemInfo(ctypes.byref(info))
    reported = info.dwPageSize
    return reported if reported else 4096
59+
60+
61+
def _get_main_module(pm_binary: pymem.Pymem) -> MODULEINFO:
    """Find the module entry that corresponds to the process's own executable.

    The executable path is obtained from ``psutil`` and compared case-insensitively against each
    module's full filename and its bare name. If no module matches, the first module reported by
    ``pm_binary.list_modules()`` is returned as a best-effort fallback.

    Raises ``OSError`` if the process reports no modules at all."""
    exe_path = psutil.Process(pm_binary.process_id).exe().lower()
    # `exe_path` is already lower-cased, so its basename is too.
    exe_name = os.path.basename(exe_path)

    first_module = None
    for module in pm_binary.list_modules():
        if first_module is None:
            first_module = module
        if module.filename.lower() == exe_path or module.name.lower() == exe_name:
            return module

    if first_module is None:
        # Previously this surfaced as a bare IndexError from `modules[0]`.
        raise OSError(f"Could not find any modules for process {pm_binary.process_id}")

    # Usually the first module is the main module.
    return first_module
77+
78+
79+
def _get_sections_info(pm_binary: pymem.Pymem, address: int) -> tuple[int, int]:
    """Locate the section table of the PE image mapped at ``address``.

    Returns ``(sections_base, num_sections)``: the address of the first section header and the number
    of sections taken from the file header.

    Raises ``ValueError`` if the DOS magic or the PE signature read from memory does not match."""
    dos = cast(IMAGE_DOS_HEADER, pm_binary.read_ctype(address, IMAGE_DOS_HEADER()))
    if dos.e_magic != IMAGE_DOS_SIGNATURE:
        raise ValueError(f"Invalid DOS header magic for address 0x{address:X}")

    # `e_lfanew` is the offset from the image base to the PE signature.
    address += dos.e_lfanew
    if pm_binary.read_ctype(address, wintypes.DWORD()) != IMAGE_NT_SIGNATURE:
        raise ValueError(f"Invalid PE header signature for address 0x{address:X}")

    # The file header sits directly after the DWORD signature.
    address += ctypes.sizeof(wintypes.DWORD)
    coff = cast(IMAGE_FILE_HEADER, pm_binary.read_ctype(address, IMAGE_FILE_HEADER()))

    # Section headers start after the file header and the variable-size optional header.
    headers_size = ctypes.sizeof(IMAGE_FILE_HEADER) + int(coff.SizeOfOptionalHeader)
    return address + headers_size, int(coff.NumberOfSections)
98+
99+
100+
def _get_read_only_sections(
    pm_binary: pymem.Pymem,
    sections_base: int,
    num_sections: int,
    max_module_size: int,
):
    """Collect the executable, non-writable sections listed in a PE section table.

    ``sections_base`` is the address of the first section header and ``num_sections`` the number of
    headers to read. Each returned entry is a ``(rva, size, name)`` tuple, where ``size`` is clamped
    so the section does not extend past ``max_module_size``. Sections with a zero address or zero
    size, or that end up empty after clamping, are left out."""
    header_size = ctypes.sizeof(IMAGE_SECTION_HEADER)
    found = []
    for index in range(num_sections):
        header = cast(
            IMAGE_SECTION_HEADER,
            pm_binary.read_ctype(sections_base + index * header_size, IMAGE_SECTION_HEADER()),
        )

        flags = header.Characteristics
        is_executable = bool(flags & IMAGE_SCN_MEM_EXECUTE)
        is_writable = bool(flags & IMAGE_SCN_MEM_WRITE)
        if is_writable or not is_executable:
            continue

        rva = int(header.VirtualAddress)
        # Fall back to the raw data size when the virtual size is zero.
        size = int(header.Misc.VirtualSize) or int(header.SizeOfRawData)
        if rva == 0 or size == 0:
            continue

        clamped_end = min(rva + size, max_module_size)
        if clamped_end > rva:
            raw_name = bytes(bytearray(header.Name)).rstrip(b"\x00")
            found.append((rva, clamped_end - rva, raw_name.decode(errors="ignore")))

    return found
135+
136+
137+
def hash_bytes_from_file(fileobj: BufferedReader, _bufsize: int = 2**18) -> str:
    """Return the SHA-1 hex digest of everything that can be read from ``fileobj``.

    This is a stand-in for ``hashlib.file_digest``, which only exists on Python 3.11+.
    cf. https://github.com/python/cpython/blob/main/Lib/hashlib.py#L195"""
    sha1 = hashlib.sha1()
    chunk = bytearray(_bufsize)  # One buffer reused for every read to reduce allocations.
    window = memoryview(chunk)
    # A read of zero bytes signals EOF.
    while (count := fileobj.readinto(chunk)) != 0:
        sha1.update(window[:count])
    return sha1.hexdigest()
149+
150+
151+
def hash_bytes_from_memory(pm_binary: pymem.Pymem, _bufsize: int = 2**18) -> str:
    """Hash the bytes of the main module of the given `pymem.Pymem` instance.

    In order to keep the hash stable across runs, this only reads from sections that are not expected
    to change between runs: the executable, non-writable sections of the main module, and within those
    only the memory regions accepted by `_is_hashable_page`.

    `_bufsize` is the maximum number of bytes requested from the process per read.

    Raises `ValueError` if the `Pymem` instance has no process handle or process id, or if no suitable
    section is found. Raises `OSError` if the main module or its base address/size cannot be resolved.
    """
    process_handle = pm_binary.process_handle
    pid = pm_binary.process_id
    if not process_handle or not pid:
        raise ValueError("Pymem instance does not have a valid process handle")

    main_module = _get_main_module(pm_binary)
    if not main_module:
        raise OSError(f"Could not find main module for process {pid}")

    base_address = main_module.lpBaseOfDll
    module_size = main_module.SizeOfImage
    if not base_address or not module_size:
        raise OSError("Failed to resolve main module base/size")

    sections_base, num_sections = _get_sections_info(pm_binary, base_address)
    sections = _get_read_only_sections(pm_binary, sections_base, num_sections, module_size)
    if not sections:
        raise ValueError("No read-only sections found in the main module")
    # Hash sections in ascending address order so the digest does not depend on header order.
    sections.sort(key=lambda s: s[0])

    page_size = _get_page_size()
    digest = hashlib.sha1()
    for rva, size, _name in sections:
        address = base_address + rva
        end = address + size

        while address < end:
            page = MEMORY_BASIC_INFORMATION()
            if not VirtualQueryEx(
                process_handle,
                ctypes.c_void_p(address),
                ctypes.byref(page),
                ctypes.sizeof(page),
            ):
                # The query failed for this address; step one page ahead and try again.
                address += page_size
                continue

            region_end = min(end, address + page.RegionSize)
            if _is_hashable_page(page):
                current = address
                while current < region_end:
                    # NOTE(review): pymem's `read_bytes` may raise instead of returning an empty
                    # result when a read fails - confirm whether the empty-read path can be hit.
                    chunk = pm_binary.read_bytes(current, min(_bufsize, region_end - current))
                    if not chunk:
                        # Nothing was read: skip ahead to the next page boundary.
                        current = (current + page_size) & ~(page_size - 1)
                        continue

                    digest.update(chunk)
                    current += len(chunk)

            address = region_end

    return digest.hexdigest()

pymhf/core/utils.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
1-
import hashlib
21
import logging
32
from collections.abc import Callable
43
from concurrent.futures import ThreadPoolExecutor
54
from ctypes import byref, c_ulong, create_unicode_buffer, windll
65
from functools import wraps
7-
from io import BufferedReader
86
from typing import Optional
97

108
import psutil
@@ -124,20 +122,6 @@ def does_pid_have_focus(pid: int) -> bool:
124122
return pid == get_foreground_pid()
125123

126124

127-
def hash_bytes(fileobj: BufferedReader, _bufsize: int = 2**18) -> str:
128-
# Essentially implement hashlib.file_digest since it's python 3.11+
129-
# cf. https://github.com/python/cpython/blob/main/Lib/hashlib.py#L195
130-
digestobj = hashlib.sha1()
131-
buf = bytearray(_bufsize) # Reusable buffer to reduce allocations.
132-
view = memoryview(buf)
133-
while True:
134-
size = fileobj.readinto(buf)
135-
if size == 0:
136-
break # EOF
137-
digestobj.update(view[:size])
138-
return digestobj.hexdigest()
139-
140-
141125
# TODO: Do something about this...
142126
# class AutosavingConfig(ConfigParser):
143127
# def __init__(self, *args, **kwargs):

pymhf/main.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@
1818
import pyrun_injected.dllinject as dllinject
1919

2020
from pymhf.core._types import LoadTypeEnum, pymhfConfig
21+
from pymhf.core.hashing import hash_bytes_from_file, hash_bytes_from_memory
2122
from pymhf.core.importing import parse_file_for_mod
2223
from pymhf.core.log_handling import open_log_console
2324
from pymhf.core.process import start_process
2425
from pymhf.core.protocols import ESCAPE_SEQUENCE, TerminalProtocol
25-
from pymhf.core.utils import hash_bytes
2626
from pymhf.utils.config import canonicalize_setting
2727
from pymhf.utils.parse_toml import read_pymhf_settings
2828
from pymhf.utils.winapi import get_exe_path_from_pid
@@ -319,8 +319,12 @@ def kill_injected_code(loop: asyncio.AbstractEventLoop):
319319
# Have a small nap just to give it some time.
320320
time.sleep(0.5)
321321
if binary_path:
322-
with open(binary_path, "rb") as f:
323-
binary_hash = hash_bytes(f)
322+
try:
323+
with open(binary_path, "rb") as f:
324+
binary_hash = hash_bytes_from_file(f)
325+
except PermissionError:
326+
print(f"Cannot open {binary_path!r} to hash it. Trying to read from memory...")
327+
binary_hash = hash_bytes_from_memory(pm_binary)
324328
print(f"Exe hash is: {binary_hash}")
325329
else:
326330
binary_hash = 0

pymhf/utils/winapi.py

Lines changed: 86 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,80 @@
77
import pymem
88
import pymem.ressources.structure
99

10+
MAX_EXE_NAME_SIZE = 1024
WS_EX_LAYERED = 0x00080000  # layered window
GWL_EXSTYLE = -20  # "extended window style"

LWA_COLORKEY = 0x00000001
LWA_ALPHA = 0x00000002

# Header signatures checked when walking a PE image in memory.
IMAGE_DOS_SIGNATURE = 0x5A4D  # bytes "MZ" read as a little-endian WORD
IMAGE_NT_SIGNATURE = 0x00004550  # bytes "PE\0\0" read as a little-endian DWORD

# Length of the `Name` array in `IMAGE_SECTION_HEADER`.
IMAGE_SIZEOF_SHORT_NAME = 8

# Bits tested against `IMAGE_SECTION_HEADER.Characteristics`.
IMAGE_SCN_MEM_WRITE = 0x80000000
IMAGE_SCN_MEM_EXECUTE = 0x20000000
24+
25+
26+
class IMAGE_DOS_HEADER(ctypes.Structure):
    """ctypes layout of the DOS header found at the very start of a PE image.

    Field order defines the binary layout and must not be changed."""

    _fields_ = [
        ("e_magic", wintypes.WORD),  # expected to equal IMAGE_DOS_SIGNATURE
        ("e_cblp", wintypes.WORD),
        ("e_cp", wintypes.WORD),
        ("e_crlc", wintypes.WORD),
        ("e_cparhdr", wintypes.WORD),
        ("e_minalloc", wintypes.WORD),
        ("e_maxalloc", wintypes.WORD),
        ("e_ss", wintypes.WORD),
        ("e_sp", wintypes.WORD),
        ("e_csum", wintypes.WORD),
        ("e_ip", wintypes.WORD),
        ("e_cs", wintypes.WORD),
        ("e_lfarlc", wintypes.WORD),
        ("e_ovno", wintypes.WORD),
        ("e_res", wintypes.WORD * 4),
        ("e_oemid", wintypes.WORD),
        ("e_oeminfo", wintypes.WORD),
        ("e_res2", wintypes.WORD * 10),
        ("e_lfanew", wintypes.LONG),  # offset from the image base to the PE signature
    ]
48+
49+
50+
class IMAGE_FILE_HEADER(ctypes.Structure):
    """ctypes layout of the PE file header that follows the PE signature.

    Field order defines the binary layout and must not be changed."""

    _fields_ = [
        ("Machine", wintypes.WORD),
        ("NumberOfSections", wintypes.WORD),  # number of section headers in the section table
        ("TimeDateStamp", wintypes.DWORD),
        ("PointerToSymbolTable", wintypes.DWORD),
        ("NumberOfSymbols", wintypes.DWORD),
        ("SizeOfOptionalHeader", wintypes.WORD),  # bytes between this header and the section table
        ("Characteristics", wintypes.WORD),
    ]
60+
61+
62+
class IMAGE_SECTION_HEADER(ctypes.Structure):
    """ctypes layout of one entry of a PE image's section table.

    Field order defines the binary layout and must not be changed."""

    class _Misc(ctypes.Union):
        # Both members share the same DWORD of storage.
        _fields_ = [
            ("PhysicalAddress", wintypes.DWORD),
            ("VirtualSize", wintypes.DWORD),
        ]

    # Makes the members of `Misc` reachable directly on the section header as well.
    _anonymous_ = ("Misc",)
    _fields_ = [
        ("Name", wintypes.BYTE * IMAGE_SIZEOF_SHORT_NAME),  # NUL-padded section name
        ("Misc", _Misc),
        ("VirtualAddress", wintypes.DWORD),  # added to the module base address to locate the section
        ("SizeOfRawData", wintypes.DWORD),
        ("PointerToRawData", wintypes.DWORD),
        ("PointerToRelocations", wintypes.DWORD),
        ("PointerToLinenumbers", wintypes.DWORD),
        ("NumberOfRelocations", wintypes.WORD),
        ("NumberOfLinenumbers", wintypes.WORD),
        ("Characteristics", wintypes.DWORD),  # tested against the IMAGE_SCN_MEM_* bits
    ]
82+
83+
1084
GetModuleFileNameExA = ctypes.windll.psapi.GetModuleFileNameExA
1185
GetModuleFileNameExA.restype = wintypes.DWORD
1286
GetModuleFileNameExA.argtypes = [
@@ -58,12 +132,19 @@
58132
VirtualQuery.restype = ctypes.c_size_t
59133

60134

61-
MAX_EXE_NAME_SIZE = 1024
62-
WS_EX_LAYERED = 0x00080000 # layered window
63-
GWL_EXSTYLE = -20 # "extended window style"
135+
# kernel32 GetSystemInfo: fills the SYSTEM_INFO structure passed by pointer and returns nothing.
GetSystemInfo = ctypes.windll.kernel32.GetSystemInfo
GetSystemInfo.argtypes = [ctypes.POINTER(pymem.ressources.structure.SYSTEM_INFO)]
GetSystemInfo.restype = None
64138

65-
LWA_COLORKEY = 0x00000001
66-
LWA_ALPHA = 0x00000002
139+
140+
# kernel32 VirtualQueryEx: takes a process handle, an address in that process, a pointer to a
# MEMORY_BASIC_INFORMATION to fill in, and the size of that structure.
# The result is a size_t; callers treat a zero result as failure.
VirtualQueryEx = ctypes.windll.kernel32.VirtualQueryEx
VirtualQueryEx.argtypes = [
    wintypes.HANDLE,
    wintypes.LPCVOID,
    ctypes.POINTER(pymem.ressources.structure.MEMORY_BASIC_INFORMATION),
    ctypes.c_size_t,
]
VirtualQueryEx.restype = ctypes.c_size_t
67148

68149

69150
def get_exe_path_from_pid(proc: pymem.Pymem) -> str:

0 commit comments

Comments
 (0)