|
| 1 | +import ctypes |
| 2 | +import ctypes.wintypes as wintypes |
| 3 | +import hashlib |
| 4 | +import os |
| 5 | +from io import BufferedReader |
| 6 | + |
| 7 | +import psutil |
| 8 | +import pymem |
| 9 | +from pymem.ressources.structure import ( |
| 10 | + MEMORY_BASIC_INFORMATION, |
| 11 | + MEMORY_BASIC_INFORMATION32, |
| 12 | + MEMORY_BASIC_INFORMATION64, |
| 13 | + MEMORY_PROTECTION, |
| 14 | + MEMORY_STATE, |
| 15 | + MEMORY_TYPES, |
| 16 | + MODULEINFO, |
| 17 | + SYSTEM_INFO, |
| 18 | +) |
| 19 | +from typing_extensions import Union, cast |
| 20 | + |
| 21 | +from pymhf.utils.winapi import ( |
| 22 | + IMAGE_DOS_HEADER, |
| 23 | + IMAGE_DOS_SIGNATURE, |
| 24 | + IMAGE_FILE_HEADER, |
| 25 | + IMAGE_NT_SIGNATURE, |
| 26 | + IMAGE_SCN_MEM_EXECUTE, |
| 27 | + IMAGE_SCN_MEM_WRITE, |
| 28 | + IMAGE_SECTION_HEADER, |
| 29 | + GetSystemInfo, |
| 30 | + VirtualQueryEx, |
| 31 | +) |
| 32 | + |
| 33 | + |
def _is_hashable_page(mbi: Union[MEMORY_BASIC_INFORMATION32, MEMORY_BASIC_INFORMATION64]) -> bool:
    """Tell whether the memory region described by `mbi` is suitable for hashing.

    A region qualifies only when it is committed, image-backed, carries none of the guard, write-copy
    or read-write protection flags, and has one of the execute / execute-read flags set. Such a region
    must not change during runtime and/or between runs."""
    if mbi.State != MEMORY_STATE.MEM_COMMIT or mbi.Type != MEMORY_TYPES.MEM_IMAGE:
        return False

    protection = mbi.Protect
    # Any of these flags disqualifies the region.
    rejected_flags = (
        MEMORY_PROTECTION.PAGE_GUARD
        | MEMORY_PROTECTION.PAGE_WRITECOPY
        | MEMORY_PROTECTION.PAGE_EXECUTE_WRITECOPY
        | MEMORY_PROTECTION.PAGE_READWRITE
        | MEMORY_PROTECTION.PAGE_EXECUTE_READWRITE
    )
    if protection & rejected_flags:
        return False

    # At least one of these flags must be present.
    required_flags = MEMORY_PROTECTION.PAGE_EXECUTE | MEMORY_PROTECTION.PAGE_EXECUTE_READ
    return bool(protection & required_flags)
| 52 | + |
| 53 | + |
def _get_page_size() -> int:
    """Return the page size reported by `GetSystemInfo`, or 4096 when the reported value is zero."""
    fallback_page_size = 4096
    info = SYSTEM_INFO()
    GetSystemInfo(ctypes.byref(info))
    reported = info.dwPageSize
    if not reported:
        return fallback_page_size
    return reported
| 59 | + |
| 60 | + |
def _get_main_module(pm_binary: pymem.Pymem) -> MODULEINFO:
    """Find the module of the process that corresponds to its main executable.

    A module matches when its full filename equals the process executable path, or its name equals
    the executable's base name (both compared case-insensitively). If nothing matches, the first
    module listed is returned, since the first module is usually the main module.

    Raises:
        OSError: If the process exposes no modules at all.
    """
    # `binary_path` is already lower-cased, so its basename is as well.
    binary_path = psutil.Process(pm_binary.process_id).exe().lower()
    binary_exe = os.path.basename(binary_path)

    first_module = None
    for module in pm_binary.list_modules():
        if first_module is None:
            first_module = module  # Remembered as the fallback candidate.
        if module.filename.lower() == binary_path or module.name.lower() == binary_exe:
            return module

    if first_module is None:
        # An empty module list used to surface as a bare IndexError on `modules[0]`.
        raise OSError(f"Could not find main module for process {pm_binary.process_id}")
    return first_module
| 77 | + |
| 78 | + |
def _get_sections_info(pm_binary: pymem.Pymem, address: int) -> tuple[int, int]:
    """Locate the section table of the PE image mapped at `address`.

    Returns a tuple `(sections_base, num_sections)`: the address of the first section header and the
    number of section headers declared in the file header.

    Raises `ValueError` if the DOS magic or the PE signature read from memory does not match."""
    dos_header = cast(IMAGE_DOS_HEADER, pm_binary.read_ctype(address, IMAGE_DOS_HEADER()))
    if dos_header.e_magic != IMAGE_DOS_SIGNATURE:
        raise ValueError(f"Invalid DOS header magic for address 0x{address:X}")

    # From here on `address` refers to the PE signature, `e_lfanew` bytes past the DOS header.
    address = address + dos_header.e_lfanew
    if pm_binary.read_ctype(address, wintypes.DWORD()) != IMAGE_NT_SIGNATURE:
        raise ValueError(f"Invalid PE header signature for address 0x{address:X}")

    # The file header comes right after the 4-byte signature.
    file_header_address = address + ctypes.sizeof(wintypes.DWORD)
    file_header = cast(
        IMAGE_FILE_HEADER, pm_binary.read_ctype(file_header_address, IMAGE_FILE_HEADER())
    )

    # The section headers start after the file header and the optional header.
    section_table = (
        file_header_address + ctypes.sizeof(IMAGE_FILE_HEADER) + int(file_header.SizeOfOptionalHeader)
    )
    return section_table, int(file_header.NumberOfSections)
| 98 | + |
| 99 | + |
def _get_read_only_sections(
    pm_binary: pymem.Pymem,
    sections_base: int,
    num_sections: int,
    max_module_size: int,
):
    """Collect the executable, non-writable sections declared in a PE section table.

    Reads `num_sections` section headers starting at `sections_base` and returns a list of
    `(virtual_address, size, name)` tuples, in table order. Sections that are writable or not
    executable are skipped, as are sections with a zero virtual address or a zero size. The end of
    each section is clamped to `max_module_size`."""
    selected = []
    for index in range(num_sections):
        header_address = sections_base + index * ctypes.sizeof(IMAGE_SECTION_HEADER)
        header = cast(IMAGE_SECTION_HEADER, pm_binary.read_ctype(header_address, IMAGE_SECTION_HEADER()))

        flags = header.Characteristics
        if (flags & IMAGE_SCN_MEM_WRITE) or not (flags & IMAGE_SCN_MEM_EXECUTE):
            continue

        rva = int(header.VirtualAddress)
        # Fall back to the raw data size when the virtual size is zero.
        length = int(header.Misc.VirtualSize) or int(header.SizeOfRawData)
        if rva == 0 or length == 0:
            continue

        clamped_end = min(rva + length, max_module_size)
        if clamped_end > rva:
            section_name = bytes(bytearray(header.Name)).rstrip(b"\x00").decode(errors="ignore")
            selected.append((rva, clamped_end - rva, section_name))

    return selected
| 135 | + |
| 136 | + |
def hash_bytes_from_file(fileobj: BufferedReader, _bufsize: int = 2**18) -> str:
    """Return the hexadecimal SHA-1 digest of everything read from `fileobj`.

    Essentially implements `hashlib.file_digest`, since that is only available in Python 3.11+.
    cf. https://github.com/python/cpython/blob/main/Lib/hashlib.py#L195

    Args:
        fileobj: Binary file-like object providing `readinto`; it is read until EOF.
        _bufsize: Size in bytes of the reusable read buffer.

    Raises:
        BlockingIOError: If `readinto` returns `None`, i.e. a non-blocking stream has no data
            available right now.
    """
    digestobj = hashlib.sha1()
    buf = bytearray(_bufsize)  # Reusable buffer to reduce allocations.
    view = memoryview(buf)
    while True:
        size = fileobj.readinto(buf)
        if size is None:
            # Without this check `view[:None]` would hash the whole (stale) buffer.
            raise BlockingIOError("I/O operation would block.")
        if size == 0:
            break  # EOF
        digestobj.update(view[:size])
    return digestobj.hexdigest()
| 149 | + |
| 150 | + |
def hash_bytes_from_memory(pm_binary: pymem.Pymem, _bufsize: int = 2**18) -> str:
    """Hash the bytes of the main module of the given `pymem.Pymem` instance.

    In order to ensure that the hash is stable across runs, this only reads from sections that are
    not expected to change between runs: the sections returned by `_get_read_only_sections`,
    restricted to the memory regions accepted by `_is_hashable_page`.

    Args:
        pm_binary: Pymem instance attached to the process whose main module is hashed.
        _bufsize: Maximum number of bytes requested per memory read.

    Returns:
        The hexadecimal SHA-1 digest of the bytes that were read.

    Raises:
        ValueError: If the instance has no process handle / process id, or if no suitable section
            is found in the main module.
        OSError: If the main module, its base address or its size cannot be resolved.
    """
    process_handle = pm_binary.process_handle
    pid = pm_binary.process_id
    if not process_handle or not pid:
        raise ValueError("Pymem instance does not have a valid process handle")

    main_module = _get_main_module(pm_binary)
    if not main_module:
        raise OSError(f"Could not find main module for process {pid}")

    base_address = main_module.lpBaseOfDll
    module_size = main_module.SizeOfImage
    if not base_address or not module_size:
        raise OSError("Failed to resolve main module base/size")

    sections_base, num_sections = _get_sections_info(pm_binary, base_address)
    sections = _get_read_only_sections(pm_binary, sections_base, num_sections, module_size)
    if not sections:
        raise ValueError("No read-only sections found in the main module")
    # Hash sections in ascending address order so the digest does not depend on table order.
    sections.sort(key=lambda s: s[0])

    page_size = _get_page_size()
    digest = hashlib.sha1()
    for rva, size, _name in sections:
        address = base_address + rva
        end = address + size

        while address < end:
            page = MEMORY_BASIC_INFORMATION()
            queried = VirtualQueryEx(
                process_handle,
                ctypes.c_void_p(address),
                ctypes.byref(page),
                ctypes.sizeof(page),
            )
            if not queried:
                # The query failed for this address: skip ahead by one page and retry.
                address += page_size
                continue

            region_end = min(end, address + page.RegionSize)
            if region_end <= address:
                # A zero-sized region would never advance `address`; step over one page instead.
                address += page_size
                continue
            if not _is_hashable_page(page):
                address = region_end
                continue

            current = address
            while current < region_end:
                chunk = pm_binary.read_bytes(current, min(_bufsize, region_end - current))
                if not chunk:
                    # Nothing was read: resume at the next page boundary after `current`.
                    current = (current // page_size + 1) * page_size
                    continue

                digest.update(chunk)
                current += len(chunk)

            address = region_end

    return digest.hexdigest()
0 commit comments