|
54 | 54 | from .win32.version import _machine_to_arch_map |
55 | 55 | from .disasm import Disassembler |
56 | 56 | from .module import _ModuleContainer |
57 | | -from .search import HexPattern, IStringPattern, Pattern, Search, StringPattern |
| 57 | +from .search import ( |
| 58 | + AsciiStringsPattern, |
| 59 | + HexPattern, |
| 60 | + IStringPattern, |
| 61 | + Pattern, |
| 62 | + Search, |
| 63 | + StringPattern, |
| 64 | + UnicodeStringsPattern, |
| 65 | +) |
58 | 66 | from .textio import HexDump, HexInput |
59 | 67 | from .thread import Thread, _ThreadContainer |
60 | 68 | from .util import MemoryAddresses, PathOperations, Regenerator |
@@ -1358,6 +1366,96 @@ def search_hexa(self, hexa, minAddr=None, maxAddr=None): |
1358 | 1366 | pattern = HexPattern(hexa) |
1359 | 1367 | return Search.search_process(self, [pattern], minAddr, maxAddr) |
1360 | 1368 |
|
def strings(
    self, minLength=4, encoding="both", minAddr=None, maxAddr=None, bufferPages=None
):
    """
    Extract printable strings from the process memory.

    This method extracts readable strings from the process memory, similar to
    the Unix ``strings`` command. It can extract both ASCII and Unicode strings.

    :param int minLength: Minimum length of strings to extract, in characters.
        Defaults to 4.
    :param str encoding: Type of strings to extract (case insensitive).
        Valid values are:

        - ``"ascii"`` - Extract only ASCII strings (8-bit)
        - ``"unicode"`` - Extract only Unicode strings (UTF-16LE, 16-bit)
        - ``"both"`` - Extract both ASCII and Unicode strings (default)

    :param int minAddr: Optional. Start the search at this memory address.
    :param int maxAddr: Optional. Stop the search at this memory address.
    :param int bufferPages: Optional. Number of memory pages to buffer when
        performing the search. See :meth:`~.search.Search.search_process` for
        details on this parameter.
    :rtype: iterator[tuple[int, str]]
    :return: An iterator of tuples. Each tuple contains the following:

        - The memory address where the string was found.
        - The string that was extracted (decoded as a Python str).

    :raises ValueError: If an invalid encoding parameter is provided.
        Since this method is a generator, the check runs when iteration
        starts, not when the method is called.
    :raises WindowsError: An error occurred when querying or reading the
        process memory.

    Example::

        from winappdbg import Process

        # Open a process
        process = Process(1234)

        # Extract all strings from the process memory
        for address, string in process.strings():
            print(f"0x{address:08x}: {string}")

        # Extract only ASCII strings of at least 8 characters
        for address, string in process.strings(minLength=8, encoding="ascii"):
            print(f"0x{address:08x}: {string}")

    .. note::

        This method uses :class:`~.search.AsciiStringsPattern` and
        :class:`~.search.UnicodeStringsPattern` to extract strings.
        Only printable ASCII characters (0x20-0x7E) are considered.
    """
    # Validate encoding parameter
    encoding = encoding.lower()
    if encoding not in ("ascii", "unicode", "both"):
        raise ValueError(
            f"Invalid encoding: {encoding!r}. "
            "Valid values are: 'ascii', 'unicode', 'both'"
        )

    # Build list of patterns based on encoding
    patterns = []
    if encoding in ("ascii", "both"):
        patterns.append(AsciiStringsPattern(minLength))
    if encoding in ("unicode", "both"):
        patterns.append(UnicodeStringsPattern(minLength))

    # Search for strings in process memory
    for address, data in Search.search_process(
        self, patterns, minAddr, maxAddr, bufferPages, overlapping=False
    ):
        try:
            # A match containing NUL bytes is treated as UTF-16LE; anything
            # else is treated as an 8-bit string.
            if b"\x00" in data and len(data) >= 2:
                # UTF-16LE code units are two bytes wide, and the high byte
                # of every ASCII character is NUL, so a well-formed match
                # normally ends in 0x00. Only drop a trailing NUL when it is
                # a dangling odd byte; stripping it from an even-length
                # buffer would truncate the last character and make the
                # decode fail, silently discarding the string.
                if len(data) % 2 and data[-1] == 0:
                    data = data[:-1]
                decoded = data.decode("utf-16-le")
            else:
                decoded = data.decode("latin-1")
        except UnicodeError:
            # Defensive: skip anything that still cannot be decoded
            # (UnicodeDecodeError is a subclass of UnicodeError).
            continue
        yield (address, decoded)
| 1458 | + |
1361 | 1459 | # ------------------------------------------------------------------------------ |
1362 | 1460 |
|
1363 | 1461 | def __read_c_type(self, address, format, c_type): |
|
0 commit comments