Skip to content

Commit 2296d34

Browse files
committed
Reimplemented strings() fixing #47 plus made all the other tools use exit codes consistently.
1 parent 4912a6e commit 2296d34

File tree

17 files changed

+625
-162
lines changed

17 files changed

+625
-162
lines changed

winappdbg/process.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,15 @@
5454
from .win32.version import _machine_to_arch_map
5555
from .disasm import Disassembler
5656
from .module import _ModuleContainer
57-
from .search import HexPattern, IStringPattern, Pattern, Search, StringPattern
57+
from .search import (
58+
AsciiStringsPattern,
59+
HexPattern,
60+
IStringPattern,
61+
Pattern,
62+
Search,
63+
StringPattern,
64+
UnicodeStringsPattern,
65+
)
5866
from .textio import HexDump, HexInput
5967
from .thread import Thread, _ThreadContainer
6068
from .util import MemoryAddresses, PathOperations, Regenerator
@@ -1358,6 +1366,96 @@ def search_hexa(self, hexa, minAddr=None, maxAddr=None):
13581366
pattern = HexPattern(hexa)
13591367
return Search.search_process(self, [pattern], minAddr, maxAddr)
13601368

1369+
def strings(
    self, minLength=4, encoding="both", minAddr=None, maxAddr=None, bufferPages=None
):
    """
    Extract printable strings from the process memory.

    This method extracts readable strings from the process memory, similar to
    the Unix ``strings`` command. It can extract both ASCII and Unicode strings.

    :param int minLength: Minimum length of strings to extract, in characters.
        Defaults to 4.
    :param str encoding: Type of strings to extract. The value is compared
        case-insensitively. Valid values are:

        - ``"ascii"`` - Extract only ASCII strings (8-bit)
        - ``"unicode"`` - Extract only Unicode strings (UTF-16LE, 16-bit)
        - ``"both"`` - Extract both ASCII and Unicode strings (default)

    :param int minAddr: Optional. Start the search at this memory address.
    :param int maxAddr: Optional. Stop the search at this memory address.
    :param int bufferPages: Optional. Number of memory pages to buffer when
        performing the search. See :meth:`~.search.Search.search_process` for
        details on this parameter.
    :rtype: iterator[tuple[int, str]]
    :return: An iterator of tuples. Each tuple contains the following:

        - The memory address where the string was found.
        - The string that was extracted (decoded as a Python str).

    :raises ValueError: If an invalid encoding parameter is provided.
    :raises WindowsError: An error occurred when querying or reading the
        process memory.

    Example::

        from winappdbg import Process

        # Open a process
        process = Process(1234)

        # Extract all strings from the process memory
        for address, string in process.strings():
            print(f"0x{address:08x}: {string}")

        # Extract only ASCII strings of at least 8 characters
        for address, string in process.strings(minLength=8, encoding="ascii"):
            print(f"0x{address:08x}: {string}")

    .. note::

        This method uses :class:`~.search.AsciiStringsPattern` and
        :class:`~.search.UnicodeStringsPattern` to extract strings.
        Only printable ASCII characters (0x20-0x7E) are considered.
    """
    # Validate encoding parameter
    encoding = encoding.lower()
    if encoding not in ("ascii", "unicode", "both"):
        raise ValueError(
            f"Invalid encoding: {encoding!r}. "
            "Valid values are: 'ascii', 'unicode', 'both'"
        )

    # Build list of patterns based on encoding
    patterns = []
    if encoding in ("ascii", "both"):
        patterns.append(AsciiStringsPattern(minLength))
    if encoding in ("unicode", "both"):
        patterns.append(UnicodeStringsPattern(minLength))

    # Search for strings in process memory
    for address, data in Search.search_process(
        self, patterns, minAddr, maxAddr, bufferPages, overlapping=False
    ):
        # The ASCII pattern never matches a null byte, so a null byte in the
        # extracted data means the hit came from the UTF-16LE pattern.
        if b"\x00" in data and len(data) >= 2:
            # UTF-16LE is made of 2-byte code units, and the high byte of
            # every printable ASCII character is 0x00. A well-formed hit
            # therefore has an even length and legitimately ends in 0x00.
            # Only drop a trailing byte when the length is odd: trimming an
            # even-length buffer would leave a truncated code unit, making
            # the decode fail and the string be silently discarded.
            if len(data) % 2:
                data = data[:-1]
            try:
                decoded = data.decode("utf-16-le")
            except UnicodeError:
                # Not expected with the patterns used above (they only
                # match printable ASCII code units), but skip undecodable
                # data rather than abort the whole scan.
                continue
        else:
            # ASCII string. latin-1 maps every byte value, so this decode
            # cannot fail.
            decoded = data.decode("latin-1")
        yield (address, decoded)
1458+
13611459
# ------------------------------------------------------------------------------
13621460

13631461
def __read_c_type(self, address, format, c_type):

winappdbg/search.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
"StringPattern",
4040
"IStringPattern",
4141
"HexPattern",
42+
"AsciiStringsPattern",
43+
"UnicodeStringsPattern",
4244
"MemoryAccessWarning",
4345
]
4446

@@ -267,6 +269,106 @@ def next_match(self):
267269
return -1
268270

269271

272+
# ------------------------------------------------------------------------------
273+
274+
275+
class AsciiStringsPattern(Pattern):
    """
    Search pattern that locates runs of printable ASCII bytes.

    The behavior mirrors the Unix ``strings`` command: a hit is any
    sequence of bytes in the range 0x20-0x7E (space to tilde) that is at
    least ``minLength`` bytes long.
    """

    def __init__(self, minLength=4):
        """
        Class constructor.

        :type minLength: int
        :param minLength: Minimum length of strings to extract.
            Defaults to 4 characters.
        """
        # One or more printable ASCII bytes (0x20 through 0x7E), repeated
        # at least minLength times.
        regex = rb"[\x20-\x7E]{%d,}" % minLength
        super().__init__(regex)
        compiled = re.compile(regex)
        self.match = None  # most recent regex match object, if any
        self.compiled = compiled
        self.minLength = minLength

    def __len__(self):
        """
        Return the size in bytes of the most recent match.

        Before any successful match the minimum length is returned instead.
        """
        found = self.match
        if found is None:
            return self.minLength
        return len(found.group(0))

    def next_match(self):
        """
        Find the next ASCII string in the data buffer.

        The search reads ``self.data`` starting at ``self.pos``; neither is
        set by this class (presumably they are maintained by the base class
        or the search engine - verify against ``Pattern``).

        :rtype: int
        :return: Position in the buffer where the string was found,
            or -1 if not found.
        """
        found = self.compiled.search(self.data, self.pos)
        self.match = found
        return -1 if found is None else found.start()
320+
321+
322+
# ------------------------------------------------------------------------------
323+
324+
325+
class UnicodeStringsPattern(Pattern):
    """
    Search pattern that locates UTF-16LE encoded printable strings.

    A hit is a run of at least ``minLength`` two-byte units, each made of
    a printable ASCII byte (0x20-0x7E) followed by a null byte. UTF-16LE
    (little-endian) is the standard Unicode encoding on Windows.
    """

    def __init__(self, minLength=4):
        """
        Class constructor.

        :type minLength: int
        :param minLength: Minimum length of strings to extract (in characters).
            Defaults to 4 characters.
        """
        # Each character is a printable ASCII byte (0x20 through 0x7E)
        # followed by a 0x00 byte; require at least minLength such pairs.
        regex = rb"(?:[\x20-\x7E]\x00){%d,}" % minLength
        super().__init__(regex)
        compiled = re.compile(regex)
        self.match = None  # most recent regex match object, if any
        self.compiled = compiled
        # Stored in bytes rather than characters: two bytes per character.
        self.minLength = minLength * 2

    def __len__(self):
        """
        Return the size in bytes of the most recent match.

        Before any successful match the minimum length (in bytes) is
        returned instead.
        """
        found = self.match
        if found is None:
            return self.minLength
        return len(found.group(0))

    def next_match(self):
        """
        Find the next Unicode string in the data buffer.

        The search reads ``self.data`` starting at ``self.pos``; neither is
        set by this class (presumably they are maintained by the base class
        or the search engine - verify against ``Pattern``).

        :rtype: int
        :return: Position in the buffer where the string was found,
            or -1 if not found.
        """
        found = self.compiled.search(self.data, self.pos)
        self.match = found
        return -1 if found is None else found.start()
370+
371+
270372
# ==============================================================================
271373

272374

winappdbg/tools/SelectMyParent.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def main():
5555
if len(argv) < 3:
5656
script = os.path.basename(argv[0])
5757
print(" %s <pid> <process.exe> [arguments]" % script)
58-
return
58+
return 1
5959

6060
# Request debug privileges.
6161
system = System()
@@ -72,18 +72,18 @@ def main():
7272
system.scan_processes_fast()
7373
if not system.has_process(dwParentProcessId):
7474
print("Can't find process ID %d" % dwParentProcessId)
75-
return
75+
return 1
7676
else:
7777
system.scan_processes()
7878
process_list = system.find_processes_by_filename(argv[1])
7979
if not process_list:
8080
print("Can't find process %r" % argv[1])
81-
return
81+
return 1
8282
if len(process_list) > 1:
8383
print("Too many processes found:")
8484
for process, name in process_list:
8585
print("\t%d:\t%s" % (process.get_pid(), name))
86-
return
86+
return 1
8787
dwParentProcessId = process_list[0][0].get_pid()
8888

8989
# Parse the target process argument.
@@ -93,7 +93,7 @@ def main():
9393
filename = win32.SearchPath(None, filename, ".exe")[0]
9494
except WindowsError as e:
9595
print("Error searching for %s: %s" % (filename, str(e)))
96-
return
96+
return 1
9797
argv = list(argv)
9898
argv[2] = filename
9999

@@ -111,13 +111,16 @@ def main():
111111
print("This tool requires Windows Vista or above.")
112112
else:
113113
print("Error starting new process: %s" % str(e))
114-
return
114+
return 1
115115
except WindowsError as e:
116116
print("Error starting new process: %s" % str(e))
117-
return
117+
return 1
118118
print("Process created: %d" % dwProcessId)
119-
return dwProcessId
119+
return 0
120120

121121

122122
if __name__ == "__main__":
123-
main()
123+
try:
124+
sys.exit(main())
125+
except KeyboardInterrupt:
126+
sys.exit(130)

winappdbg/tools/crash_logger.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,9 +1013,11 @@ def run_from_cmdline(self, args):
10131013

10141014
# Catch errors and show them on screen
10151015
except Exception as e:
1016-
print("Runtime error: %s" % str(e))
1016+
print("Runtime error: %s" % str(e), file=sys.stderr)
10171017
traceback.print_exc()
1018-
return
1018+
return 1
1019+
1020+
return 0
10191021

10201022
def install_as_jit(self, config):
10211023
# Not yet compatible with Cygwin.
@@ -1234,7 +1236,8 @@ def main():
12341236
return cl.run_from_cmdline(sys.argv)
12351237
except KeyboardInterrupt:
12361238
print("Interrupted by the user!")
1239+
return 130
12371240

12381241

12391242
if __name__ == "__main__":
1240-
main()
1243+
sys.exit(main())

winappdbg/tools/crash_report.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,11 @@ def main():
157157
cc = open_database(filename)
158158
print_report_for_database(cc, options)
159159

160+
return 0
161+
160162

161163
if __name__ == "__main__":
162-
main()
164+
try:
165+
sys.exit(main())
166+
except KeyboardInterrupt:
167+
sys.exit(130)

winappdbg/tools/hexdump.py

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -44,23 +44,30 @@ def main():
4444

4545
script = os.path.basename(argv[0])
4646
print(" %s <filename>" % script)
47-
return
48-
with open(argv[1], "rb") as fd:
49-
fd.seek(0, 2)
50-
size = fd.tell()
51-
fd.seek(0, 0)
52-
if size.bit_length() > 32:
53-
width = 8
54-
else:
55-
width = 16
56-
address = 0
57-
while True:
58-
data = fd.read(16)
59-
if not data:
60-
break
61-
print(HexDump.hexblock(data, address=address, width=width))
62-
address = address + len(data)
47+
return 1
48+
try:
49+
with open(argv[1], "rb") as fd:
50+
fd.seek(0, 2)
51+
size = fd.tell()
52+
fd.seek(0, 0)
53+
if size.bit_length() > 32:
54+
width = 8
55+
else:
56+
width = 16
57+
address = 0
58+
while True:
59+
data = fd.read(16)
60+
if not data:
61+
break
62+
print(HexDump.hexblock(data, address=address, width=width))
63+
address = address + len(data)
64+
return 0
65+
except KeyboardInterrupt:
66+
return 130
67+
except Exception as e:
68+
print("Error: %s" % e, file=sys.stderr)
69+
return 1
6370

6471

6572
if __name__ == "__main__":
66-
main()
73+
sys.exit(main())

winappdbg/tools/pdebug.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def run(self, argv):
6868
self.loop()
6969
finally:
7070
self.finalize()
71+
return 0
7172

7273
# Initialize the debugger.
7374
def initialize(self):
@@ -300,4 +301,7 @@ def main():
300301

301302

302303
if __name__ == "__main__":
303-
main()
304+
try:
305+
sys.exit(main())
306+
except KeyboardInterrupt:
307+
sys.exit(130)

0 commit comments

Comments
 (0)