Skip to content

Commit aaec3c0

Browse files
committed
Use msgpack
It turns out that marshaling raw structs is good for minimizing the payload size, but terrible for maintainability. This change switches to msgpack for marshaling the data structures. The trade-off is that payloads are now double the size.
1 parent 6a62544 commit aaec3c0

File tree

6 files changed

+265
-107
lines changed

6 files changed

+265
-107
lines changed

analyze_heap.py

Lines changed: 24 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,10 @@
33
import dataclasses
44
import copy
55
import gzip
6+
import io
67
import logging
7-
import struct
8-
from typing import NamedTuple
98

10-
RECORD_DONE = 0
11-
RECORD_TYPE = 1
12-
RECORD_OBJECT = 2
13-
RECORD_OBJECT_WITH_PAYLOAD = 3
14-
RECORD_REFERENTS = 4
9+
import msgpack
1510

1611

1712
@dataclasses.dataclass(order=True)
@@ -33,55 +28,35 @@ def _scanheap(filename: str, populate_referrers: bool = True) -> dict[int, HeapO
3328
typenames: dict[int, str] = {}
3429
live_objects: dict[int, HeapObject] = {}
3530
logging.info("%s: scanning live objects...", filename)
31+
file: io.IOBase
3632
if filename.endswith(".gz"):
3733
file = gzip.open(filename, "rb")
3834
else:
3935
file = open(filename, "rb")
4036
with file as f:
41-
while True:
42-
(record_kind,) = struct.unpack("B", f.read(1))
43-
if record_kind == RECORD_DONE:
44-
# !B
37+
for record in msgpack.Unpacker(f):
38+
if record["t"] == "done":
4539
break
46-
elif record_kind == RECORD_TYPE:
47-
# !BQH{len(typename)}s
48-
objtype_addr, objtype_len = struct.unpack("!QH", f.read(10))
49-
typenames[objtype_addr] = f.read(objtype_len).decode("utf-8")
50-
elif record_kind == RECORD_OBJECT:
51-
# !B?QQL
52-
root, addr, objtype_addr, size = struct.unpack("!?QQL", f.read(21))
53-
live_objects[addr] = HeapObject(
54-
addr=addr,
55-
typename=typenames[objtype_addr],
56-
size=size,
57-
root=root,
58-
)
59-
elif record_kind == RECORD_OBJECT_WITH_PAYLOAD:
60-
# !B?QQLH{len(payload)}s
61-
root, addr, objtype_addr, size, payload_len = struct.unpack(
62-
"!?QQLH", f.read(23)
63-
)
64-
payload = f.read(payload_len).decode("utf-8", "replace")
65-
live_objects[addr] = HeapObject(
66-
addr=addr,
67-
typename=typenames[objtype_addr],
68-
size=size,
40+
elif record["t"] == "type":
41+
typenames[record["objtype_addr"]] = record["typename"]
42+
elif record["t"] == "object":
43+
payload_bytes = record.get("payload", None)
44+
payload: str | None = None
45+
if payload_bytes is not None:
46+
payload = payload_bytes.decode("utf-8", "replace")
47+
live_objects[record["addr"]] = HeapObject(
48+
addr=record["addr"],
49+
typename=typenames[record["objtype_addr"]],
50+
size=record["size"],
51+
root=record["root"],
6952
payload=payload,
70-
root=root,
7153
)
72-
elif record_kind == RECORD_REFERENTS:
73-
# !BQH{len(referents)}Q
74-
addr, referents_len = struct.unpack("!QH", f.read(10))
75-
referents = live_objects[addr].referents
54+
elif record["t"] == "referents":
55+
referents = live_objects[record["addr"]].referents
7656
referents.clear()
77-
referents.update(
78-
struct.unpack(
79-
f"{referents_len}Q",
80-
f.read(8 * referents_len),
81-
)
82-
)
57+
referents.update(record["child_addrs"])
8358
else:
84-
logging.fatal("unknown record kind %d", record_kind)
59+
logging.fatal("unknown record kind %d", record["t"])
8560
else:
8661
logging.warning("incomplete file")
8762
logging.info("%s: %d live objects scanned", filename, len(live_objects))
@@ -180,8 +155,8 @@ def _graph(args: argparse.Namespace) -> None:
180155
queue: list[tuple[int, HeapObject]] = []
181156
seen: set[int] = set()
182157
ranks = collections.defaultdict[int, list[int]](list)
183-
addresses: Optional[set[int]] = None
184-
excluded_addresses: Optional[set[int]] = None
158+
addresses: set[int] | None = None
159+
excluded_addresses: set[int] | None = None
185160
if "0x" in args.filter:
186161
filter_exprs = args.filter.split(",")
187162
excluded_addresses = set(
@@ -196,7 +171,7 @@ def _graph(args: argparse.Namespace) -> None:
196171
print(" node [shape=box];")
197172
print(" edge [dir=back];")
198173
for obj in live_objects.values():
199-
if addresses is not None:
174+
if addresses is not None and excluded_addresses is not None:
200175
if obj.addr not in addresses or obj.addr in excluded_addresses:
201176
continue
202177
elif args.filter not in obj.typename:

lint.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/sh
2+
3+
set -e
4+
5+
uv run ruff check src/payloads/dump_heap.py analyze_heap.py
6+
uv run ruff format src/payloads/dump_heap.py analyze_heap.py
7+
uv run mypy --strict src/payloads/dump_heap.py analyze_heap.py

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@ description = "Dumps the heap of a running Python program."
55
readme = "README.md"
66
requires-python = ">=3.12"
77

8+
[dependencies]
9+
msgpack = ">=1.1.0"
10+
811
[dependency-groups]
912
dev = [
1013
"mypy>=1.13.0",
14+
"msgpack-types>=0.3.0",
1115
]

src/main.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ mod tests {
560560
.arg("-i")
561561
.stdin(Stdio::piped())
562562
.stdout(Stdio::piped())
563-
.stderr(Stdio::null())
563+
.stderr(Stdio::inherit())
564564
.spawn()
565565
.expect("Failed to execute Python");
566566

@@ -587,10 +587,12 @@ mod tests {
587587
let read = stdout.read(&mut buf).expect("Failed to read stdout");
588588
assert_eq!(&buf[..read], b"1\n");
589589

590-
let result = Command::new("python")
590+
let result = Command::new("uv")
591+
.arg("run")
591592
.arg("analyze_heap.py")
592593
.arg("top")
593594
.arg(output_path)
595+
.stderr(Stdio::inherit())
594596
.output()
595597
.expect("Failed to execute analyze_heap.py");
596598
assert!(result.status.success());
@@ -603,7 +605,7 @@ mod tests {
603605
assert!(
604606
false,
605607
"{:#?} did not contain {:#?}",
606-
result.stdout, expected
608+
String::from_utf8_lossy(&result.stdout), String::from_utf8_lossy(expected),
607609
);
608610
}
609611
}

0 commit comments

Comments
 (0)