Skip to content

Commit ac77da1

Browse files
committed
Add PoC implementation
1 parent db67fac commit ac77da1

File tree

8 files changed

+1195
-18
lines changed

8 files changed

+1195
-18
lines changed

mypy/build.py

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import time
2727
import types
2828
from collections.abc import Iterator, Mapping, Sequence, Set as AbstractSet
29+
from io import BytesIO
2930
from typing import (
3031
TYPE_CHECKING,
3132
Any,
@@ -1139,6 +1140,17 @@ def read_deps_cache(manager: BuildManager, graph: Graph) -> dict[str, FgDepMeta]
11391140
return module_deps_metas
11401141

11411142

1143+
def _load_ff_file(file: str, manager: BuildManager, log_error: str) -> bytes | None:
1144+
t0 = time.time()
1145+
try:
1146+
data = manager.metastore.read(file)
1147+
except OSError:
1148+
manager.log(log_error + file)
1149+
return None
1150+
manager.add_stats(metastore_read_time=time.time() - t0)
1151+
return data
1152+
1153+
11421154
def _load_json_file(
11431155
file: str, manager: BuildManager, log_success: str, log_error: str
11441156
) -> dict[str, Any] | None:
@@ -1259,7 +1271,11 @@ def get_cache_names(id: str, path: str, options: Options) -> tuple[str, str, str
12591271
deps_json = None
12601272
if options.cache_fine_grained:
12611273
deps_json = prefix + ".deps.json"
1262-
return (prefix + ".meta.json", prefix + ".data.json", deps_json)
1274+
if options.fixed_format_cache:
1275+
data_suffix = ".data.ff"
1276+
else:
1277+
data_suffix = ".data.json"
1278+
return (prefix + ".meta.json", prefix + data_suffix, deps_json)
12631279

12641280

12651281
def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | None:
@@ -1559,8 +1575,13 @@ def write_cache(
15591575
tree.path = path
15601576

15611577
# Serialize data and analyze interface
1562-
data = tree.serialize()
1563-
data_bytes = json_dumps(data, manager.options.debug_cache)
1578+
if manager.options.fixed_format_cache:
1579+
data_io = BytesIO()
1580+
tree.write(data_io)
1581+
data_bytes = data_io.getvalue()
1582+
else:
1583+
data = tree.serialize()
1584+
data_bytes = json_dumps(data, manager.options.debug_cache)
15641585
interface_hash = hash_digest(data_bytes)
15651586

15661587
plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))
@@ -2085,15 +2106,23 @@ def load_tree(self, temporary: bool = False) -> None:
20852106
self.meta is not None
20862107
), "Internal error: this method must be called only for cached modules"
20872108

2088-
data = _load_json_file(
2089-
self.meta.data_json, self.manager, "Load tree ", "Could not load tree: "
2090-
)
2109+
data: bytes | dict[str, Any] | None
2110+
if self.options.fixed_format_cache:
2111+
data = _load_ff_file(self.meta.data_json, self.manager, "Could not load tree: ")
2112+
else:
2113+
data = _load_json_file(
2114+
self.meta.data_json, self.manager, "Load tree ", "Could not load tree: "
2115+
)
20912116
if data is None:
20922117
return
20932118

20942119
t0 = time.time()
20952120
# TODO: Assert data file wasn't changed.
2096-
self.tree = MypyFile.deserialize(data)
2121+
if isinstance(data, bytes):
2122+
data_io = BytesIO(data)
2123+
self.tree = MypyFile.read(data_io)
2124+
else:
2125+
self.tree = MypyFile.deserialize(data)
20972126
t1 = time.time()
20982127
self.manager.add_stats(deserialize_time=t1 - t0)
20992128
if not temporary:
@@ -2481,7 +2510,11 @@ def write_cache(self) -> None:
24812510
):
24822511
if self.options.debug_serialize:
24832512
try:
2484-
self.tree.serialize()
2513+
if self.manager.options.fixed_format_cache:
2514+
data = BytesIO()
2515+
self.tree.write(data)
2516+
else:
2517+
self.tree.serialize()
24852518
except Exception:
24862519
print(f"Error serializing {self.id}", file=self.manager.stdout)
24872520
raise # Propagate to display traceback
@@ -3410,6 +3443,7 @@ def process_fresh_modules(graph: Graph, modules: list[str], manager: BuildManage
34103443
for id in modules:
34113444
graph[id].fix_cross_refs()
34123445
t2 = time.time()
3446+
# touch 22 again
34133447
manager.add_stats(process_fresh_time=t2 - t0, load_tree_time=t1 - t0)
34143448

34153449

mypy/cache.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
from __future__ import annotations
2+
3+
from collections.abc import Sequence
4+
from io import BytesIO
5+
from typing import Final
6+
7+
# Number of bytes read/written by read_int() and write_int() for one signed integer.
INT_SIZE: Final = 2
8+
# Number of bytes read/written by read_long_int() and write_long_int().
LONG_INT_SIZE: Final = 10
9+
# Width, in characters, of the fixed-size text field used by read_float() and write_float().
FLOAT_LEN: Final = 32
10+
11+
12+
def read_int(data: BytesIO) -> int:
    """Read one fixed-width signed integer from the stream.

    Consumes INT_SIZE bytes from ``data`` and decodes them as a big-endian
    signed integer.
    """
    raw = data.read(INT_SIZE)
    return int.from_bytes(raw, byteorder="big", signed=True)
14+
15+
16+
def write_int(data: BytesIO, value: int) -> None:
    """Append ``value`` to the stream as a fixed-width signed integer.

    The value is encoded as INT_SIZE big-endian bytes; ``int.to_bytes`` raises
    OverflowError if it does not fit in that width.
    """
    encoded = value.to_bytes(INT_SIZE, byteorder="big", signed=True)
    data.write(encoded)
18+
19+
20+
def read_long_int(data: BytesIO) -> int:
    """Read one wide signed integer from the stream.

    Consumes LONG_INT_SIZE bytes from ``data`` and decodes them as a
    big-endian signed integer.
    """
    raw = data.read(LONG_INT_SIZE)
    return int.from_bytes(raw, byteorder="big", signed=True)
22+
23+
24+
def write_long_int(data: BytesIO, value: int) -> None:
    """Append ``value`` to the stream as a wide signed integer.

    The value is encoded as LONG_INT_SIZE big-endian bytes; ``int.to_bytes``
    raises OverflowError if it does not fit in that width.
    """
    encoded = value.to_bytes(LONG_INT_SIZE, byteorder="big", signed=True)
    data.write(encoded)
26+
27+
28+
def read_str(data: BytesIO) -> str:
    """Read a length-prefixed string from the stream.

    The layout is a byte count (as read by read_int) followed by that many
    bytes, which are decoded with the default ``bytes.decode`` encoding.
    """
    length = read_int(data)
    return data.read(length).decode()
32+
33+
34+
def write_str(data: BytesIO, value: str) -> None:
    """Append a length-prefixed string to the stream.

    Writes the encoded byte count with write_int, then the encoded bytes
    themselves. The prefix counts bytes, not characters.
    """
    payload = value.encode()
    write_int(data, len(payload))
    data.write(payload)
39+
40+
41+
def read_bool(data: BytesIO) -> bool:
    """Read a single-byte boolean from the stream.

    Returns True only when the byte read is 0xFF; any other byte, or an
    exhausted stream, yields False.
    """
    flag = data.read(1)
    return flag == b"\xff"
43+
44+
45+
def write_bool(data: BytesIO, value: bool) -> None:
    """Append a boolean to the stream as one byte: 0xFF for true, 0x00 for false."""
    if value:
        data.write(b"\xff")
    else:
        data.write(b"\x00")
47+
48+
49+
def read_float(data: BytesIO) -> float:
    """Read a float stored as a fixed-width text field.

    Consumes FLOAT_LEN bytes from ``data``, decodes them to text and parses
    the text with ``float()``.
    """
    return float(data.read(FLOAT_LEN).decode())
52+
53+
54+
def write_float(data: BytesIO, value: float) -> None:
    """Append a float to the stream as a fixed-width text field.

    The value is rendered with ``str()`` and right-justified to FLOAT_LEN
    characters before being encoded and written.

    Padding uses spaces rather than "0" characters: zero-padding placed the
    zeros in front of a leading sign or of "inf"/"nan" (for example
    "000-1.5"), which ``float()`` rejects when the field is read back.
    ``float()`` ignores surrounding whitespace, so space-padded fields parse
    for every float value while keeping the same field width.
    """
    text = str(value)
    data.write(text.rjust(FLOAT_LEN).encode())
58+
59+
60+
# Marker values that write_literal() emits (via write_int) ahead of the payload
# to identify which kind of literal follows.
LITERAL_INT: Final = 1
61+
LITERAL_STR: Final = 2
62+
LITERAL_BOOL: Final = 3
63+
LITERAL_FLOAT: Final = 4
64+
# Payload is two floats: real part, then imaginary part.
LITERAL_COMPLEX: Final = 5
65+
# No payload follows this marker.
LITERAL_NONE: Final = 6
66+
67+
68+
def read_literal(data: BytesIO, marker: int) -> int | str | bool | float:
    """Read the payload of a literal whose kind marker was already consumed.

    Args:
        data: Stream positioned just after the marker.
        marker: One of LITERAL_INT, LITERAL_STR, LITERAL_BOOL or LITERAL_FLOAT.

    Returns:
        The decoded int, str, bool or float value.

    Note:
        There is no branch for LITERAL_COMPLEX or LITERAL_NONE, although
        write_literal() can emit both; passing either marker (or any unknown
        value) reaches the assertion at the end.
    """
    if marker == LITERAL_INT:
        return read_long_int(data)
    if marker == LITERAL_STR:
        return read_str(data)
    if marker == LITERAL_BOOL:
        return read_bool(data)
    if marker == LITERAL_FLOAT:
        return read_float(data)
    assert False, f"Unknown literal marker {marker}"
78+
79+
80+
def write_literal(data: BytesIO, value: int | str | bool | float | complex | None) -> None:
    """Append a tagged literal value to the stream.

    A LITERAL_* marker is written with write_int, followed by the payload for
    that kind. Complex values are stored as two floats (real, then imaginary).
    Any value that matches none of the checked types is recorded as
    LITERAL_NONE with no payload.
    """
    # bool is checked before int because bool is a subclass of int.
    if isinstance(value, bool):
        write_int(data, LITERAL_BOOL)
        write_bool(data, value)
        return
    if isinstance(value, int):
        write_int(data, LITERAL_INT)
        write_long_int(data, value)
        return
    if isinstance(value, str):
        write_int(data, LITERAL_STR)
        write_str(data, value)
        return
    if isinstance(value, float):
        write_int(data, LITERAL_FLOAT)
        write_float(data, value)
        return
    if isinstance(value, complex):
        write_int(data, LITERAL_COMPLEX)
        write_float(data, value.real)
        write_float(data, value.imag)
        return
    write_int(data, LITERAL_NONE)
99+
100+
101+
# Presence markers used by the *_opt readers and writers: OPT_NO stands for None
# (no value follows), OPT_YES is followed by the value itself.
OPT_NO: Final = 0
102+
OPT_YES: Final = 1
103+
104+
105+
def read_int_opt(data: BytesIO) -> int | None:
    """Read an optional integer from the stream.

    A presence marker is read first: OPT_YES means an integer follows and is
    returned; OPT_NO means the value is None. Any other marker trips the
    assertion.
    """
    presence = read_int(data)
    if presence != OPT_YES:
        assert presence == OPT_NO
        return None
    return read_int(data)
111+
112+
113+
def write_int_opt(data: BytesIO, value: int | None) -> None:
    """Append an optional integer to the stream.

    None is stored as a lone OPT_NO marker; any other value is stored as
    OPT_YES followed by the integer.
    """
    if value is None:
        write_int(data, OPT_NO)
        return
    write_int(data, OPT_YES)
    write_int(data, value)
119+
120+
121+
def read_str_opt(data: BytesIO) -> str | None:
    """Read an optional string from the stream.

    A presence marker is read first: OPT_YES means a length-prefixed string
    follows and is returned; OPT_NO means the value is None. Any other marker
    trips the assertion.
    """
    presence = read_int(data)
    if presence != OPT_YES:
        assert presence == OPT_NO
        return None
    return read_str(data)
127+
128+
129+
def write_str_opt(data: BytesIO, value: str | None) -> None:
    """Append an optional string to the stream.

    None is stored as a lone OPT_NO marker; any other value is stored as
    OPT_YES followed by the length-prefixed string.
    """
    if value is None:
        write_int(data, OPT_NO)
        return
    write_int(data, OPT_YES)
    write_str(data, value)
135+
136+
137+
def read_int_list(data: BytesIO) -> list[int]:
    """Read a count-prefixed list of integers from the stream.

    The element count is read with read_int, then that many integers are
    read in order.
    """
    count = read_int(data)
    items: list[int] = []
    for _ in range(count):
        items.append(read_int(data))
    return items
140+
141+
142+
def write_int_list(data: BytesIO, value: list[int]) -> None:
    """Append a count-prefixed list of integers to the stream.

    Writes the number of elements with write_int, then each element in order.
    """
    count = len(value)
    write_int(data, count)
    for element in value:
        write_int(data, element)
146+
147+
148+
def read_str_list(data: BytesIO) -> list[str]:
    """Read a count-prefixed list of strings from the stream.

    The element count is read with read_int, then that many length-prefixed
    strings are read in order.
    """
    count = read_int(data)
    items: list[str] = []
    for _ in range(count):
        items.append(read_str(data))
    return items
151+
152+
153+
def write_str_list(data: BytesIO, value: Sequence[str]) -> None:
    """Append a count-prefixed sequence of strings to the stream.

    Writes the number of elements with write_int, then each string in order
    using write_str.
    """
    count = len(value)
    write_int(data, count)
    for element in value:
        write_str(data, element)
157+
158+
159+
def read_str_opt_list(data: BytesIO) -> list[str | None]:
    """Read a count-prefixed list of optional strings from the stream.

    The element count is read with read_int, then that many entries are read
    in order with read_str_opt, so individual entries may be None.
    """
    count = read_int(data)
    items: list[str | None] = []
    for _ in range(count):
        items.append(read_str_opt(data))
    return items
162+
163+
164+
def write_str_opt_list(data: BytesIO, value: list[str | None]) -> None:
    """Append a count-prefixed list of optional strings to the stream.

    Writes the number of elements with write_int, then each entry in order
    using write_str_opt, so None entries are preserved.
    """
    count = len(value)
    write_int(data, count)
    for element in value:
        write_str_opt(data, element)

mypy/fixup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ def visit_type_info(self, info: TypeInfo) -> None:
9797
info.declared_metaclass.accept(self.type_fixer)
9898
if info.metaclass_type:
9999
info.metaclass_type.accept(self.type_fixer)
100+
if info.self_type:
101+
info.self_type.accept(self.type_fixer)
100102
if info.alt_promote:
101103
info.alt_promote.accept(self.type_fixer)
102104
instance = Instance(info, [])

mypy/main.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,11 @@ def add_invertible_flag(
10561056
action="store_true",
10571057
help="Include fine-grained dependency information in the cache for the mypy daemon",
10581058
)
1059+
incremental_group.add_argument(
1060+
"--fixed-format-cache",
1061+
action="store_true",
1062+
help="Use experimental binary fixed format cache",
1063+
)
10591064
incremental_group.add_argument(
10601065
"--skip-version-check",
10611066
action="store_true",

0 commit comments

Comments
 (0)