Skip to content

Commit 6c4f0aa

Browse files
committed
Merge remote-tracking branch 'upstream/master' into bugfix/gh-19576-enum-comparison-overlap
2 parents d96243d + e633140 commit 6c4f0aa

File tree

161 files changed

+7667
-1754
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

161 files changed

+7667
-1754
lines changed

docs/requirements-docs.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
-r ../mypy-requirements.txt
12
sphinx>=8.1.0
23
furo>=2022.3.4
34
myst-parser>=4.0.0

misc/typeshed_patches/0001-Remove-use-of-LiteralString-in-builtins-13743.patch

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
From e6995c91231e1915eba43a29a22dd4cbfaf9e08e Mon Sep 17 00:00:00 2001
1+
From 805d7fc06a8bee350959512e0908a18a87b7f8c2 Mon Sep 17 00:00:00 2001
22
From: Shantanu <[email protected]>
33
Date: Mon, 26 Sep 2022 12:55:07 -0700
44
Subject: [PATCH] Remove use of LiteralString in builtins (#13743)
@@ -8,7 +8,7 @@ Subject: [PATCH] Remove use of LiteralString in builtins (#13743)
88
1 file changed, 1 insertion(+), 99 deletions(-)
99

1010
diff --git a/mypy/typeshed/stdlib/builtins.pyi b/mypy/typeshed/stdlib/builtins.pyi
11-
index 00728f42d..ea77a730f 100644
11+
index c7ab95482..3e93da36e 100644
1212
--- a/mypy/typeshed/stdlib/builtins.pyi
1313
+++ b/mypy/typeshed/stdlib/builtins.pyi
1414
@@ -63,7 +63,6 @@ from typing import ( # noqa: Y022,UP035
@@ -19,7 +19,7 @@ index 00728f42d..ea77a730f 100644
1919
ParamSpec,
2020
Self,
2121
TypeAlias,
22-
@@ -453,31 +452,16 @@ class str(Sequence[str]):
22+
@@ -468,31 +467,16 @@ class str(Sequence[str]):
2323
def __new__(cls, object: object = ...) -> Self: ...
2424
@overload
2525
def __new__(cls, object: ReadableBuffer, encoding: str = ..., errors: str = ...) -> Self: ...
@@ -51,7 +51,7 @@ index 00728f42d..ea77a730f 100644
5151
def format(self, *args: object, **kwargs: object) -> str: ...
5252
def format_map(self, mapping: _FormatMapMapping, /) -> str: ...
5353
def index(self, sub: str, start: SupportsIndex | None = ..., end: SupportsIndex | None = ..., /) -> int: ...
54-
@@ -493,98 +477,34 @@ class str(Sequence[str]):
54+
@@ -508,98 +492,34 @@ class str(Sequence[str]):
5555
def isspace(self) -> bool: ...
5656
def istitle(self) -> bool: ...
5757
def isupper(self) -> bool: ...
@@ -150,7 +150,7 @@ index 00728f42d..ea77a730f 100644
150150
def zfill(self, width: SupportsIndex, /) -> str: ... # type: ignore[misc]
151151
@staticmethod
152152
@overload
153-
@@ -595,39 +515,21 @@ class str(Sequence[str]):
153+
@@ -610,39 +530,21 @@ class str(Sequence[str]):
154154
@staticmethod
155155
@overload
156156
def maketrans(x: str, y: str, z: str, /) -> dict[int, int | None]: ...
@@ -190,7 +190,7 @@ index 00728f42d..ea77a730f 100644
190190
- @overload
191191
def __rmul__(self, value: SupportsIndex, /) -> str: ... # type: ignore[misc]
192192
def __getnewargs__(self) -> tuple[str]: ...
193-
193+
def __format__(self, format_spec: str, /) -> str: ...
194194
--
195-
2.49.0
195+
2.50.1
196196

mypy/build.py

Lines changed: 66 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from typing_extensions import TypeAlias as _TypeAlias
4141

4242
import mypy.semanal_main
43+
from mypy.cache import Buffer
4344
from mypy.checker import TypeChecker
4445
from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter
4546
from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error
@@ -116,6 +117,8 @@
116117
"abc",
117118
}
118119

120+
# Be conservative for now; we can increase this in the future if it proves safe/useful.
121+
MAX_GC_FREEZE_CYCLES = 1
119122

120123
Graph: _TypeAlias = dict[str, "State"]
121124

@@ -707,6 +710,8 @@ def __init__(
707710
# new file can be processed O(n**2) times. This cache
708711
# avoids most of this redundant work.
709712
self.ast_cache: dict[str, tuple[MypyFile, list[ErrorInfo]]] = {}
713+
# Number of times we used GC optimization hack for fresh SCCs.
714+
self.gc_freeze_cycles = 0
710715

711716
def dump_stats(self) -> None:
712717
if self.options.dump_build_stats:
@@ -1139,6 +1144,17 @@ def read_deps_cache(manager: BuildManager, graph: Graph) -> dict[str, FgDepMeta]
11391144
return module_deps_metas
11401145

11411146

1147+
def _load_ff_file(file: str, manager: BuildManager, log_error: str) -> bytes | None:
    """Read the raw bytes of a cache file through the manager's metastore.

    Args:
        file: Name of the file to read, passed to ``manager.metastore.read``.
        manager: Build manager providing the metastore, logging and stats.
        log_error: Prefix for the message logged when the read fails; the
            file name is appended to it.

    Returns:
        The bytes returned by the metastore, or None if the read raised
        OSError (in which case the failure is logged and no timing stat
        is recorded).
    """
    started = time.time()
    try:
        payload = manager.metastore.read(file)
    except OSError:
        manager.log(log_error + file)
        return None
    else:
        # Only successful reads contribute to the metastore read time.
        manager.add_stats(metastore_read_time=time.time() - started)
        return payload
1156+
1157+
11421158
def _load_json_file(
11431159
file: str, manager: BuildManager, log_success: str, log_error: str
11441160
) -> dict[str, Any] | None:
@@ -1259,7 +1275,11 @@ def get_cache_names(id: str, path: str, options: Options) -> tuple[str, str, str
12591275
deps_json = None
12601276
if options.cache_fine_grained:
12611277
deps_json = prefix + ".deps.json"
1262-
return (prefix + ".meta.json", prefix + ".data.json", deps_json)
1278+
if options.fixed_format_cache:
1279+
data_suffix = ".data.ff"
1280+
else:
1281+
data_suffix = ".data.json"
1282+
return (prefix + ".meta.json", prefix + data_suffix, deps_json)
12631283

12641284

12651285
def find_cache_meta(id: str, path: str, manager: BuildManager) -> CacheMeta | None:
@@ -1559,8 +1579,13 @@ def write_cache(
15591579
tree.path = path
15601580

15611581
# Serialize data and analyze interface
1562-
data = tree.serialize()
1563-
data_bytes = json_dumps(data, manager.options.debug_cache)
1582+
if manager.options.fixed_format_cache:
1583+
data_io = Buffer()
1584+
tree.write(data_io)
1585+
data_bytes = data_io.getvalue()
1586+
else:
1587+
data = tree.serialize()
1588+
data_bytes = json_dumps(data, manager.options.debug_cache)
15641589
interface_hash = hash_digest(data_bytes)
15651590

15661591
plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))
@@ -2085,15 +2110,23 @@ def load_tree(self, temporary: bool = False) -> None:
20852110
self.meta is not None
20862111
), "Internal error: this method must be called only for cached modules"
20872112

2088-
data = _load_json_file(
2089-
self.meta.data_json, self.manager, "Load tree ", "Could not load tree: "
2090-
)
2113+
data: bytes | dict[str, Any] | None
2114+
if self.options.fixed_format_cache:
2115+
data = _load_ff_file(self.meta.data_json, self.manager, "Could not load tree: ")
2116+
else:
2117+
data = _load_json_file(
2118+
self.meta.data_json, self.manager, "Load tree ", "Could not load tree: "
2119+
)
20912120
if data is None:
20922121
return
20932122

20942123
t0 = time.time()
20952124
# TODO: Assert data file wasn't changed.
2096-
self.tree = MypyFile.deserialize(data)
2125+
if isinstance(data, bytes):
2126+
data_io = Buffer(data)
2127+
self.tree = MypyFile.read(data_io)
2128+
else:
2129+
self.tree = MypyFile.deserialize(data)
20972130
t1 = time.time()
20982131
self.manager.add_stats(deserialize_time=t1 - t0)
20992132
if not temporary:
@@ -2481,7 +2514,11 @@ def write_cache(self) -> None:
24812514
):
24822515
if self.options.debug_serialize:
24832516
try:
2484-
self.tree.serialize()
2517+
if self.manager.options.fixed_format_cache:
2518+
data = Buffer()
2519+
self.tree.write(data)
2520+
else:
2521+
self.tree.serialize()
24852522
except Exception:
24862523
print(f"Error serializing {self.id}", file=self.manager.stdout)
24872524
raise # Propagate to display traceback
@@ -3326,8 +3363,29 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
33263363
#
33273364
# TODO: see if it's possible to determine if we need to process only a
33283365
# _subset_ of the past SCCs instead of having to process them all.
3366+
if (
3367+
platform.python_implementation() == "CPython"
3368+
and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
3369+
):
3370+
# When deserializing cache we create huge amount of new objects, so even
3371+
# with our generous GC thresholds, GC is still doing a lot of pointless
3372+
# work searching for garbage. So, we temporarily disable it when
3373+
# processing fresh SCCs, and then move all the new objects to the oldest
3374+
# generation with the freeze()/unfreeze() trick below. This is arguably
3375+
# a hack, but it gives huge performance wins for large third-party
3376+
# libraries, like torch.
3377+
gc.collect()
3378+
gc.disable()
33293379
for prev_scc in fresh_scc_queue:
33303380
process_fresh_modules(graph, prev_scc, manager)
3381+
if (
3382+
platform.python_implementation() == "CPython"
3383+
and manager.gc_freeze_cycles < MAX_GC_FREEZE_CYCLES
3384+
):
3385+
manager.gc_freeze_cycles += 1
3386+
gc.freeze()
3387+
gc.unfreeze()
3388+
gc.enable()
33313389
fresh_scc_queue = []
33323390
size = len(scc)
33333391
if size == 1:

mypy/cache.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
from __future__ import annotations
2+
3+
from collections.abc import Sequence
4+
from typing import TYPE_CHECKING, Final
5+
6+
from mypy_extensions import u8
7+
8+
# Prefer the primitives provided by the native_internal module.
# NOTE(review): the redundant "X as X" aliases presumably mark these names as
# explicit re-exports of this module -- confirm.
try:
    from native_internal import (
        Buffer as Buffer,
        read_bool as read_bool,
        read_float as read_float,
        read_int as read_int,
        read_str as read_str,
        read_tag as read_tag,
        write_bool as write_bool,
        write_float as write_float,
        write_int as write_int,
        write_str as write_str,
        write_tag as write_tag,
    )
except ImportError:
    # TODO: temporary, remove this after we publish mypy-native on PyPI.
    # When native_internal is not importable, define placeholders with the same
    # names so that this module can still be imported. Every placeholder raises
    # NotImplementedError when called. The definitions are skipped when
    # TYPE_CHECKING is true, so type checkers only see the import above.
    if not TYPE_CHECKING:

        class Buffer:
            # Placeholder: constructing a Buffer fails immediately.
            def __init__(self, source: bytes = b"") -> None:
                raise NotImplementedError

            def getvalue(self) -> bytes:
                raise NotImplementedError

        def read_int(data: Buffer) -> int:
            raise NotImplementedError

        def write_int(data: Buffer, value: int) -> None:
            raise NotImplementedError

        def read_tag(data: Buffer) -> u8:
            raise NotImplementedError

        def write_tag(data: Buffer, value: u8) -> None:
            raise NotImplementedError

        def read_str(data: Buffer) -> str:
            raise NotImplementedError

        def write_str(data: Buffer, value: str) -> None:
            raise NotImplementedError

        def read_bool(data: Buffer) -> bool:
            raise NotImplementedError

        def write_bool(data: Buffer, value: bool) -> None:
            raise NotImplementedError

        def read_float(data: Buffer) -> float:
            raise NotImplementedError

        def write_float(data: Buffer, value: float) -> None:
            raise NotImplementedError
62+
63+
64+
# Always use this type alias to refer to type tags.
Tag = u8

# Tags identifying the kind of a literal value. write_literal() writes one of
# these before the value's payload (LITERAL_NONE is written with no payload).
LITERAL_INT: Final[Tag] = 1
LITERAL_STR: Final[Tag] = 2
LITERAL_BOOL: Final[Tag] = 3
LITERAL_FLOAT: Final[Tag] = 4
LITERAL_COMPLEX: Final[Tag] = 5
LITERAL_NONE: Final[Tag] = 6
73+
74+
75+
def read_literal(data: Buffer, tag: Tag) -> int | str | bool | float:
    """Read the payload of a literal value identified by ``tag``.

    The tag itself is supplied by the caller; this function reads only the
    payload that follows it.

    Args:
        data: Buffer to read the payload from.
        tag: One of LITERAL_INT, LITERAL_STR, LITERAL_BOOL or LITERAL_FLOAT.

    Returns:
        The decoded int, str, bool or float value.

    Raises:
        AssertionError: If ``tag`` is not one of the four tags listed above.
            This includes LITERAL_COMPLEX and LITERAL_NONE, which
            write_literal() can emit but which are not decoded here.
    """
    if tag == LITERAL_INT:
        return read_int(data)
    if tag == LITERAL_STR:
        return read_str(data)
    if tag == LITERAL_BOOL:
        return read_bool(data)
    if tag == LITERAL_FLOAT:
        return read_float(data)
    # NOTE(review): presumably callers handle LITERAL_COMPLEX / LITERAL_NONE
    # themselves before calling this function -- confirm.
    # Raise explicitly rather than using `assert False`: assert statements are
    # removed under `python -O`, which would make this fall through and return
    # None for an unknown tag.
    raise AssertionError(f"Unknown literal tag {tag}")
85+
86+
87+
def write_literal(data: Buffer, value: int | str | bool | float | complex | None) -> None:
    """Write a literal value to ``data`` as a tag followed by its payload.

    bool, int, str and float values are written as their tag plus one payload
    item. A complex value is written as LITERAL_COMPLEX followed by its real
    and imaginary parts as two floats. Any other value is written as a bare
    LITERAL_NONE tag with no payload.
    """
    # bool has to be tested before int, because bool is a subclass of int.
    if isinstance(value, bool):
        write_tag(data, LITERAL_BOOL)
        write_bool(data, value)
        return
    if isinstance(value, int):
        write_tag(data, LITERAL_INT)
        write_int(data, value)
        return
    if isinstance(value, str):
        write_tag(data, LITERAL_STR)
        write_str(data, value)
        return
    if isinstance(value, float):
        write_tag(data, LITERAL_FLOAT)
        write_float(data, value)
        return
    if isinstance(value, complex):
        write_tag(data, LITERAL_COMPLEX)
        write_float(data, value.real)
        write_float(data, value.imag)
        return
    write_tag(data, LITERAL_NONE)
106+
107+
108+
def read_int_opt(data: Buffer) -> int | None:
    """Read an optional int: a bool presence flag, then the int if the flag is true.

    Returns None when the flag is false; nothing further is read in that case.
    """
    is_present = read_bool(data)
    return read_int(data) if is_present else None
112+
113+
114+
def write_int_opt(data: Buffer, value: int | None) -> None:
    """Write an optional int: a bool presence flag, then the int if it is not None."""
    if value is None:
        write_bool(data, False)
        return
    write_bool(data, True)
    write_int(data, value)
120+
121+
122+
def read_str_opt(data: Buffer) -> str | None:
    """Read an optional str: a bool presence flag, then the str if the flag is true.

    Returns None when the flag is false; nothing further is read in that case.
    """
    is_present = read_bool(data)
    return read_str(data) if is_present else None
126+
127+
128+
def write_str_opt(data: Buffer, value: str | None) -> None:
    """Write an optional str: a bool presence flag, then the str if it is not None."""
    if value is None:
        write_bool(data, False)
        return
    write_bool(data, True)
    write_str(data, value)
134+
135+
136+
def read_int_list(data: Buffer) -> list[int]:
    """Read a list of ints: an int length followed by that many ints."""
    count = read_int(data)
    items: list[int] = []
    for _ in range(count):
        items.append(read_int(data))
    return items
139+
140+
141+
def write_int_list(data: Buffer, value: list[int]) -> None:
    """Write a list of ints: its length as an int, then each element in order."""
    count = len(value)
    write_int(data, count)
    for element in value:
        write_int(data, element)
145+
146+
147+
def read_str_list(data: Buffer) -> list[str]:
    """Read a list of strs: an int length followed by that many strs."""
    count = read_int(data)
    items: list[str] = []
    for _ in range(count):
        items.append(read_str(data))
    return items
150+
151+
152+
def write_str_list(data: Buffer, value: Sequence[str]) -> None:
    """Write a sequence of strs: its length as an int, then each element in order."""
    count = len(value)
    write_int(data, count)
    for element in value:
        write_str(data, element)
156+
157+
158+
def read_str_opt_list(data: Buffer) -> list[str | None]:
    """Read a list of optional strs: an int length, then that many read_str_opt() items."""
    count = read_int(data)
    items: list[str | None] = []
    for _ in range(count):
        items.append(read_str_opt(data))
    return items
161+
162+
163+
def write_str_opt_list(data: Buffer, value: list[str | None]) -> None:
    """Write a list of optional strs: its length as an int, then each item via write_str_opt()."""
    count = len(value)
    write_int(data, count)
    for element in value:
        write_str_opt(data, element)

0 commit comments

Comments
 (0)