Skip to content

Commit dcb4d69

Browse files
authored
Use 1 byte per type/symbol tag (#19735)
This is a small incremental improvement for the fixed-format cache. I am adding dedicated write/read functions for tags (i.e. integers in the 0-255 range). I propose to use these functions exclusively for type tags (hence the name), and to keep using the regular `write_int()`/`read_int()` for integers that are "accidentally small" (like argument kinds, etc.). In a separate PR I will change the regular `int` format to be more progressive (e.g. use only 1 byte if an integer happens to be small). I also change the terminology from "marker" to "tag", as this is the more common name for this concept. Note that we can probably use `mypy_extensions.u8` for type tags. If there is a desire for this, I can switch to it (either in this PR or in a separate one).
1 parent d1dffe2 commit dcb4d69

File tree

10 files changed

+262
-138
lines changed

10 files changed

+262
-138
lines changed

mypy/cache.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010
read_float as read_float,
1111
read_int as read_int,
1212
read_str as read_str,
13+
read_tag as read_tag,
1314
write_bool as write_bool,
1415
write_float as write_float,
1516
write_int as write_int,
1617
write_str as write_str,
18+
write_tag as write_tag,
1719
)
1820
except ImportError:
1921
# TODO: temporary, remove this after we publish mypy-native on PyPI.
@@ -32,6 +34,12 @@ def read_int(data: Buffer) -> int:
3234
def write_int(data: Buffer, value: int) -> None:
3335
raise NotImplementedError
3436

37+
def read_tag(data: Buffer) -> int:
38+
raise NotImplementedError
39+
40+
def write_tag(data: Buffer, value: int) -> None:
41+
raise NotImplementedError
42+
3543
def read_str(data: Buffer) -> str:
3644
raise NotImplementedError
3745

@@ -59,37 +67,37 @@ def write_float(data: Buffer, value: float) -> None:
5967
LITERAL_NONE: Final = 6
6068

6169

62-
def read_literal(data: Buffer, marker: int) -> int | str | bool | float:
63-
if marker == LITERAL_INT:
70+
def read_literal(data: Buffer, tag: int) -> int | str | bool | float:
71+
if tag == LITERAL_INT:
6472
return read_int(data)
65-
elif marker == LITERAL_STR:
73+
elif tag == LITERAL_STR:
6674
return read_str(data)
67-
elif marker == LITERAL_BOOL:
75+
elif tag == LITERAL_BOOL:
6876
return read_bool(data)
69-
elif marker == LITERAL_FLOAT:
77+
elif tag == LITERAL_FLOAT:
7078
return read_float(data)
71-
assert False, f"Unknown literal marker {marker}"
79+
assert False, f"Unknown literal tag {tag}"
7280

7381

7482
def write_literal(data: Buffer, value: int | str | bool | float | complex | None) -> None:
7583
if isinstance(value, bool):
76-
write_int(data, LITERAL_BOOL)
84+
write_tag(data, LITERAL_BOOL)
7785
write_bool(data, value)
7886
elif isinstance(value, int):
79-
write_int(data, LITERAL_INT)
87+
write_tag(data, LITERAL_INT)
8088
write_int(data, value)
8189
elif isinstance(value, str):
82-
write_int(data, LITERAL_STR)
90+
write_tag(data, LITERAL_STR)
8391
write_str(data, value)
8492
elif isinstance(value, float):
85-
write_int(data, LITERAL_FLOAT)
93+
write_tag(data, LITERAL_FLOAT)
8694
write_float(data, value)
8795
elif isinstance(value, complex):
88-
write_int(data, LITERAL_COMPLEX)
96+
write_tag(data, LITERAL_COMPLEX)
8997
write_float(data, value.real)
9098
write_float(data, value.imag)
9199
else:
92-
write_int(data, LITERAL_NONE)
100+
write_tag(data, LITERAL_NONE)
93101

94102

95103
def read_int_opt(data: Buffer) -> int | None:

mypy/nodes.py

Lines changed: 43 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
read_str_list,
2929
read_str_opt,
3030
read_str_opt_list,
31+
read_tag,
3132
write_bool,
3233
write_int,
3334
write_int_list,
@@ -37,6 +38,7 @@
3738
write_str_list,
3839
write_str_opt,
3940
write_str_opt_list,
41+
write_tag,
4042
)
4143
from mypy.options import Options
4244
from mypy.util import is_sunder, is_typeshed_file, short_type
@@ -417,7 +419,7 @@ def deserialize(cls, data: JsonDict) -> MypyFile:
417419
return tree
418420

419421
def write(self, data: Buffer) -> None:
420-
write_int(data, MYPY_FILE)
422+
write_tag(data, MYPY_FILE)
421423
write_str(data, self._fullname)
422424
self.names.write(data, self._fullname)
423425
write_bool(data, self.is_stub)
@@ -427,7 +429,7 @@ def write(self, data: Buffer) -> None:
427429

428430
@classmethod
429431
def read(cls, data: Buffer) -> MypyFile:
430-
assert read_int(data) == MYPY_FILE
432+
assert read_tag(data) == MYPY_FILE
431433
tree = MypyFile([], [])
432434
tree._fullname = read_str(data)
433435
tree.names = SymbolTable.read(data)
@@ -711,7 +713,7 @@ def deserialize(cls, data: JsonDict) -> OverloadedFuncDef:
711713
return res
712714

713715
def write(self, data: Buffer) -> None:
714-
write_int(data, OVERLOADED_FUNC_DEF)
716+
write_tag(data, OVERLOADED_FUNC_DEF)
715717
write_int(data, len(self.items))
716718
for item in self.items:
717719
item.write(data)
@@ -1022,7 +1024,7 @@ def deserialize(cls, data: JsonDict) -> FuncDef:
10221024
return ret
10231025

10241026
def write(self, data: Buffer) -> None:
1025-
write_int(data, FUNC_DEF)
1027+
write_tag(data, FUNC_DEF)
10261028
write_str(data, self._name)
10271029
mypy.types.write_type_opt(data, self.type)
10281030
write_str(data, self._fullname)
@@ -1134,16 +1136,16 @@ def deserialize(cls, data: JsonDict) -> Decorator:
11341136
return dec
11351137

11361138
def write(self, data: Buffer) -> None:
1137-
write_int(data, DECORATOR)
1139+
write_tag(data, DECORATOR)
11381140
self.func.write(data)
11391141
self.var.write(data)
11401142
write_bool(data, self.is_overload)
11411143

11421144
@classmethod
11431145
def read(cls, data: Buffer) -> Decorator:
1144-
assert read_int(data) == FUNC_DEF
1146+
assert read_tag(data) == FUNC_DEF
11451147
func = FuncDef.read(data)
1146-
assert read_int(data) == VAR
1148+
assert read_tag(data) == VAR
11471149
var = Var.read(data)
11481150
dec = Decorator(func, [], var)
11491151
dec.is_overload = read_bool(data)
@@ -1326,7 +1328,7 @@ def deserialize(cls, data: JsonDict) -> Var:
13261328
return v
13271329

13281330
def write(self, data: Buffer) -> None:
1329-
write_int(data, VAR)
1331+
write_tag(data, VAR)
13301332
write_str(data, self._name)
13311333
mypy.types.write_type_opt(data, self.type)
13321334
mypy.types.write_type_opt(data, self.setter_type)
@@ -1341,13 +1343,13 @@ def read(cls, data: Buffer) -> Var:
13411343
v = Var(name, typ)
13421344
setter_type: mypy.types.CallableType | None = None
13431345
if read_bool(data):
1344-
assert read_int(data) == mypy.types.CALLABLE_TYPE
1346+
assert read_tag(data) == mypy.types.CALLABLE_TYPE
13451347
setter_type = mypy.types.CallableType.read(data)
13461348
v.setter_type = setter_type
13471349
v.is_ready = False # Override True default set in __init__
13481350
v._fullname = read_str(data)
13491351
read_flags(data, v, VAR_FLAGS)
1350-
marker = read_int(data)
1352+
marker = read_tag(data)
13511353
if marker == LITERAL_COMPLEX:
13521354
v.final_value = complex(read_float(data), read_float(data))
13531355
elif marker != LITERAL_NONE:
@@ -1465,7 +1467,7 @@ def deserialize(cls, data: JsonDict) -> ClassDef:
14651467
return res
14661468

14671469
def write(self, data: Buffer) -> None:
1468-
write_int(data, CLASS_DEF)
1470+
write_tag(data, CLASS_DEF)
14691471
write_str(data, self.name)
14701472
mypy.types.write_type_list(data, self.type_vars)
14711473
write_str(data, self.fullname)
@@ -2898,7 +2900,7 @@ def deserialize(cls, data: JsonDict) -> TypeVarExpr:
28982900
)
28992901

29002902
def write(self, data: Buffer) -> None:
2901-
write_int(data, TYPE_VAR_EXPR)
2903+
write_tag(data, TYPE_VAR_EXPR)
29022904
write_str(data, self._name)
29032905
write_str(data, self._fullname)
29042906
mypy.types.write_type_list(data, self.values)
@@ -2948,7 +2950,7 @@ def deserialize(cls, data: JsonDict) -> ParamSpecExpr:
29482950
)
29492951

29502952
def write(self, data: Buffer) -> None:
2951-
write_int(data, PARAM_SPEC_EXPR)
2953+
write_tag(data, PARAM_SPEC_EXPR)
29522954
write_str(data, self._name)
29532955
write_str(data, self._fullname)
29542956
self.upper_bound.write(data)
@@ -3016,7 +3018,7 @@ def deserialize(cls, data: JsonDict) -> TypeVarTupleExpr:
30163018
)
30173019

30183020
def write(self, data: Buffer) -> None:
3019-
write_int(data, TYPE_VAR_TUPLE_EXPR)
3021+
write_tag(data, TYPE_VAR_TUPLE_EXPR)
30203022
self.tuple_fallback.write(data)
30213023
write_str(data, self._name)
30223024
write_str(data, self._fullname)
@@ -3026,7 +3028,7 @@ def write(self, data: Buffer) -> None:
30263028

30273029
@classmethod
30283030
def read(cls, data: Buffer) -> TypeVarTupleExpr:
3029-
assert read_int(data) == mypy.types.INSTANCE
3031+
assert read_tag(data) == mypy.types.INSTANCE
30303032
fallback = mypy.types.Instance.read(data)
30313033
return TypeVarTupleExpr(
30323034
read_str(data),
@@ -3908,7 +3910,7 @@ def deserialize(cls, data: JsonDict) -> TypeInfo:
39083910
return ti
39093911

39103912
def write(self, data: Buffer) -> None:
3911-
write_int(data, TYPE_INFO)
3913+
write_tag(data, TYPE_INFO)
39123914
self.names.write(data, self.fullname)
39133915
self.defn.write(data)
39143916
write_str(data, self.module_name)
@@ -3944,7 +3946,7 @@ def write(self, data: Buffer) -> None:
39443946
@classmethod
39453947
def read(cls, data: Buffer) -> TypeInfo:
39463948
names = SymbolTable.read(data)
3947-
assert read_int(data) == CLASS_DEF
3949+
assert read_tag(data) == CLASS_DEF
39483950
defn = ClassDef.read(data)
39493951
module_name = read_str(data)
39503952
ti = TypeInfo(names, defn, module_name)
@@ -3954,10 +3956,9 @@ def read(cls, data: Buffer) -> TypeInfo:
39543956
ti.abstract_attributes = list(zip(attrs, statuses))
39553957
ti.type_vars = read_str_list(data)
39563958
ti.has_param_spec_type = read_bool(data)
3957-
num_bases = read_int(data)
39583959
ti.bases = []
3959-
for _ in range(num_bases):
3960-
assert read_int(data) == mypy.types.INSTANCE
3960+
for _ in range(read_int(data)):
3961+
assert read_tag(data) == mypy.types.INSTANCE
39613962
ti.bases.append(mypy.types.Instance.read(data))
39623963
# NOTE: ti.mro will be set in the fixup phase based on these
39633964
# names. The reason we need to store the mro instead of just
@@ -3972,19 +3973,19 @@ def read(cls, data: Buffer) -> TypeInfo:
39723973
ti._mro_refs = read_str_list(data)
39733974
ti._promote = cast(list[mypy.types.ProperType], mypy.types.read_type_list(data))
39743975
if read_bool(data):
3975-
assert read_int(data) == mypy.types.INSTANCE
3976+
assert read_tag(data) == mypy.types.INSTANCE
39763977
ti.alt_promote = mypy.types.Instance.read(data)
39773978
if read_bool(data):
3978-
assert read_int(data) == mypy.types.INSTANCE
3979+
assert read_tag(data) == mypy.types.INSTANCE
39793980
ti.declared_metaclass = mypy.types.Instance.read(data)
39803981
if read_bool(data):
3981-
assert read_int(data) == mypy.types.INSTANCE
3982+
assert read_tag(data) == mypy.types.INSTANCE
39823983
ti.metaclass_type = mypy.types.Instance.read(data)
39833984
if read_bool(data):
3984-
assert read_int(data) == mypy.types.TUPLE_TYPE
3985+
assert read_tag(data) == mypy.types.TUPLE_TYPE
39853986
ti.tuple_type = mypy.types.TupleType.read(data)
39863987
if read_bool(data):
3987-
assert read_int(data) == mypy.types.TYPED_DICT_TYPE
3988+
assert read_tag(data) == mypy.types.TYPED_DICT_TYPE
39883989
ti.typeddict_type = mypy.types.TypedDictType.read(data)
39893990
read_flags(data, ti, TypeInfo.FLAGS)
39903991
metadata = read_str(data)
@@ -3994,7 +3995,7 @@ def read(cls, data: Buffer) -> TypeInfo:
39943995
ti.slots = set(read_str_list(data))
39953996
ti.deletable_attributes = read_str_list(data)
39963997
if read_bool(data):
3997-
assert read_int(data) == mypy.types.TYPE_VAR_TYPE
3998+
assert read_tag(data) == mypy.types.TYPE_VAR_TYPE
39983999
ti.self_type = mypy.types.TypeVarType.read(data)
39994000
if read_bool(data):
40004001
ti.dataclass_transform_spec = DataclassTransformSpec.read(data)
@@ -4270,7 +4271,7 @@ def deserialize(cls, data: JsonDict) -> TypeAlias:
42704271
)
42714272

42724273
def write(self, data: Buffer) -> None:
4273-
write_int(data, TYPE_ALIAS)
4274+
write_tag(data, TYPE_ALIAS)
42744275
write_str(data, self._fullname)
42754276
self.target.write(data)
42764277
mypy.types.write_type_list(data, self.alias_tvars)
@@ -4890,33 +4891,33 @@ def local_definitions(
48904891

48914892

48924893
def read_symbol(data: Buffer) -> mypy.nodes.SymbolNode:
4893-
marker = read_int(data)
4894+
tag = read_tag(data)
48944895
# The branches here are ordered manually by type "popularity".
4895-
if marker == VAR:
4896+
if tag == VAR:
48964897
return mypy.nodes.Var.read(data)
4897-
if marker == FUNC_DEF:
4898+
if tag == FUNC_DEF:
48984899
return mypy.nodes.FuncDef.read(data)
4899-
if marker == DECORATOR:
4900+
if tag == DECORATOR:
49004901
return mypy.nodes.Decorator.read(data)
4901-
if marker == TYPE_INFO:
4902+
if tag == TYPE_INFO:
49024903
return mypy.nodes.TypeInfo.read(data)
4903-
if marker == OVERLOADED_FUNC_DEF:
4904+
if tag == OVERLOADED_FUNC_DEF:
49044905
return mypy.nodes.OverloadedFuncDef.read(data)
4905-
if marker == TYPE_VAR_EXPR:
4906+
if tag == TYPE_VAR_EXPR:
49064907
return mypy.nodes.TypeVarExpr.read(data)
4907-
if marker == TYPE_ALIAS:
4908+
if tag == TYPE_ALIAS:
49084909
return mypy.nodes.TypeAlias.read(data)
4909-
if marker == PARAM_SPEC_EXPR:
4910+
if tag == PARAM_SPEC_EXPR:
49104911
return mypy.nodes.ParamSpecExpr.read(data)
4911-
if marker == TYPE_VAR_TUPLE_EXPR:
4912+
if tag == TYPE_VAR_TUPLE_EXPR:
49124913
return mypy.nodes.TypeVarTupleExpr.read(data)
4913-
assert False, f"Unknown symbol marker {marker}"
4914+
assert False, f"Unknown symbol tag {tag}"
49144915

49154916

49164917
def read_overload_part(data: Buffer) -> OverloadPart:
4917-
marker = read_int(data)
4918-
if marker == DECORATOR:
4918+
tag = read_tag(data)
4919+
if tag == DECORATOR:
49194920
return Decorator.read(data)
4920-
if marker == FUNC_DEF:
4921+
if tag == FUNC_DEF:
49214922
return FuncDef.read(data)
4922-
assert False, f"Invalid marker for an OverloadPart {marker}"
4923+
assert False, f"Invalid tag for an OverloadPart {tag}"

0 commit comments

Comments
 (0)