Skip to content

Commit 54a079d

Browse files
committed
Serialize raw errors in cache metas
1 parent f1bb818 commit 54a079d

File tree

4 files changed

+129
-32
lines changed

4 files changed

+129
-32
lines changed

mypy/build.py

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,14 @@
4343
from librt.internal import cache_version
4444

4545
import mypy.semanal_main
46-
from mypy.cache import CACHE_VERSION, CacheMeta, ReadBuffer, WriteBuffer, write_json
46+
from mypy.cache import (
47+
CACHE_VERSION,
48+
CacheMeta,
49+
ReadBuffer,
50+
SerializedError,
51+
WriteBuffer,
52+
write_json,
53+
)
4754
from mypy.checker import TypeChecker
4855
from mypy.defaults import (
4956
WORKER_CONNECTION_TIMEOUT,
@@ -52,7 +59,7 @@
5259
WORKER_START_TIMEOUT,
5360
)
5461
from mypy.error_formatter import OUTPUT_CHOICES, ErrorFormatter
55-
from mypy.errors import CompileError, ErrorInfo, Errors, report_internal_error
62+
from mypy.errors import CompileError, ErrorInfo, Errors, ErrorTuple, report_internal_error
5663
from mypy.graph_utils import prepare_sccs, strongly_connected_components, topsort
5764
from mypy.indirection import TypeIndirectionVisitor
5865
from mypy.ipc import BadStatus, IPCClient, read_status, ready_to_read, receive, send
@@ -2046,7 +2053,7 @@ class State:
20462053
dep_hashes: dict[str, bytes] = {}
20472054

20482055
# List of errors reported for this file last time.
2049-
error_lines: list[str] = []
2056+
error_lines: list[SerializedError] = []
20502057

20512058
# Parent package, its parent, etc.
20522059
ancestors: list[str] | None = None
@@ -3511,9 +3518,13 @@ def find_stale_sccs(
35113518
scc = order_ascc_ex(graph, ascc)
35123519
for id in scc:
35133520
if graph[id].error_lines:
3514-
manager.flush_errors(
3515-
manager.errors.simplify_path(graph[id].xpath), graph[id].error_lines, False
3521+
path = manager.errors.simplify_path(graph[id].xpath)
3522+
formatted = manager.errors.format_messages(
3523+
path,
3524+
deserialize_codes(graph[id].error_lines),
3525+
formatter=manager.error_formatter,
35163526
)
3527+
manager.flush_errors(path, formatted, False)
35173528
fresh_sccs.append(ascc)
35183529
else:
35193530
size = len(ascc.mod_ids)
@@ -3759,21 +3770,24 @@ def process_stale_scc(
37593770
# Flush errors, and write cache in two phases: first data files, then meta files.
37603771
meta_tuples = {}
37613772
errors_by_id = {}
3773+
formatted_by_id = {}
37623774
for id in stale:
37633775
if graph[id].xpath not in manager.errors.ignored_files:
3764-
errors = manager.errors.file_messages(
3765-
graph[id].xpath, formatter=manager.error_formatter
3776+
errors = manager.errors.file_messages(graph[id].xpath)
3777+
formatted = manager.errors.format_messages(
3778+
graph[id].xpath, errors, formatter=manager.error_formatter
37663779
)
3767-
manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), errors, False)
3780+
manager.flush_errors(manager.errors.simplify_path(graph[id].xpath), formatted, False)
37683781
errors_by_id[id] = errors
3782+
formatted_by_id[id] = formatted
37693783
meta_tuples[id] = graph[id].write_cache()
37703784
for id in stale:
37713785
meta_tuple = meta_tuples[id]
37723786
if meta_tuple is None:
37733787
continue
37743788
meta, meta_file = meta_tuple
37753789
meta.dep_hashes = [graph[dep].interface_hash for dep in graph[id].dependencies]
3776-
meta.error_lines = errors_by_id.get(id, [])
3790+
meta.error_lines = serialize_codes(errors_by_id.get(id, []))
37773791
write_cache_meta(meta, manager, meta_file)
37783792
manager.done_sccs.add(ascc.id)
37793793
manager.add_stats(
@@ -3785,7 +3799,7 @@ def process_stale_scc(
37853799
)
37863800
scc_result = {}
37873801
for id in scc:
3788-
scc_result[id] = graph[id].interface_hash.hex(), errors_by_id.get(id, [])
3802+
scc_result[id] = graph[id].interface_hash.hex(), formatted_by_id.get(id, [])
37893803
return scc_result
37903804

37913805

@@ -3932,3 +3946,26 @@ def sccs_to_bytes(sccs: list[SCC]) -> bytes:
39323946
buf = WriteBuffer()
39333947
write_json(buf, {"sccs": scc_tuples})
39343948
return buf.getvalue()
3949+
3950+
3951+
def serialize_codes(errs: list[ErrorTuple]) -> list[SerializedError]:
    """Convert error tuples into the form stored in cache metas.

    Every field is copied through unchanged except the last one: the error
    code object is replaced by its ``code`` attribute, or by None when the
    tuple carries no (truthy) code object.
    """
    serialized: list[SerializedError] = []
    for path, line, column, end_line, end_column, severity, message, error_code in errs:
        if error_code:
            code_name = error_code.code
        else:
            code_name = None
        serialized.append(
            (path, line, column, end_line, end_column, severity, message, code_name)
        )
    return serialized
3956+
3957+
3958+
def deserialize_codes(errs: list[SerializedError]) -> list[ErrorTuple]:
    """Rebuild error tuples from the form stored in cache metas.

    Every field is copied through unchanged except the last one: a non-empty
    code string is looked up via ``codes.error_codes.get``; a missing or empty
    code string becomes None without any lookup.
    """
    restored: list[ErrorTuple] = []
    for path, line, column, end_line, end_column, severity, message, code_name in errs:
        error_code = codes.error_codes.get(code_name) if code_name else None
        restored.append(
            (path, line, column, end_line, end_column, severity, message, error_code)
        )
    return restored

mypy/cache.py

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,9 @@
6969
from mypy_extensions import u8
7070

7171
# High-level cache layout format
72-
CACHE_VERSION: Final = 0
72+
CACHE_VERSION: Final = 1
73+
74+
SerializedError: _TypeAlias = tuple[str | None, int, int, int, int, str, str, str | None]
7375

7476

7577
class CacheMeta:
@@ -92,7 +94,7 @@ def __init__(
9294
dep_lines: list[int],
9395
dep_hashes: list[bytes],
9496
interface_hash: bytes,
95-
error_lines: list[str],
97+
error_lines: list[SerializedError],
9698
version_id: str,
9799
ignore_all: bool,
98100
plugin_data: Any,
@@ -157,7 +159,7 @@ def deserialize(cls, meta: dict[str, Any], data_file: str) -> CacheMeta | None:
157159
dep_lines=meta["dep_lines"],
158160
dep_hashes=[bytes.fromhex(dep) for dep in meta["dep_hashes"]],
159161
interface_hash=bytes.fromhex(meta["interface_hash"]),
160-
error_lines=meta["error_lines"],
162+
error_lines=[tuple(err) for err in meta["error_lines"]],
161163
version_id=meta["version_id"],
162164
ignore_all=meta["ignore_all"],
163165
plugin_data=meta["plugin_data"],
@@ -179,7 +181,7 @@ def write(self, data: WriteBuffer) -> None:
179181
write_int_list(data, self.dep_lines)
180182
write_bytes_list(data, self.dep_hashes)
181183
write_bytes(data, self.interface_hash)
182-
write_str_list(data, self.error_lines)
184+
write_errors(data, self.error_lines)
183185
write_str(data, self.version_id)
184186
write_bool(data, self.ignore_all)
185187
# Plugin data may be not a dictionary, so we use
@@ -204,7 +206,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
204206
dep_lines=read_int_list(data),
205207
dep_hashes=read_bytes_list(data),
206208
interface_hash=read_bytes(data),
207-
error_lines=read_str_list(data),
209+
error_lines=read_errors(data),
208210
version_id=read_str(data),
209211
ignore_all=read_bool(data),
210212
plugin_data=read_json_value(data),
@@ -231,6 +233,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
231233
LIST_INT: Final[Tag] = 21
232234
LIST_STR: Final[Tag] = 22
233235
LIST_BYTES: Final[Tag] = 23
236+
TUPLE_GEN: Final[Tag] = 24
234237
DICT_STR_GEN: Final[Tag] = 30
235238

236239
# Misc classes.
@@ -391,12 +394,11 @@ def write_str_opt_list(data: WriteBuffer, value: list[str | None]) -> None:
391394

392395

393396
Value: _TypeAlias = None | int | str | bool
394-
JsonValue: _TypeAlias = Value | list["JsonValue"] | dict[str, "JsonValue"]
395397

396-
# Currently tuples are used by mypyc plugin. They will be normalized to
397-
# JSON lists after a roundtrip.
398-
JsonValueEx: _TypeAlias = (
399-
Value | list["JsonValueEx"] | dict[str, "JsonValueEx"] | tuple["JsonValueEx", ...]
398+
# Our JSON format is somewhat non-standard as we distinguish lists and tuples.
399+
# This is convenient for some internal uses, such as the mypyc plugin and error serialization.
400+
JsonValue: _TypeAlias = (
401+
Value | list["JsonValue"] | dict[str, "JsonValue"] | tuple["JsonValue", ...]
400402
)
401403

402404

@@ -415,13 +417,16 @@ def read_json_value(data: ReadBuffer) -> JsonValue:
415417
if tag == LIST_GEN:
416418
size = read_int_bare(data)
417419
return [read_json_value(data) for _ in range(size)]
420+
if tag == TUPLE_GEN:
421+
size = read_int_bare(data)
422+
return tuple(read_json_value(data) for _ in range(size))
418423
if tag == DICT_STR_GEN:
419424
size = read_int_bare(data)
420425
return {read_str_bare(data): read_json_value(data) for _ in range(size)}
421426
assert False, f"Invalid JSON tag: {tag}"
422427

423428

424-
def write_json_value(data: WriteBuffer, value: JsonValueEx) -> None:
429+
def write_json_value(data: WriteBuffer, value: JsonValue) -> None:
425430
if value is None:
426431
write_tag(data, LITERAL_NONE)
427432
elif isinstance(value, bool):
@@ -432,11 +437,16 @@ def write_json_value(data: WriteBuffer, value: JsonValueEx) -> None:
432437
elif isinstance(value, str):
433438
write_tag(data, LITERAL_STR)
434439
write_str_bare(data, value)
435-
elif isinstance(value, (list, tuple)):
440+
elif isinstance(value, list):
436441
write_tag(data, LIST_GEN)
437442
write_int_bare(data, len(value))
438443
for val in value:
439444
write_json_value(data, val)
445+
elif isinstance(value, tuple):
446+
write_tag(data, TUPLE_GEN)
447+
write_int_bare(data, len(value))
448+
for val in value:
449+
write_json_value(data, val)
440450
elif isinstance(value, dict):
441451
write_tag(data, DICT_STR_GEN)
442452
write_int_bare(data, len(value))
@@ -461,3 +471,38 @@ def write_json(data: WriteBuffer, value: dict[str, Any]) -> None:
461471
for key in sorted(value):
462472
write_str_bare(data, key)
463473
write_json_value(data, value[key])
474+
475+
476+
def write_errors(data: WriteBuffer, errs: list[SerializedError]) -> None:
    """Write a list of serialized errors to the buffer.

    Layout: a LIST_GEN tag and the number of errors, then for each error a
    TUPLE_GEN tag followed by its eight fields in order (optional path, four
    ints for the position, severity, message, optional error code string).
    """
    write_tag(data, LIST_GEN)
    write_int_bare(data, len(errs))
    for err in errs:
        # Unpack before emitting anything for this entry, so a malformed
        # tuple fails without a dangling TUPLE_GEN tag.
        path, line, column, end_line, end_column, severity, message, code = err
        write_tag(data, TUPLE_GEN)
        write_str_opt(data, path)
        for number in (line, column, end_line, end_column):
            write_int(data, number)
        for text in (severity, message):
            write_str(data, text)
        write_str_opt(data, code)
489+
490+
491+
def read_errors(data: ReadBuffer) -> list[SerializedError]:
    """Read a list of serialized errors from the buffer.

    Expects a LIST_GEN tag and an element count, then for each element a
    TUPLE_GEN tag followed by eight fields in order (optional path, four
    ints for the position, severity, message, optional error code string).

    Raises AssertionError if an unexpected tag is encountered.
    """
    # Consume each tag *outside* the assert statement: under "python -O"
    # asserts are removed entirely, which would otherwise leave the tag
    # unread and misalign every subsequent read from the buffer.
    list_tag = read_tag(data)
    assert list_tag == LIST_GEN, f"Invalid tag for error list: {list_tag}"
    result: list[SerializedError] = []
    for _ in range(read_int_bare(data)):
        tuple_tag = read_tag(data)
        assert tuple_tag == TUPLE_GEN, f"Invalid tag for error tuple: {tuple_tag}"
        result.append(
            (
                read_str_opt(data),
                read_int(data),
                read_int(data),
                read_int(data),
                read_int(data),
                read_str(data),
                read_str(data),
                read_str_opt(data),
            )
        )
    return result

mypy/errors.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -951,7 +951,7 @@ def raise_error(self, use_stdout: bool = True) -> NoReturn:
951951
self.new_messages(), use_stdout=use_stdout, module_with_blocker=self.blocker_module()
952952
)
953953

954-
def format_messages(
954+
def format_messages_default(
955955
self, error_tuples: list[ErrorTuple], source_lines: list[str] | None
956956
) -> list[str]:
957957
"""Return a string list that represents the error messages.
@@ -1009,24 +1009,28 @@ def format_messages(
10091009
a.append(" " * (DEFAULT_SOURCE_OFFSET + column) + marker)
10101010
return a
10111011

1012-
def file_messages(self, path: str, formatter: ErrorFormatter | None = None) -> list[str]:
1013-
"""Return a string list of new error messages from a given file.
1014-
1015-
Use a form suitable for displaying to the user.
1016-
"""
1012+
def file_messages(self, path: str) -> list[ErrorTuple]:
1013+
"""Return an error tuple list of new error messages from a given file."""
10171014
if path not in self.error_info_map:
10181015
return []
10191016

10201017
error_info = self.error_info_map[path]
10211018
error_info = [info for info in error_info if not info.hidden]
10221019
error_info = self.remove_duplicates(self.sort_messages(error_info))
1023-
error_tuples = self.render_messages(error_info)
1020+
return self.render_messages(error_info)
10241021

1022+
def format_messages(
1023+
self, path: str, error_tuples: list[ErrorTuple], formatter: ErrorFormatter | None = None
1024+
) -> list[str]:
1025+
"""Return a string list of new error messages from a given file.
1026+
1027+
Use a form suitable for displaying to the user.
1028+
"""
1029+
self.flushed_files.add(path)
10251030
if formatter is not None:
10261031
errors = create_errors(error_tuples)
10271032
return [formatter.report_error(err) for err in errors]
10281033

1029-
self.flushed_files.add(path)
10301034
source_lines = None
10311035
if self.options.pretty and self.read_source:
10321036
# Find shadow file mapping and read source lines if a shadow file exists for the given path.
@@ -1036,7 +1040,7 @@ def file_messages(self, path: str, formatter: ErrorFormatter | None = None) -> l
10361040
source_lines = self.read_source(mapped_path)
10371041
else:
10381042
source_lines = self.read_source(path)
1039-
return self.format_messages(error_tuples, source_lines)
1043+
return self.format_messages_default(error_tuples, source_lines)
10401044

10411045
def find_shadow_file_mapping(self, path: str) -> str | None:
10421046
"""Return the shadow file path for a given source file path or None."""
@@ -1058,7 +1062,8 @@ def new_messages(self) -> list[str]:
10581062
msgs = []
10591063
for path in self.error_info_map.keys():
10601064
if path not in self.flushed_files:
1061-
msgs.extend(self.file_messages(path))
1065+
error_tuples = self.file_messages(path)
1066+
msgs.extend(self.format_messages(path, error_tuples))
10621067
return msgs
10631068

10641069
def targets(self) -> set[str]:

test-data/unit/check-incremental.test

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7626,3 +7626,13 @@ y = 1
76267626
class C: ...
76277627
[out2]
76287628
tmp/m.py:2: note: Revealed type is "def () -> other.C"
7629+
7630+
[case testOutputFormatterIncremental]
7631+
# flags2: --output json
7632+
def wrong() -> int:
7633+
if wrong():
7634+
return 0
7635+
[out]
7636+
main:2: error: Missing return statement
7637+
[out2]
7638+
{"file": "main", "line": 2, "column": 0, "message": "Missing return statement", "hint": null, "code": "return", "severity": "error"}

0 commit comments

Comments
 (0)