Skip to content

Commit eca043e

Browse files
committed
DOP-3068: Properly handle rst source files with invalid UTF-8
1 parent 87339ec commit eca043e

File tree

4 files changed

+41
-11
lines changed

4 files changed

+41
-11
lines changed

snooty/diagnostics.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,16 +144,15 @@ class GitMergeConflictArtifactFound(Diagnostic):
144144

145145
def __init__(
146146
self,
147-
path: Path,
147+
path: Optional[Path],
148148
start: Union[int, Tuple[int, int]],
149149
end: Union[None, int, Tuple[int, int]] = None,
150150
) -> None:
151151
super().__init__(
152-
f"Git merge conflict artifact found in {str(path)} on line {str(start)}",
152+
f"Git merge conflict artifact found{' in ' + str(path) if path else ''} on line {str(start)}",
153153
start,
154154
end,
155155
)
156-
self.path = path
157156

158157

159158
class DocUtilsParseError(Diagnostic):
@@ -347,12 +346,14 @@ class CannotOpenFile(Diagnostic):
347346

348347
def __init__(
349348
self,
350-
path: PurePath,
349+
path: Optional[PurePath],
351350
reason: str,
352351
start: Union[int, Tuple[int, int]],
353352
end: Union[None, int, Tuple[int, int]] = None,
354353
) -> None:
355-
super().__init__(f"Error opening {str(path)}: {reason}", start, end)
354+
super().__init__(
355+
f"Error opening{' ' + str(path) if path else ''}: {reason}", start, end
356+
)
356357
self.path = path
357358
self.reason = reason
358359

snooty/test_parser.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,15 @@
2121
UnknownTabID,
2222
UnknownTabset,
2323
)
24+
from .n import FileId
2425
from .parser import InlineJSONVisitor, JSONVisitor
2526
from .types import ProjectConfig
26-
from .util_test import ast_to_testing_string, check_ast_testing_string, parse_rst
27+
from .util_test import (
28+
ast_to_testing_string,
29+
check_ast_testing_string,
30+
make_test,
31+
parse_rst,
32+
)
2733

2834
ROOT_PATH = Path("test_data")
2935

@@ -3337,3 +3343,18 @@ def test_field_list_in_list() -> None:
33373343

33383344
page.finish(diagnostics)
33393345
assert len(diagnostics) == 2
3346+
3347+
3348+
def test_invalid_utf8() -> None:
3349+
with make_test(
3350+
{
3351+
Path(
3352+
"source/index.txt"
3353+
): b"""test line 1
3354+
test line 2
3355+
here is an invalid character sequence\x80 oh noooo
3356+
"""
3357+
}
3358+
) as result:
3359+
diagnostics = result.diagnostics[FileId("index.txt")]
3360+
assert [(type(d), d.start[0]) for d in diagnostics] == [(CannotOpenFile, 2)]

snooty/types.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from . import n, specparser
1313
from .diagnostics import (
14+
CannotOpenFile,
1415
ConstantNotDeclared,
1516
Diagnostic,
1617
GitMergeConflictArtifactFound,
@@ -203,15 +204,19 @@ def read(
203204
self, path: Path, text: Optional[str] = None
204205
) -> Tuple[str, List[Diagnostic]]:
205206
if text is None:
206-
text = path.read_text(encoding="utf-8")
207+
try:
208+
text = path.read_text(encoding="utf-8")
209+
except UnicodeDecodeError as err:
210+
error_line = path.read_bytes()[: err.start].count(b"\n")
211+
return ("", [CannotOpenFile(None, str(err), error_line)])
207212

208213
text, diagnostics = self.substitute(text)
209214
match_found = PAT_GIT_MARKER.finditer(text)
210215

211216
if match_found:
212217
for match in match_found:
213218
lineno = text.count("\n", 0, match.start())
214-
diagnostics.append(GitMergeConflictArtifactFound(path, lineno))
219+
diagnostics.append(GitMergeConflictArtifactFound(None, lineno))
215220

216221
return (text, diagnostics)
217222

snooty/util_test.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import xml.etree.ElementTree as ET
88
from collections import defaultdict
99
from pathlib import Path, PurePath
10-
from typing import Any, Dict, Iterator, List, Optional, Tuple
10+
from typing import Any, AnyStr, Dict, Iterator, List, Optional, Tuple
1111
from xml.sax.saxutils import escape
1212

1313
from . import n, rstparser
@@ -204,7 +204,7 @@ def flush(self) -> None:
204204

205205
@contextlib.contextmanager
206206
def make_test(
207-
files: Dict[PurePath, str], name: str = ""
207+
files: Dict[PurePath, AnyStr], name: str = ""
208208
) -> Iterator[BackendTestResults]:
209209
"""Create a temporary test project with the given files."""
210210
need_to_make_snooty_toml = Path("snooty.toml") not in files
@@ -222,7 +222,10 @@ def make_test(
222222
file_path = root.joinpath(filename)
223223
if file_path.parent != root:
224224
os.makedirs(file_path.parent, exist_ok=True)
225-
file_path.write_text(file_text)
225+
if isinstance(file_text, str):
226+
file_path.write_text(file_text)
227+
else:
228+
file_path.write_bytes(file_text)
226229

227230
backend = BackendTestResults()
228231

0 commit comments

Comments
 (0)