Skip to content

Commit b02f492

Browse files
committed
Apply lazy file reading for instancefiles
Lazy reads are slightly customized to better handle named pipes. The lazy file reads are combined with an early close immediately after the instance loader gets data from a file. As a result, check-jsonschema should only keep one instance file open at a time.
1 parent 462e628 commit b02f492

File tree

5 files changed

+73
-23
lines changed

5 files changed

+73
-23
lines changed

src/check_jsonschema/cli/main_command.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
SchemaLoaderBase,
2121
)
2222
from ..transforms import TRANSFORM_LIBRARY
23-
from .param_types import CommaDelimitedList, ValidatorClassName
23+
from .param_types import CommaDelimitedList, LazyBinaryReadFile, ValidatorClassName
2424
from .parse_result import ParseResult, SchemaLoadingMode
2525

2626
BUILTIN_SCHEMA_NAMES = [f"vendor.{k}" for k in SCHEMA_CATALOG.keys()] + [
@@ -220,7 +220,9 @@ def pretty_helptext_list(values: list[str] | tuple[str, ...]) -> str:
220220
help="Reduce output verbosity",
221221
count=True,
222222
)
223-
@click.argument("instancefiles", required=True, nargs=-1, type=click.File("rb"))
223+
@click.argument(
224+
"instancefiles", required=True, nargs=-1, type=LazyBinaryReadFile("rb", lazy=True)
225+
)
224226
def main(
225227
*,
226228
schemafile: str | None,

src/check_jsonschema/cli/param_types.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
from __future__ import annotations
22

33
import importlib
4+
import os
45
import re
6+
import stat
57
import typing as t
68

79
import click
810
import jsonschema
11+
from click._compat import open_stream
912

1013

1114
class CommaDelimitedList(click.ParamType):
@@ -104,3 +107,45 @@ def convert(
104107
self.fail(f"'{classname}' in '{pkg}' is not a class", param, ctx)
105108

106109
return t.cast(t.Type[jsonschema.protocols.Validator], result)
110+
111+
112+
class _CustomLazyFile(click.utils.LazyFile):
113+
def __init__(
114+
self,
115+
filename: str | os.PathLike[str],
116+
mode: str = "r",
117+
encoding: str | None = None,
118+
errors: str | None = "strict",
119+
atomic: bool = False,
120+
):
121+
self.name: str = os.fspath(filename)
122+
self.mode = mode
123+
self.encoding = encoding
124+
self.errors = errors
125+
self.atomic = atomic
126+
self._f: t.IO[t.Any] | None
127+
self.should_close: bool
128+
129+
if self.name == "-":
130+
self._f, self.should_close = open_stream(filename, mode, encoding, errors)
131+
else:
132+
if "r" in mode and not stat.S_ISFIFO(os.stat(filename).st_mode):
133+
# Open and close the file in case we're opening it for
134+
# reading so that we can catch at least some errors in
135+
# some cases early.
136+
open(filename, mode).close()
137+
self._f = None
138+
self.should_close = True
139+
140+
141+
class LazyBinaryReadFile(click.File):
142+
def convert(
143+
self,
144+
value: str,
145+
param: click.Parameter | None,
146+
ctx: click.Context | None,
147+
) -> t.IO[bytes]:
148+
lf = _CustomLazyFile(value, mode="rb")
149+
if ctx is not None:
150+
ctx.call_on_close(lf.close_intelligently)
151+
return t.cast(t.IO[bytes], lf)

src/check_jsonschema/instance_loader.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,6 @@ def iter_files(self) -> t.Iterator[tuple[str, ParseError | t.Any]]:
4343
data = err
4444
else:
4545
data = self._data_transform(data)
46+
47+
file.close()
4648
yield (name, data)

tests/acceptance/test_special_filetypes.py

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1+
import multiprocessing
12
import os
23
import platform
34
import sys
4-
import threading
55

66
import pytest
77
import responses
@@ -33,6 +33,16 @@ def test_schema_and_instance_in_memfds(run_line_simple):
3333
os.close(instancefd)
3434

3535

36+
# helper (in global scope) for multiprocessing "spawn" to be able to use to launch
37+
# background writers
38+
def _fifo_write(path, data):
39+
fd = os.open(path, os.O_WRONLY)
40+
try:
41+
os.write(fd, data)
42+
finally:
43+
os.close(fd)
44+
45+
3646
@pytest.mark.skipif(os.name != "posix", reason="test requires mkfifo")
3747
@pytest.mark.parametrize("check_succeeds", (True, False))
3848
def test_schema_and_instance_in_fifos(tmp_path, run_line, check_succeeds):
@@ -45,25 +55,17 @@ def test_schema_and_instance_in_fifos(tmp_path, run_line, check_succeeds):
4555
os.mkfifo(schema_path)
4656
os.mkfifo(instance_path)
4757

48-
# execute FIFO writes as blocking writes in background threads
49-
# nonblocking writes fail file existence if there's no reader, so using a FIFO
50-
# requires some level of concurrency
51-
def fifo_write(path, data):
52-
fd = os.open(path, os.O_WRONLY)
53-
try:
54-
os.write(fd, data)
55-
finally:
56-
os.close(fd)
57-
58-
schema_thread = threading.Thread(
59-
target=fifo_write, args=[schema_path, b'{"type": "integer"}']
58+
spawn_ctx = multiprocessing.get_context("spawn")
59+
60+
schema_proc = spawn_ctx.Process(
61+
target=_fifo_write, args=(schema_path, b'{"type": "integer"}')
6062
)
61-
schema_thread.start()
63+
schema_proc.start()
6264
instance_data = b"42" if check_succeeds else b'"foo"'
63-
instance_thread = threading.Thread(
64-
target=fifo_write, args=[instance_path, instance_data]
65+
instance_proc = spawn_ctx.Process(
66+
target=_fifo_write, args=(instance_path, instance_data)
6567
)
66-
instance_thread.start()
68+
instance_proc.start()
6769

6870
try:
6971
result = run_line(
@@ -74,8 +76,8 @@ def fifo_write(path, data):
7476
else:
7577
assert result.exit_code == 1
7678
finally:
77-
schema_thread.join(timeout=0.1)
78-
instance_thread.join(timeout=0.1)
79+
schema_proc.terminate()
80+
instance_proc.terminate()
7981

8082

8183
@pytest.mark.parametrize("check_passes", (True, False))

tests/unit/test_cli_parse.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import io
43
from unittest import mock
54

65
import click
@@ -86,7 +85,7 @@ def test_schemafile_and_instancefile(runner, mock_parse_result, in_tmp_dir, tmp_
8685
assert mock_parse_result.schema_path == "schema.json"
8786
assert isinstance(mock_parse_result.instancefiles, tuple)
8887
for f in mock_parse_result.instancefiles:
89-
assert isinstance(f, (io.BytesIO, io.BufferedReader))
88+
assert isinstance(f, click.utils.LazyFile)
9089
assert tuple(f.name for f in mock_parse_result.instancefiles) == ("foo.json",)
9190

9291

0 commit comments

Comments
 (0)