Skip to content

Commit d429e84

Browse files
authored
Fix two issues with newer ref resolution (#298)
- Add a cache for remote lookups. - Use `.content`, not `.raw`, on the response object: `.raw` does not expose the same binary data as `.content` in all cases; e.g., gzipped content is exposed verbatim.
1 parent dcd3c4f commit d429e84

File tree

2 files changed

+32
-5
lines changed

2 files changed

+32
-5
lines changed

src/check_jsonschema/parsers/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import io
34
import json
45
import pathlib
56
import typing as t
@@ -83,10 +84,12 @@ def get(
8384
)
8485

8586
def parse_data_with_path(
    self, data: t.BinaryIO | bytes, path: pathlib.Path | str, default_filetype: str
) -> t.Any:
    """Parse in-memory or streamed data with the loader chosen for ``path``.

    The loader is looked up via ``self.get(path, default_filetype)``.
    ``data`` may be a binary stream or a ``bytes`` object; ``bytes`` are
    wrapped in an ``io.BytesIO`` so the loader always receives a stream.

    Raises ``FailedFileLoadError`` (chained to the underlying error) when
    the loader raises one of ``LOADING_FAILURE_ERROR_TYPES``.
    """
    loader = self.get(path, default_filetype)
    try:
        # Loaders take a file-like object, so raw bytes get a stream wrapper.
        stream = io.BytesIO(data) if isinstance(data, bytes) else data
        return loader(stream)
    except LOADING_FAILURE_ERROR_TYPES as err:
        raise FailedFileLoadError(f"Failed to parse {path}") from err

src/check_jsonschema/schema_loader/resolver.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ def create_retrieve_callable(
4444
if base_uri is None:
4545
base_uri = retrieval_uri
4646

47+
cache = ResourceCache()
48+
4749
def get_local_file(uri: str) -> t.Any:
4850
path = filename2path(uri)
4951
return parser_set.parse_file(path, "json")
@@ -55,15 +57,37 @@ def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
5557
else:
5658
full_uri = uri
5759

60+
if full_uri in cache._cache:
61+
return cache[uri]
62+
5863
full_uri_scheme = urllib.parse.urlsplit(full_uri).scheme
5964
if full_uri_scheme in ("http", "https"):
6065
data = requests.get(full_uri, stream=True)
61-
parsed_object = parser_set.parse_data_with_path(data.raw, full_uri, "json")
66+
parsed_object = parser_set.parse_data_with_path(
67+
data.content, full_uri, "json"
68+
)
6269
else:
6370
parsed_object = get_local_file(full_uri)
6471

65-
return referencing.Resource.from_contents(
66-
parsed_object, default_specification=DRAFT202012
67-
)
72+
cache[uri] = parsed_object
73+
return cache[uri]
6874

6975
return retrieve_reference
76+
77+
78+
class ResourceCache:
    """Mapping-like store of ``referencing.Resource`` objects keyed by URI.

    Values assigned with ``cache[uri] = data`` are not stored as-is: they are
    first converted with ``referencing.Resource.from_contents`` using
    ``DRAFT202012`` as the default specification.
    """

    def __init__(self) -> None:
        # Backing dict: URI string -> wrapped resource.
        self._cache: t.Dict[str, referencing.Resource[Schema]] = {}

    def __setitem__(self, uri: str, data: t.Any) -> referencing.Resource[Schema]:
        """Wrap ``data`` as a resource, store it under ``uri``, and return it."""
        self._cache[uri] = wrapped = referencing.Resource.from_contents(
            data, default_specification=DRAFT202012
        )
        return wrapped

    def __getitem__(self, uri: str) -> referencing.Resource[Schema]:
        """Return the resource stored under ``uri`` (``KeyError`` if absent)."""
        return self._cache[uri]

    def __contains__(self, uri: str) -> bool:
        """Report whether a resource has been stored under ``uri``."""
        return uri in self._cache

0 commit comments

Comments
 (0)