Skip to content

Commit 9481d2a

Browse files
committed
Introduce remote $ref tests and fix URL join logic
Relative $ref paths must be URL-joined against the base URI of the schema that references them. The previous logic did not do this correctly, as revealed by a new test case. Fixing the join also showed that local file loading was more indirect than it now needs to be, so local file handling in the ref resolver has been refactored as well.
1 parent ff740f8 commit 9481d2a

File tree

2 files changed

+87
-15
lines changed

2 files changed

+87
-15
lines changed

src/check_jsonschema/schema_loader/resolver.py

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import pathlib
43
import typing as t
54
import urllib.parse
65

@@ -38,25 +37,22 @@ def create_retrieve_callable(
3837
parser_set: ParserSet, schema_uri: str | None
3938
) -> t.Callable[[str], referencing.Resource[Schema]]:
4039
def get_local_file(uri: str) -> t.Any:
41-
path = pathlib.Path(uri)
42-
if not path.is_absolute():
43-
if schema_uri is None:
44-
raise referencing.exceptions.Unretrievable(
45-
f"Cannot retrieve schema reference data for '{uri}' from "
46-
"local filesystem. "
47-
"The path appears relative, but there is no known local base path."
48-
)
49-
schema_path = filename2path(schema_uri)
50-
path = schema_path.parent / path
40+
path = filename2path(uri)
5141
return parser_set.parse_file(path, "json")
5242

5343
def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
5444
scheme = urllib.parse.urlsplit(uri).scheme
55-
if scheme in ("http", "https"):
56-
data = requests.get(uri, stream=True)
57-
parsed_object = parser_set.parse_data_with_path(data.raw, uri, "json")
45+
if scheme == "" and schema_uri is not None:
46+
full_uri = urllib.parse.urljoin(schema_uri, uri)
5847
else:
59-
parsed_object = get_local_file(uri)
48+
full_uri = uri
49+
50+
full_uri_scheme = urllib.parse.urlsplit(full_uri).scheme
51+
if full_uri_scheme in ("http", "https"):
52+
data = requests.get(full_uri, stream=True)
53+
parsed_object = parser_set.parse_data_with_path(data.raw, full_uri, "json")
54+
else:
55+
parsed_object = get_local_file(full_uri)
6056

6157
return referencing.Resource.from_contents(
6258
parsed_object, default_specification=DRAFT202012
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import json
2+
3+
import pytest
4+
import responses
5+
6+
from check_jsonschema import cachedownloader
7+
8+
CASES = {
9+
"case1": {
10+
"main_schema": {
11+
"$schema": "http://json-schema.org/draft-07/schema",
12+
"properties": {
13+
"title": {"$ref": "./title_schema.json"},
14+
},
15+
"additionalProperties": False,
16+
},
17+
"other_schemas": {"title_schema": {"type": "string"}},
18+
"passing_document": {"title": "doc one"},
19+
"failing_document": {"title": 2},
20+
},
21+
"case2": {
22+
"main_schema": {
23+
"$schema": "http://json-schema.org/draft-07/schema",
24+
"type": "object",
25+
"required": ["test"],
26+
"properties": {"test": {"$ref": "./values.json#/$defs/test"}},
27+
},
28+
"other_schemas": {
29+
"values": {
30+
"$schema": "http://json-schema.org/draft-07/schema",
31+
"$defs": {"test": {"type": "string"}},
32+
}
33+
},
34+
"passing_document": {"test": "some data"},
35+
"failing_document": {"test": {"foo": "bar"}},
36+
},
37+
}
38+
39+
40+
@pytest.mark.parametrize("check_passes", (True, False))
41+
@pytest.mark.parametrize("casename", ("case1", "case2"))
42+
def test_remote_ref_resolution_simple_case(
43+
run_line, check_passes, casename, tmp_path, monkeypatch
44+
):
45+
def _fake_compute_default_cache_dir(self):
46+
return str(tmp_path)
47+
48+
monkeypatch.setattr(
49+
cachedownloader.CacheDownloader,
50+
"_compute_default_cache_dir",
51+
_fake_compute_default_cache_dir,
52+
)
53+
54+
main_schema_loc = "https://example.com/main.json"
55+
responses.add("GET", main_schema_loc, json=CASES[casename]["main_schema"])
56+
for name, subschema in CASES[casename]["other_schemas"].items():
57+
other_schema_loc = f"https://example.com/{name}.json"
58+
responses.add("GET", other_schema_loc, json=subschema)
59+
60+
instance_path = tmp_path / "instance.json"
61+
instance_path.write_text(
62+
json.dumps(
63+
CASES[casename]["passing_document"]
64+
if check_passes
65+
else CASES[casename]["failing_document"]
66+
)
67+
)
68+
69+
result = run_line(
70+
["check-jsonschema", "--schemafile", main_schema_loc, str(instance_path)]
71+
)
72+
output = f"\nstdout:\n{result.stdout}\n\nstderr:\n{result.stderr}"
73+
if check_passes:
74+
assert result.exit_code == 0, output
75+
else:
76+
assert result.exit_code == 1, output

0 commit comments

Comments
 (0)