Skip to content

Commit 2312ec3

Browse files
committed
Split out resolution logic
1 parent d651b8d commit 2312ec3

File tree

5 files changed

+95
-68
lines changed

5 files changed

+95
-68
lines changed

src/hypothesis_jsonschema/_canonicalise.py

Lines changed: 1 addition & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616
import json
1717
import math
1818
import re
19-
from copy import deepcopy
20-
from typing import Any, Dict, List, NoReturn, Optional, Tuple, Union
19+
from typing import Any, Dict, List, Optional, Tuple, Union
2120

2221
import jsonschema
2322
from hypothesis.errors import InvalidArgument
@@ -569,69 +568,6 @@ def canonicalish(schema: JSONType) -> Dict[str, Any]:
569568
FALSEY = canonicalish(False)
570569

571570

572-
class LocalResolver(jsonschema.RefResolver):
573-
def resolve_remote(self, uri: str) -> NoReturn:
574-
raise HypothesisRefResolutionError(
575-
f"hypothesis-jsonschema does not fetch remote references (uri={uri!r})"
576-
)
577-
578-
579-
def resolve_all_refs(
580-
schema: Union[bool, Schema], *, resolver: LocalResolver = None
581-
) -> Schema:
582-
"""
583-
Resolve all references in the given schema.
584-
585-
This handles nested definitions, but not recursive definitions.
586-
The latter require special handling to convert to strategies and are much
587-
less common, so we just ignore them (and error out) for now.
588-
"""
589-
if isinstance(schema, bool):
590-
return canonicalish(schema)
591-
assert isinstance(schema, dict), schema
592-
if resolver is None:
593-
resolver = LocalResolver.from_schema(deepcopy(schema))
594-
if not isinstance(resolver, jsonschema.RefResolver):
595-
raise InvalidArgument(
596-
f"resolver={resolver} (type {type(resolver).__name__}) is not a RefResolver"
597-
)
598-
599-
if "$ref" in schema:
600-
s = dict(schema)
601-
ref = s.pop("$ref")
602-
with resolver.resolving(ref) as got:
603-
if s == {}:
604-
return resolve_all_refs(got, resolver=resolver)
605-
m = merged([s, got])
606-
if m is None: # pragma: no cover
607-
msg = f"$ref:{ref!r} had incompatible base schema {s!r}"
608-
raise HypothesisRefResolutionError(msg)
609-
return resolve_all_refs(m, resolver=resolver)
610-
assert "$ref" not in schema
611-
612-
for key in SCHEMA_KEYS:
613-
val = schema.get(key, False)
614-
if isinstance(val, list):
615-
schema[key] = [
616-
resolve_all_refs(v, resolver=resolver) if isinstance(v, dict) else v
617-
for v in val
618-
]
619-
elif isinstance(val, dict):
620-
schema[key] = resolve_all_refs(val, resolver=resolver)
621-
else:
622-
assert isinstance(val, bool)
623-
for key in SCHEMA_OBJECT_KEYS: # values are keys-to-schema-dicts, not schemas
624-
if key in schema:
625-
subschema = schema[key]
626-
assert isinstance(subschema, dict)
627-
schema[key] = {
628-
k: resolve_all_refs(v, resolver=resolver) if isinstance(v, dict) else v
629-
for k, v in subschema.items()
630-
}
631-
assert isinstance(schema, dict)
632-
return schema
633-
634-
635571
def merged(schemas: List[Any]) -> Optional[Schema]:
636572
"""Merge *n* schemas into a single schema, or None if result is invalid.
637573

src/hypothesis_jsonschema/_from_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@
2626
get_type,
2727
make_validator,
2828
merged,
29-
resolve_all_refs,
3029
)
3130
from ._encode import JSONType, encode_canonical_json
31+
from ._resolve import resolve_all_refs
3232

3333
JSON_STRATEGY: st.SearchStrategy[JSONType] = st.recursive(
3434
st.none()

src/hypothesis_jsonschema/_resolve.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
"""
2+
Reference-resolution logic for JSON schemas, split out of the canonicalisation module.
3+
4+
The canonical format that we transform to is not intended for human consumption.
5+
Instead, it prioritises locality of reasoning - for example, we convert oneOf
6+
arrays into an anyOf of allOf (each sub-schema being the original plus not anyOf
7+
the rest). Resolving references and merging subschemas is also really helpful.
8+
9+
All this effort is justified by the huge performance improvements that we get
10+
when converting to Hypothesis strategies. To the extent possible there is only
11+
one way to generate any given value... but much more importantly, we can do
12+
most things by construction instead of by filtering. That's the difference
13+
between "I'd like it to be faster" and "doesn't finish at all".
14+
"""
15+
from copy import deepcopy
16+
from typing import NoReturn, Union
17+
18+
import jsonschema
19+
from hypothesis.errors import InvalidArgument
20+
21+
from ._canonicalise import (
22+
SCHEMA_KEYS,
23+
SCHEMA_OBJECT_KEYS,
24+
HypothesisRefResolutionError,
25+
Schema,
26+
canonicalish,
27+
merged,
28+
)
29+
30+
31+
class LocalResolver(jsonschema.RefResolver):
    """A ``jsonschema.RefResolver`` that refuses to fetch remote references."""

    def resolve_remote(self, uri: str) -> NoReturn:
        """Reject any attempt to resolve ``uri`` remotely.

        Raises:
            HypothesisRefResolutionError: unconditionally; the message names
                the URI that was requested.
        """
        message = (
            f"hypothesis-jsonschema does not fetch remote references (uri={uri!r})"
        )
        raise HypothesisRefResolutionError(message)
36+
37+
38+
def resolve_all_refs(
    schema: Union[bool, Schema], *, resolver: Union[LocalResolver, None] = None
) -> Schema:
    """
    Resolve all references in the given schema.

    This handles nested definitions, but not recursive definitions.
    The latter require special handling to convert to strategies and are much
    less common, so we just ignore them (and error out) for now.

    Args:
        schema: A boolean schema or a schema dict.  A dict without a top-level
            ``$ref`` is updated in place (its sub-schemas are replaced by
            their resolved forms) and that same dict is returned.
        resolver: The resolver used to look up ``$ref`` targets.  When omitted,
            a ``LocalResolver`` is built from a deep copy of ``schema``.

    Returns:
        A schema dict with no ``$ref`` key at the top level and with every
        sub-schema reachable through ``SCHEMA_KEYS`` / ``SCHEMA_OBJECT_KEYS``
        resolved the same way.

    Raises:
        InvalidArgument: if ``resolver`` is not a ``jsonschema.RefResolver``.
        HypothesisRefResolutionError: if the keywords next to a ``$ref``
            cannot be merged with the referenced schema.
    """
    if isinstance(schema, bool):
        return canonicalish(schema)
    assert isinstance(schema, dict), schema
    if resolver is None:
        # The resolver works from its own deep copy, so the in-place updates
        # made to `schema` below do not touch the document it looks refs up in.
        resolver = LocalResolver.from_schema(deepcopy(schema))
    if not isinstance(resolver, jsonschema.RefResolver):
        raise InvalidArgument(
            f"resolver={resolver} (type {type(resolver).__name__}) is not a RefResolver"
        )

    if "$ref" in schema:
        s = dict(schema)
        ref = s.pop("$ref")
        # Keywords sitting next to `$ref` may contain references of their own.
        # Resolve them here, before `resolving()` is entered, so they are
        # looked up relative to the referrer rather than the `$ref` target.
        # Without this step such nested references would be merged as-is and
        # survive in the result, since the assertion below only inspects the
        # top-level key.
        siblings = resolve_all_refs(dict(s), resolver=resolver)
        with resolver.resolving(ref) as got:
            m = merged([siblings, resolve_all_refs(got, resolver=resolver)])
            if m is None:  # pragma: no cover
                msg = f"$ref:{ref!r} had incompatible base schema {s!r}"
                raise HypothesisRefResolutionError(msg)
            assert "$ref" not in m
            return m
    assert "$ref" not in schema

    for key in SCHEMA_KEYS:
        # Missing keys default to False so they fall through to the bool branch.
        val = schema.get(key, False)
        if isinstance(val, list):
            schema[key] = [
                resolve_all_refs(v, resolver=resolver) if isinstance(v, dict) else v
                for v in val
            ]
        elif isinstance(val, dict):
            schema[key] = resolve_all_refs(val, resolver=resolver)
        else:
            assert isinstance(val, bool)
    for key in SCHEMA_OBJECT_KEYS:  # values are keys-to-schema-dicts, not schemas
        if key in schema:
            subschema = schema[key]
            assert isinstance(subschema, dict)
            schema[key] = {
                k: resolve_all_refs(v, resolver=resolver) if isinstance(v, dict) else v
                for k, v in subschema.items()
            }
    assert isinstance(schema, dict)
    assert "$ref" not in schema
    return schema

tests/test_canonicalise.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
make_validator,
1515
merged,
1616
next_up,
17-
resolve_all_refs,
1817
)
1918
from hypothesis_jsonschema._from_schema import JSON_STRATEGY
19+
from hypothesis_jsonschema._resolve import resolve_all_refs
2020

2121

2222
def is_valid(instance, schema):

tests/test_from_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@
2424
from hypothesis_jsonschema._canonicalise import (
2525
HypothesisRefResolutionError,
2626
canonicalish,
27-
resolve_all_refs,
2827
)
2928
from hypothesis_jsonschema._from_schema import from_schema, rfc3339
29+
from hypothesis_jsonschema._resolve import resolve_all_refs
3030

3131
# We use this as a placeholder for all schemas which resolve to nothing()
3232
# but do not canonicalise to FALSEY

0 commit comments

Comments
 (0)