Skip to content

Commit 18691f2

Browse files
committed
PoC recursive schemas
1 parent 7fa9377 commit 18691f2

File tree

3 files changed

+101
-27
lines changed

3 files changed

+101
-27
lines changed

src/hypothesis_jsonschema/_canonicalise.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,12 @@ def resolve_all_refs(
621621
f"resolver={resolver} (type {type(resolver).__name__}) is not a RefResolver"
622622
)
623623

624-
if "$ref" in schema:
624+
def is_recursive(reference: str) -> bool:
625+
return reference == "#" or resolver.resolution_scope == reference # type: ignore
626+
627+
# To avoid infinite recursion, recursive references are skipped here; they are left in place and processed later.
628+
# A definition is recursive if it contains a reference to itself or one of its ancestors.
629+
if "$ref" in schema and not is_recursive(schema["$ref"]): # type: ignore
625630
s = dict(schema)
626631
ref = s.pop("$ref")
627632
with resolver.resolving(ref) as got:
@@ -632,7 +637,6 @@ def resolve_all_refs(
632637
msg = f"$ref:{ref!r} had incompatible base schema {s!r}"
633638
raise HypothesisRefResolutionError(msg)
634639
return resolve_all_refs(m, resolver=resolver)
635-
assert "$ref" not in schema
636640

637641
for key in SCHEMA_KEYS:
638642
val = schema.get(key, False)

src/hypothesis_jsonschema/_from_schema.py

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import math
55
import operator
66
import re
7+
from copy import deepcopy
78
from fractions import Fraction
89
from functools import partial
910
from typing import Any, Callable, Dict, List, NoReturn, Optional, Set, Union
@@ -19,6 +20,7 @@
1920
TYPE_STRINGS,
2021
HypothesisRefResolutionError,
2122
JSONType,
23+
LocalResolver,
2224
Schema,
2325
canonicalish,
2426
encode_canonical_json,
@@ -43,11 +45,13 @@
4345

4446

4547
def merged_as_strategies(
46-
schemas: List[Schema], custom_formats: Optional[Dict[str, st.SearchStrategy[str]]]
48+
schemas: List[Schema],
49+
custom_formats: Optional[Dict[str, st.SearchStrategy[str]]],
50+
resolver: LocalResolver,
4751
) -> st.SearchStrategy[JSONType]:
4852
assert schemas, "internal error: must pass at least one schema to merge"
4953
if len(schemas) == 1:
50-
return from_schema(schemas[0], custom_formats=custom_formats)
54+
return from_schema(schemas[0], custom_formats=custom_formats, resolver=resolver)
5155
# Try to merge combinations of strategies.
5256
strats = []
5357
combined: Set[str] = set()
@@ -61,7 +65,7 @@ def merged_as_strategies(
6165
if s is not None and s != FALSEY:
6266
validators = [make_validator(s) for s in schemas]
6367
strats.append(
64-
from_schema(s, custom_formats=custom_formats).filter(
68+
from_schema(s, custom_formats=custom_formats, resolver=resolver).filter(
6569
lambda obj: all(v.is_valid(obj) for v in validators)
6670
)
6771
)
@@ -73,14 +77,15 @@ def from_schema(
7377
schema: Union[bool, Schema],
7478
*,
7579
custom_formats: Dict[str, st.SearchStrategy[str]] = None,
80+
resolver: Optional[LocalResolver] = None,
7681
) -> st.SearchStrategy[JSONType]:
7782
"""Take a JSON schema and return a strategy for allowed JSON objects.
7883
7984
Schema reuse with "definitions" and "$ref" is not yet supported, but
8085
everything else in drafts 04, 05, and 07 is fully tested and working.
8186
"""
8287
try:
83-
return __from_schema(schema, custom_formats=custom_formats)
88+
return __from_schema(schema, custom_formats=custom_formats, resolver=resolver)
8489
except Exception as err:
8590
error = err
8691

@@ -113,9 +118,10 @@ def __from_schema(
113118
schema: Union[bool, Schema],
114119
*,
115120
custom_formats: Dict[str, st.SearchStrategy[str]] = None,
121+
resolver: Optional[LocalResolver] = None,
116122
) -> st.SearchStrategy[JSONType]:
117123
try:
118-
schema = resolve_all_refs(schema)
124+
schema = resolve_all_refs(schema, resolver=resolver)
119125
except RecursionError:
120126
raise HypothesisRefResolutionError(
121127
f"Could not resolve recursive references in schema={schema!r}"
@@ -142,6 +148,9 @@ def __from_schema(
142148
}
143149
custom_formats[_FORMATS_TOKEN] = None # type: ignore
144150

151+
if resolver is None:
152+
resolver = LocalResolver.from_schema(deepcopy(schema))
153+
145154
schema = canonicalish(schema)
146155
# Boolean objects are special schemata; False rejects all and True accepts all.
147156
if schema == FALSEY:
@@ -156,32 +165,44 @@ def __from_schema(
156165

157166
assert isinstance(schema, dict)
158167
# Now we handle as many validation keywords as we can...
168+
if "$ref" in schema:
169+
ref = schema["$ref"]
170+
171+
def _recurse() -> st.SearchStrategy[JSONType]:
172+
_, resolved = resolver.resolve(ref) # type: ignore
173+
return from_schema(
174+
resolved, custom_formats=custom_formats, resolver=resolver
175+
)
176+
177+
return st.deferred(_recurse)
159178
# Applying subschemata with boolean logic
160179
if "not" in schema:
161180
not_ = schema.pop("not")
162181
assert isinstance(not_, dict)
163182
validator = make_validator(not_).is_valid
164-
return from_schema(schema, custom_formats=custom_formats).filter(
165-
lambda v: not validator(v)
166-
)
183+
return from_schema(
184+
schema, custom_formats=custom_formats, resolver=resolver
185+
).filter(lambda v: not validator(v))
167186
if "anyOf" in schema:
168187
tmp = schema.copy()
169188
ao = tmp.pop("anyOf")
170189
assert isinstance(ao, list)
171-
return st.one_of([merged_as_strategies([tmp, s], custom_formats) for s in ao])
190+
return st.one_of(
191+
[merged_as_strategies([tmp, s], custom_formats, resolver) for s in ao]
192+
)
172193
if "allOf" in schema:
173194
tmp = schema.copy()
174195
ao = tmp.pop("allOf")
175196
assert isinstance(ao, list)
176-
return merged_as_strategies([tmp] + ao, custom_formats)
197+
return merged_as_strategies([tmp] + ao, custom_formats, resolver)
177198
if "oneOf" in schema:
178199
tmp = schema.copy()
179200
oo = tmp.pop("oneOf")
180201
assert isinstance(oo, list)
181202
schemas = [merged([tmp, s]) for s in oo]
182203
return st.one_of(
183204
[
184-
from_schema(s, custom_formats=custom_formats)
205+
from_schema(s, custom_formats=custom_formats, resolver=resolver)
185206
for s in schemas
186207
if s is not None
187208
]
@@ -199,8 +220,8 @@ def __from_schema(
199220
"number": number_schema,
200221
"integer": integer_schema,
201222
"string": partial(string_schema, custom_formats),
202-
"array": partial(array_schema, custom_formats),
203-
"object": partial(object_schema, custom_formats),
223+
"array": partial(array_schema, custom_formats, resolver),
224+
"object": partial(object_schema, custom_formats, resolver),
204225
}
205226
assert set(map_) == set(TYPE_STRINGS)
206227
return st.one_of([map_[t](schema) for t in get_type(schema)])
@@ -423,10 +444,14 @@ def string_schema(
423444

424445

425446
def array_schema(
426-
custom_formats: Dict[str, st.SearchStrategy[str]], schema: dict
447+
custom_formats: Dict[str, st.SearchStrategy[str]],
448+
resolver: LocalResolver,
449+
schema: dict,
427450
) -> st.SearchStrategy[List[JSONType]]:
428451
"""Handle schemata for arrays."""
429-
_from_schema_ = partial(from_schema, custom_formats=custom_formats)
452+
_from_schema_ = partial(
453+
from_schema, custom_formats=custom_formats, resolver=resolver
454+
)
430455
items = schema.get("items", {})
431456
additional_items = schema.get("additionalItems", {})
432457
min_size = schema.get("minItems", 0)
@@ -437,14 +462,16 @@ def array_schema(
437462
if max_size is not None:
438463
max_size -= len(items)
439464

440-
items_strats = [_from_schema_(s) for s in items]
465+
items_strats = [_from_schema_(s) for s in deepcopy(items)]
441466
additional_items_strat = _from_schema_(additional_items)
442467

443468
# If we have a contains schema to satisfy, we try generating from it when
444469
# allowed to do so. We'll skip the None (unmergable / no contains) cases
445470
# below, and let Hypothesis ignore the FALSEY cases for us.
446471
if "contains" in schema:
447-
for i, mrgd in enumerate(merged([schema["contains"], s]) for s in items):
472+
for i, mrgd in enumerate(
473+
merged([schema["contains"], s]) for s in deepcopy(items)
474+
):
448475
if mrgd is not None:
449476
items_strats[i] |= _from_schema_(mrgd)
450477
contains_additional = merged([schema["contains"], additional_items])
@@ -481,10 +508,10 @@ def not_seen(elem: JSONType) -> bool:
481508
st.lists(additional_items_strat, min_size=min_size, max_size=max_size),
482509
)
483510
else:
484-
items_strat = _from_schema_(items)
511+
items_strat = _from_schema_(deepcopy(items))
485512
if "contains" in schema:
486513
contains_strat = _from_schema_(schema["contains"])
487-
if merged([items, schema["contains"]]) != schema["contains"]:
514+
if merged([deepcopy(items), schema["contains"]]) != schema["contains"]:
488515
# We only need this filter if we couldn't merge items in when
489516
# canonicalising. Note that for list-items, above, we just skip
490517
# the mixed generation in this case (because they tend to be
@@ -505,7 +532,9 @@ def not_seen(elem: JSONType) -> bool:
505532

506533

507534
def object_schema(
508-
custom_formats: Dict[str, st.SearchStrategy[str]], schema: dict
535+
custom_formats: Dict[str, st.SearchStrategy[str]],
536+
resolver: LocalResolver,
537+
schema: dict,
509538
) -> st.SearchStrategy[Dict[str, JSONType]]:
510539
"""Handle a manageable subset of possible schemata for objects."""
511540
required = schema.get("required", []) # required keys
@@ -519,7 +548,7 @@ def object_schema(
519548
return st.builds(dict)
520549
names["type"] = "string"
521550

522-
properties = schema.get("properties", {}) # exact name: value schema
551+
properties = deepcopy(schema.get("properties", {})) # exact name: value schema
523552
patterns = schema.get("patternProperties", {}) # regex for names: value schema
524553
# schema for other values; handled specially if nothing matches
525554
additional = schema.get("additionalProperties", {})
@@ -534,7 +563,7 @@ def object_schema(
534563
st.sampled_from(sorted(dep_names) + sorted(dep_schemas) + sorted(properties))
535564
if (dep_names or dep_schemas or properties)
536565
else st.nothing(),
537-
from_schema(names, custom_formats=custom_formats)
566+
from_schema(names, custom_formats=custom_formats, resolver=resolver)
538567
if additional_allowed
539568
else st.nothing(),
540569
st.one_of([st.from_regex(p) for p in sorted(patterns)]),
@@ -580,12 +609,20 @@ def from_object_schema(draw: Any) -> Any:
580609
if re.search(rgx, string=key) is not None
581610
]
582611
if key in properties:
583-
pattern_schemas.insert(0, properties[key])
612+
pattern_schemas.insert(0, deepcopy(properties[key]))
584613

585614
if pattern_schemas:
586-
out[key] = draw(merged_as_strategies(pattern_schemas, custom_formats))
615+
out[key] = draw(
616+
merged_as_strategies(pattern_schemas, custom_formats, resolver)
617+
)
587618
else:
588-
out[key] = draw(from_schema(additional, custom_formats=custom_formats))
619+
out[key] = draw(
620+
from_schema(
621+
deepcopy(additional),
622+
custom_formats=custom_formats,
623+
resolver=resolver,
624+
)
625+
)
589626

590627
for k, v in dep_schemas.items():
591628
if k in out and not make_validator(v).is_valid(out):

tests/test_from_schema.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,3 +425,36 @@ def test_allowed_custom_format(num):
425425
def test_allowed_unknown_custom_format(string):
426426
assert string == "hello world"
427427
assert "not registered" not in jsonschema.FormatChecker().checkers
428+
429+
430+
@pytest.mark.parametrize(
431+
"schema",
432+
(
433+
{
434+
"properties": {"foo": {"$ref": "#"}},
435+
"additionalProperties": False,
436+
"type": "object",
437+
},
438+
{
439+
"definitions": {
440+
"Node": {
441+
"type": "object",
442+
"properties": {
443+
"children": {
444+
"type": "array",
445+
"items": {"$ref": "#/definitions/Node"},
446+
"maxItems": 2,
447+
}
448+
},
449+
"required": ["children"],
450+
"additionalProperties": False,
451+
},
452+
},
453+
"$ref": "#/definitions/Node",
454+
},
455+
),
456+
)
457+
@given(data=st.data())
458+
def test_recursive_reference(data, schema):
459+
value = data.draw(from_schema(schema))
460+
jsonschema.validate(value, schema)

0 commit comments

Comments
 (0)