Skip to content

Commit a05f0aa

Browse files
committed
Skip resolving recursive references in resolve_all_refs
1 parent f40063a commit a05f0aa

File tree

2 files changed

+167
-21
lines changed

2 files changed

+167
-21
lines changed

src/hypothesis_jsonschema/_canonicalise.py

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import re
1919
from copy import deepcopy
2020
from typing import Any, Dict, List, NoReturn, Optional, Tuple, Union
21+
from urllib.parse import urljoin
2122

2223
import jsonschema
2324
from hypothesis.errors import InvalidArgument
@@ -576,16 +577,25 @@ def resolve_remote(self, uri: str) -> NoReturn:
576577
)
577578

578579

580+
def is_recursive_reference(reference: str, resolver: LocalResolver) -> bool:
581+
"""Detect if the given reference is recursive."""
582+
# Special case: a reference to the schema's root is always recursive
583+
if reference == "#":
584+
return True
585+
# During reference resolving the scope might go to external schemas. `hypothesis-jsonschema` does not support
586+
# schemas behind remote references, but the underlying `jsonschema` library includes meta schemas for
587+
# different JSON Schema drafts that are available transparently, and they count as external schemas in this context.
588+
# For this reason we need to check the reference relatively to the base uri.
589+
full_reference = urljoin(resolver.base_uri, reference)
590+
# If a fully-qualified reference is in the resolution stack, then we encounter it for the second time.
591+
# Therefore it is a recursive reference.
592+
return full_reference in resolver._scopes_stack
593+
594+
579595
def resolve_all_refs(
580596
schema: Union[bool, Schema], *, resolver: LocalResolver = None
581597
) -> Schema:
582-
"""
583-
Resolve all references in the given schema.
584-
585-
This handles nested definitions, but not recursive definitions.
586-
The latter require special handling to convert to strategies and are much
587-
less common, so we just ignore them (and error out) for now.
588-
"""
598+
"""Resolve all non-recursive references in the given schema."""
589599
if isinstance(schema, bool):
590600
return canonicalish(schema)
591601
assert isinstance(schema, dict), schema
@@ -597,35 +607,41 @@ def resolve_all_refs(
597607
)
598608

599609
if "$ref" in schema:
600-
s = dict(schema)
601-
ref = s.pop("$ref")
602-
with resolver.resolving(ref) as got:
603-
if s == {}:
604-
return resolve_all_refs(got, resolver=resolver)
605-
m = merged([s, got])
606-
if m is None: # pragma: no cover
607-
msg = f"$ref:{ref!r} had incompatible base schema {s!r}"
608-
raise HypothesisRefResolutionError(msg)
609-
return resolve_all_refs(m, resolver=resolver)
610-
assert "$ref" not in schema
610+
# Recursive references are skipped to avoid infinite recursion.
611+
if not is_recursive_reference(schema["$ref"], resolver):
612+
s = dict(schema)
613+
ref = s.pop("$ref")
614+
with resolver.resolving(ref) as got:
615+
if s == {}:
616+
return resolve_all_refs(deepcopy(got), resolver=resolver)
617+
m = merged([s, got])
618+
if m is None: # pragma: no cover
619+
msg = f"$ref:{ref!r} had incompatible base schema {s!r}"
620+
raise HypothesisRefResolutionError(msg)
621+
# `deepcopy` is not needed, because, the schemas are copied inside the `merged` call above
622+
return resolve_all_refs(m, resolver=resolver)
611623

612624
for key in SCHEMA_KEYS:
613625
val = schema.get(key, False)
614626
if isinstance(val, list):
615627
schema[key] = [
616-
resolve_all_refs(v, resolver=resolver) if isinstance(v, dict) else v
628+
resolve_all_refs(deepcopy(v), resolver=resolver)
629+
if isinstance(v, dict)
630+
else v
617631
for v in val
618632
]
619633
elif isinstance(val, dict):
620-
schema[key] = resolve_all_refs(val, resolver=resolver)
634+
schema[key] = resolve_all_refs(deepcopy(val), resolver=resolver)
621635
else:
622636
assert isinstance(val, bool)
623637
for key in SCHEMA_OBJECT_KEYS: # values are keys-to-schema-dicts, not schemas
624638
if key in schema:
625639
subschema = schema[key]
626640
assert isinstance(subschema, dict)
627641
schema[key] = {
628-
k: resolve_all_refs(v, resolver=resolver) if isinstance(v, dict) else v
642+
k: resolve_all_refs(deepcopy(v), resolver=resolver)
643+
if isinstance(v, dict)
644+
else v
629645
for k, v in subschema.items()
630646
}
631647
assert isinstance(schema, dict)

tests/test_canonicalise.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,3 +558,133 @@ def test_validators_use_proper_draft():
558558
}
559559
cc = canonicalish(schema)
560560
jsonschema.validators.validator_for(cc).check_schema(cc)
561+
562+
563+
# Reference to itself
564+
ROOT_REFERENCE = {"$ref": "#"}
565+
# One extra nesting level
566+
NESTED = {"not": {"$ref": "#/not"}}
567+
# The same as above, but includes "$id".
568+
NESTED_WITH_ID = {
569+
"not": {"$ref": "#/not"},
570+
"$id": "http://json-schema.org/draft-07/schema#",
571+
}
572+
SELF_REFERENTIAL = {"foo": {"$ref": "#foo"}, "not": {"$ref": "#foo"}}
573+
574+
575+
@pytest.mark.parametrize(
576+
"schema, expected",
577+
(
578+
(ROOT_REFERENCE, ROOT_REFERENCE),
579+
(NESTED, NESTED),
580+
(NESTED_WITH_ID, NESTED_WITH_ID),
581+
# "foo" content should be inlined as is, because "#" is recursive (special case)
582+
(
583+
{"foo": {"$ref": "#"}, "not": {"$ref": "#foo"}},
584+
{"foo": {"$ref": "#"}, "not": {"$ref": "#"}},
585+
),
586+
# "foo" content should be inlined as is, because it points to itself
587+
(
588+
SELF_REFERENTIAL,
589+
SELF_REFERENTIAL,
590+
),
591+
# The same as above, but with one extra nesting level
592+
(
593+
{"foo": {"not": {"$ref": "#foo"}}, "not": {"$ref": "#foo"}},
594+
# 1. We start from resolving "$ref" in "not"
595+
# 2. at this point we don't know this path is recursive, so we follow to "foo"
596+
# 3. inside "foo" we found a reference to "foo", which means it is recursive
597+
{"foo": {"not": {"$ref": "#foo"}}, "not": {"not": {"$ref": "#foo"}}},
598+
),
599+
# Circular reference between two schemas
600+
(
601+
{"foo": {"$ref": "#bar"}, "bar": {"$ref": "#foo"}, "not": {"$ref": "#foo"}},
602+
# 1. We start in "not" and follow to "foo"
603+
# 2. In "foo" we follow to "bar"
604+
# 3. Here we see a reference to previously seen scope, which means it is a recursive path
605+
# We take the schema where we stop and inline it to the starting point (therefore it is `{"$ref": "#foo"}`)
606+
{"foo": {"$ref": "#bar"}, "bar": {"$ref": "#foo"}, "not": {"$ref": "#foo"}},
607+
),
608+
),
609+
)
610+
def test_skip_recursive_references_simple_schemas(schema, expected):
611+
# When there is a recursive reference, it should not be resolved
612+
assert resolve_all_refs(schema) == expected
613+
614+
615+
@pytest.mark.parametrize(
616+
"schema, resolved",
617+
(
618+
# NOTE. The `resolved` fixture does not include "definitions" to save visual space here, but it is extended
619+
# with it in the test body.
620+
# The reference target is behind two references, that share the same definition path. Not a recursive reference
621+
(
622+
{
623+
"definitions": {
624+
"properties": {
625+
"foo": {"type": "string"},
626+
"bar": {"$ref": "#/definitions/properties/foo"},
627+
},
628+
},
629+
"not": {"$ref": "#/definitions/properties/bar"},
630+
},
631+
{
632+
"not": {"type": "string"},
633+
},
634+
),
635+
# Here we need to resolve multiple references while being on the same resolution scope:
636+
# "#/definitions/foo" contains two references
637+
(
638+
{
639+
"definitions": {
640+
"foo": {
641+
"properties": {
642+
"bar": {"$ref": "#/definitions/spam"},
643+
"baz": {"$ref": "#/definitions/spam"},
644+
}
645+
},
646+
"spam": {"type": "string"},
647+
},
648+
"properties": {"foo": {"$ref": "#/definitions/foo"}},
649+
},
650+
{
651+
"properties": {
652+
"foo": {
653+
"properties": {
654+
"bar": {"type": "string"},
655+
"baz": {"type": "string"},
656+
}
657+
}
658+
},
659+
},
660+
),
661+
# Similar to the one above, but recursive
662+
(
663+
{
664+
"definitions": {
665+
"foo": {
666+
"properties": {
667+
"bar": {"$ref": "#/definitions/spam"},
668+
"baz": {"$ref": "#/definitions/spam"},
669+
}
670+
},
671+
"spam": {"$ref": "#/definitions/foo"},
672+
},
673+
"properties": {"foo": {"$ref": "#/definitions/foo"}},
674+
},
675+
{
676+
"properties": {
677+
"foo": {
678+
"properties": {
679+
"bar": {"$ref": "#/definitions/foo"},
680+
"baz": {"$ref": "#/definitions/foo"},
681+
}
682+
}
683+
},
684+
},
685+
),
686+
),
687+
)
688+
def test_skip_recursive_references_complex_schemas(schema, resolved):
689+
resolved["definitions"] = schema["definitions"]
690+
assert resolve_all_refs(schema) == resolved

0 commit comments

Comments
 (0)