Skip to content

Commit 98d3296

Browse files
committed
fixes, simplification
1 parent f325a11 commit 98d3296

File tree

3 files changed

+66
-35
lines changed

3 files changed

+66
-35
lines changed

airbyte_cdk/sources/utils/schema_helpers.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,18 @@
77
import json
88
import os
99
import pkgutil
10-
from typing import Any, ClassVar, Dict, List, Mapping, MutableMapping, Optional, Tuple
10+
from copy import deepcopy
11+
from typing import (
12+
TYPE_CHECKING,
13+
Any,
14+
ClassVar,
15+
Dict,
16+
List,
17+
Mapping,
18+
MutableMapping,
19+
Optional,
20+
Tuple,
21+
)
1122

1223
import jsonref
1324
from jsonschema import validate
@@ -66,23 +77,23 @@ def resolve_ref_links(obj: Any) -> Any:
6677
return obj
6778

6879

69-
def _expand_refs(schema: Any, ref_resolver: Optional[Resolver] = None) -> None:
80+
def get_ref_resolver_registry(schema: dict[str, Any]) -> Registry:
81+
"""Get a reference resolver registry for the given schema."""
82+
resource: Resource = Resource.from_contents(
83+
contents=deepcopy(schema),
84+
default_specification=DRAFT7,
85+
)
86+
return Registry().with_resource(
87+
uri="",
88+
resource=resource,
89+
)
90+
91+
92+
def _expand_refs(schema: Any, ref_resolver: Resolver) -> None:
7093
"""Internal function to iterate over schema and replace all occurrences of $ref with their definitions. Recursive.
7194
7295
:param schema: schema that will be patched
73-
:param ref_resolver: resolver to get definition from $ref, if None pass it will be instantiated
7496
"""
75-
if ref_resolver is None:
76-
resource = Resource.from_contents(
77-
contents=schema,
78-
default_specification=DRAFT7,
79-
)
80-
resolver_registry = Registry().with_resource(
81-
uri="",
82-
resource=resource,
83-
)
84-
ref_resolver = resolver_registry.resolver()
85-
8697
if isinstance(schema, MutableMapping):
8798
if "$ref" in schema:
8899
ref_url = schema.pop("$ref")
@@ -102,10 +113,14 @@ def _expand_refs(schema: Any, ref_resolver: Optional[Resolver] = None) -> None:
102113
def expand_refs(schema: Any) -> None:
103114
"""Iterate over schema and replace all occurrences of $ref with their definitions.
104115
116+
If a "definitions" section is present at the root of the schema, it will be removed
117+
after $ref resolution is complete.
118+
105119
:param schema: schema that will be patched
106120
"""
107-
_expand_refs(schema)
108-
schema.pop("definitions", None) # remove definitions created by $ref
121+
ref_resolver = get_ref_resolver_registry(schema).resolver()
122+
_expand_refs(schema, ref_resolver)
123+
schema.pop("definitions", None)
109124

110125

111126
def rename_key(schema: Any, old_key: str, new_key: str) -> None:

airbyte_cdk/sources/utils/transform.py

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,27 @@
33
#
44

55
import logging
6+
from copy import deepcopy
67
from enum import Flag, auto
7-
from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast
8+
from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, Mapping, Optional, cast
89

9-
from jsonschema import Draft7Validator, ValidationError, Validator, validators
10+
from jsonschema import Draft7Validator, ValidationError, validators
11+
from jsonschema._typing import SchemaKeywordValidator
1012
from referencing import Registry, Resource
11-
from referencing._core import Resolver # used for type hints
13+
from referencing._core import Resolver
14+
from referencing.exceptions import Unresolvable
1215
from referencing.jsonschema import DRAFT7
1316

17+
from airbyte_cdk.sources.utils.schema_helpers import expand_refs
18+
19+
from .schema_helpers import get_ref_resolver_registry
20+
21+
try:
22+
from jsonschema.validators import Validator
23+
except:
24+
from jsonschema import Validator
25+
26+
1427
MAX_NESTING_DEPTH = 3
1528
json_to_python_simple = {
1629
"string": str,
@@ -194,20 +207,22 @@ def normalizator(
194207
validators parameter for detailed description.
195208
:
196209
"""
210+
# Very first step is to expand references in the schema itself
211+
expand_refs(schema)
212+
if isinstance(property_value, dict):
213+
expand_refs(property_value)
214+
# resolver_registry: Registry = get_ref_resolver_registry(schema)
215+
# ref_resolver: Resolver = resolver_registry.resolver()
197216

198217
def resolve(subschema: dict[str, Any]) -> dict[str, Any]:
199-
if "$ref" in subschema:
200-
ref_url = subschema["$ref"]
201-
try:
202-
if hasattr(validator_instance.resolver, 'lookup'):
203-
resolved = validator_instance.resolver.lookup(ref_url).contents
204-
return cast(dict[str, Any], resolved)
205-
elif hasattr(validator_instance.resolver, 'resolve'):
206-
_, resolved = validator_instance.resolver.resolve(ref_url)
207-
return cast(dict[str, Any], resolved)
208-
except Exception:
209-
pass
210-
return subschema
218+
# if "$ref" in subschema:
219+
# try:
220+
# resolved = ref_resolver.lookup(subschema["$ref"]).contents
221+
# except Unresolvable as e:
222+
# raise ValidationError(
223+
# f"Failed to resolve $ref '{subschema['$ref']}' from {ref_resolver!r} and schema {schema!r}: {e}"
224+
# ) from e
225+
# return cast(dict[str, Any], resolved)
211226
return subschema
212227

213228
# Transform object and array values before running json schema type checking for each element.
@@ -216,14 +231,14 @@ def resolve(subschema: dict[str, Any]) -> dict[str, Any]:
216231
if schema_key == "properties" and isinstance(instance, dict):
217232
for k, subschema in property_value.items():
218233
if k in instance:
219-
subschema = resolve(subschema)
234+
# subschema = resolve(subschema)
220235
instance[k] = self.__normalize(instance[k], subschema)
221236
# Recursively normalize every item of the "instance" sub-array,
222237
# if "instance" is an incorrect type - skip recursive normalization of "instance"
223238
elif schema_key == "items" and isinstance(instance, list):
224-
subschema = resolve(property_value)
239+
# subschema = resolve(property_value)
225240
for index, item in enumerate(instance):
226-
instance[index] = self.__normalize(item, subschema)
241+
instance[index] = self.__normalize(item, property_value)
227242

228243
# Running native jsonschema traverse algorithm after field normalization is done.
229244
yield from original_validator(

unit_tests/sources/utils/test_transform.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"too_many_types": {"type": ["boolean", "null", "string"]},
2121
"def": {
2222
"type": "object",
23-
"properties": {"dd": {"$ref": "#/definitions/my_type"}},
23+
"properties": {"dd": {"$ref": "#/definitions/my_type"}}, # << Broken (missing?) on purpose?
2424
},
2525
"array": {"type": "array", "items": {"$ref": "#/definitions/str_type"}},
2626
"nested": {"$ref": "#/definitions/nested_type"},
@@ -31,6 +31,7 @@
3131
},
3232
"definitions": {
3333
"str_type": {"type": "string"},
34+
"my_type": {"type": "string"}, # << Fixed
3435
"nested_type": {"type": "object", "properties": {"a": {"type": "string"}}},
3536
},
3637
}

0 commit comments

Comments
 (0)