Skip to content

Commit 7371ba7

Browse files
committed
Introduce IdentifiedResource, bundling a resource and specification.
This moves Specifications to the resource level rather than the registry level, which is where they belong. It is still TBD whether we want or need multiple specifications on the same resource; so far we do not, though the test suite effectively does this in places. With this change, all tests pass except for one draft 2019 test (the invalid $defs test validating against the metaschema), which I suspect is either wrong itself or exposes a bug in the recursiveRef code rather than in this change. The next step is to move the just-referenced recursiveRef code here and check.
1 parent 86e8816 commit 7371ba7

File tree

4 files changed

+192
-58
lines changed

4 files changed

+192
-58
lines changed

referencing/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from referencing._core import ( # noqa: F401
2+
IdentifiedResource,
23
OpaqueSpecification as _Opaque,
34
Registry,
45
)

referencing/_core.py

Lines changed: 115 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,59 @@ class UnidentifiedResource(Exception):
1515
pass
1616

1717

18+
@frozen
19+
class IdentifiedResource:
20+
21+
_specification: Specification
22+
resource: Schema
23+
24+
@classmethod
25+
def from_resource(cls, resource, **kwargs):
26+
return cls(
27+
resource=resource,
28+
specification=specification_for(resource, **kwargs),
29+
)
30+
31+
def id(self):
32+
return self._specification.id_of(self.resource)
33+
34+
def anchors(self):
35+
return self._specification.anchors_in(self.resource)
36+
37+
def subresources(self):
38+
for each in self._specification.subresources_of(self.resource):
39+
yield IdentifiedResource.from_resource(
40+
resource=each,
41+
default=self._specification,
42+
)
43+
44+
45+
def specification_for(
46+
resource: Schema,
47+
default: Specification = ..., # type: ignore
48+
) -> Specification:
49+
if resource is True or resource is False:
50+
pass
51+
else:
52+
jsonschema_schema_keyword = resource.get("$schema")
53+
if jsonschema_schema_keyword is not None:
54+
from referencing import jsonschema
55+
56+
specification = jsonschema.BY_ID.get(jsonschema_schema_keyword)
57+
if specification is not None:
58+
return specification
59+
if default is ...:
60+
raise UnidentifiedResource(resource)
61+
return default
62+
63+
1864
@frozen
1965
class Anchor:
2066

2167
name: str
22-
resource: Schema
68+
resource: IdentifiedResource
2369

24-
def resolve(self, resolver: Resolver, uri: str) -> tuple[Schema, str]:
70+
def resolve(self, resolver, uri):
2571
return self.resource, uri
2672

2773

@@ -47,12 +93,14 @@ def subresources_of(self, resource):
4793
@frozen
4894
class Registry:
4995

50-
_contents: PMap[str, tuple[Schema, PMap[str, AnchorType]]] = field(
96+
_contents: PMap[
97+
str,
98+
tuple[IdentifiedResource, PMap[str, AnchorType]],
99+
] = field(
51100
default=m(),
52101
repr=lambda value: f"({len(value)} entries)",
53102
)
54103
_uncrawled: PSet[str] = field(default=s(), repr=False)
55-
_specification: Specification = OpaqueSpecification()
56104

57105
def update(self, *registries: Registry) -> Registry:
58106
contents = (each._contents for each in registries)
@@ -64,28 +112,41 @@ def update(self, *registries: Registry) -> Registry:
64112
)
65113

66114
def with_resource(self, resource: Schema) -> Registry:
67-
uri = self._specification.id_of(resource)
68-
if uri is None:
69-
raise UnidentifiedResource(resource)
70-
return self.with_identified_resource(uri=uri, resource=resource)
115+
identified = IdentifiedResource.from_resource(resource)
116+
return self.with_identified_resource(
117+
uri=identified.id(),
118+
resource=identified,
119+
)
71120

72-
def with_identified_resource(self, uri, resource) -> Registry:
73-
return self.with_resources([(uri, resource)])
121+
def with_resources(
122+
self,
123+
pairs: Iterable[tuple[str, Schema]],
124+
**kwargs,
125+
) -> Registry:
126+
return self.with_identified_resources(
127+
(uri, IdentifiedResource.from_resource(resource, **kwargs))
128+
for uri, resource in pairs
129+
)
130+
131+
def with_identified_resource(
132+
self,
133+
uri: str,
134+
resource: IdentifiedResource,
135+
) -> Registry:
136+
return self.with_identified_resources([(uri, resource)])
74137

75-
def with_resources(self, pairs: Iterable[tuple[str, Schema]]) -> Registry:
138+
def with_identified_resources(
139+
self,
140+
pairs: Iterable[tuple[str, IdentifiedResource]],
141+
) -> Registry:
76142
uncrawled = self._uncrawled
77143
contents = self._contents
78144
for uri, resource in pairs:
79-
assert (
80-
uri == ""
81-
or uri not in self._contents
82-
or self._contents[uri][0] == resource
83-
), (uri, self._contents[uri], resource)
84-
contents = contents.set(uri, (resource, m()))
85-
86-
id = self._specification.id_of(resource)
145+
anchors: PMap[str, AnchorType] = m()
146+
contents = contents.set(uri, (resource, anchors))
147+
id = resource.id()
87148
if id is not None:
88-
contents = contents.set(id, (resource, m()))
149+
contents = contents.set(id, (resource, anchors))
89150

90151
uncrawled = uncrawled.add(uri)
91152
return evolve(self, contents=contents, uncrawled=uncrawled)
@@ -101,7 +162,7 @@ def with_anchors(
101162
contents = self._contents.set(uri, (resource, new))
102163
return evolve(self, contents=contents)
103164

104-
def resource_at(self, uri: str) -> tuple[Schema, Registry]:
165+
def resource_at(self, uri: str) -> tuple[IdentifiedResource, Registry]:
105166
at_uri = self._contents.get(uri)
106167
if at_uri is not None and at_uri[1]:
107168
registry = self
@@ -114,33 +175,40 @@ def anchors_at(self, uri: str) -> PMap[str, AnchorType]:
114175

115176
def _crawl(self) -> Registry:
116177
registry = self
117-
resources = [(uri, self._contents[uri][0]) for uri in self._uncrawled]
178+
resources: list[tuple[str, IdentifiedResource]] = [
179+
(uri, self._contents[uri][0]) for uri in self._uncrawled
180+
]
118181
while resources:
119182
base_uri, resource = resources.pop()
120-
if resource is True or resource is False:
183+
if resource.resource is True or resource.resource is False:
121184
continue
122185

123-
uri = urljoin(base_uri, self._specification.id_of(resource) or "")
186+
uri = urljoin(base_uri, resource.id() or "")
124187
if uri != base_uri:
125188
registry = registry.with_identified_resource(
126189
uri=uri,
127190
resource=resource,
128191
)
129192

130-
anchors = self._specification.anchors_in(resource)
193+
anchors = resource.anchors()
131194
registry = registry.with_anchors(uri=uri, anchors=anchors)
132195

133196
resources.extend(
134197
(uri, each)
135-
for each in self._specification.subresources_of(resource)
198+
for each in resource.subresources()
136199
if each is not True and each is not False
137200
)
138201
return evolve(registry, uncrawled=s())
139202

140203
def resolver(self, root: Schema, specification: Specification) -> Resolver:
141-
uri = self._specification.id_of(root) or ""
142-
registry = self.with_identified_resource(uri=uri, resource=root)
143-
registry = evolve(registry, specification=specification)
204+
uri = specification.id_of(root) or ""
205+
registry = self.with_identified_resource(
206+
uri=uri,
207+
resource=IdentifiedResource(
208+
specification=specification,
209+
resource=root,
210+
),
211+
)
144212
return Resolver(base_uri=uri, registry=registry)
145213

146214

@@ -157,8 +225,9 @@ def lookup(self, ref: str) -> tuple[Schema, Resolver]:
157225
else:
158226
uri, fragment = urldefrag(urljoin(self._base_uri, ref))
159227

160-
target, registry = self._registry.resource_at(uri)
228+
resource, registry = self._registry.resource_at(uri)
161229
base_uri = uri
230+
target = resource.resource
162231

163232
if fragment.startswith("/"):
164233
segments = unquote(fragment[1:]).split("/")
@@ -171,31 +240,40 @@ def lookup(self, ref: str) -> tuple[Schema, Resolver]:
171240
# FIXME: this is wrong, we need to know that we are crossing
172241
# the boundary of a *schema* specifically
173242
if not isinstance(target, Sequence):
174-
id = self._registry._specification.id_of(target)
243+
id = resource._specification.id_of(target)
175244
if id is not None:
176245
base_uri = urljoin(base_uri, id).rstrip("#")
177246
elif fragment:
178247
anchor = registry.anchors_at(uri=uri)[fragment]
179-
target, uri = anchor.resolve(resolver=self, uri=uri)
248+
resource, uri = anchor.resolve(resolver=self, uri=uri)
249+
target = resource.resource
180250

181-
id = self._registry._specification.id_of(target)
251+
id = resource.id()
182252
if id is not None:
183253
base_uri = urljoin(self._base_uri, id).rstrip("#")
184254
else:
185-
id = self._registry._specification.id_of(target)
255+
target = resource.resource
256+
id = resource.id()
186257
if id is not None:
187258
base_uri = urljoin(self._base_uri, id).rstrip("#")
188259
return target, self.evolve(base_uri=base_uri, registry=registry)
189260

190-
def with_root(self, root: Schema) -> Resolver:
191-
maybe_relative = self._registry._specification.id_of(root)
261+
def with_root(
262+
self,
263+
root: Schema,
264+
specification: Specification,
265+
) -> Resolver:
266+
maybe_relative = specification.id_of(root)
192267
if maybe_relative is None:
193268
return self
194269

195270
uri = urljoin(self._base_uri, maybe_relative)
196271
registry = self._registry.with_identified_resource(
197272
uri=uri,
198-
resource=root,
273+
resource=IdentifiedResource(
274+
resource=root,
275+
specification=specification,
276+
),
199277
)
200278
return self.evolve(base_uri=uri, registry=registry)
201279

0 commit comments

Comments
 (0)