Skip to content

Commit 82eac0c

Browse files
committed
First stab at subresources, and crawling to find them.
1 parent 950f426 commit 82eac0c

File tree

6 files changed

+235
-43
lines changed

6 files changed

+235
-43
lines changed

noxfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def tests(session):
3030
session.run("coverage", "report")
3131
else:
3232
session.install(*dependencies)
33-
session.run("pytest", *session.posargs)
33+
session.run("pytest", *session.posargs, REFERENCING)
3434

3535

3636
@session(tags=["build"])

referencing/_core.py

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
from urllib.parse import unquote, urldefrag, urljoin
66

77
from attrs import evolve, field
8-
from pyrsistent import m, pmap
8+
from pyrsistent import m, pmap, s
9+
from pyrsistent.typing import PMap, PSet
910

1011
from referencing._attrs import frozen
1112
from referencing.exceptions import CannotDetermineSpecification, Unresolvable
@@ -24,6 +25,10 @@ class Specification(Generic[D]):
2425
#: Find the ID of a given document.
2526
id_of: Callable[[D], URI | None]
2627

28+
#: Retrieve the subresources of the given document (without traversing into
29+
#: the subresources themselves).
30+
subresources_of: Callable[[D], Iterable[D]]
31+
2732
#: An opaque specification where resources have no subresources
2833
#: nor internal identifiers.
2934
OPAQUE: ClassVar[Specification[Any]]
@@ -35,7 +40,10 @@ def create_resource(self, contents: D) -> Resource[D]:
3540
return Resource(contents=contents, specification=self)
3641

3742

38-
Specification.OPAQUE = Specification(id_of=lambda contents: None)
43+
Specification.OPAQUE = Specification(
44+
id_of=lambda contents: None,
45+
subresources_of=lambda contents: [],
46+
)
3947

4048

4149
@frozen
@@ -99,6 +107,18 @@ def id(self) -> URI | None:
99107
"""
100108
return self._specification.id_of(self.contents)
101109

110+
def subresources(self) -> Iterable[Resource[D]]:
    """
    Retrieve this resource's subresources.

    The resource's specification is asked (via ``subresources_of``) for the
    subresource contents found in this resource's contents, and each one is
    wrapped with `Resource.from_contents`, using this resource's own
    specification as the default specification.

    The result is a lazy iterable: the specification is consulted when
    this method is called, but each `Resource` is only built as the
    result is iterated.
    """
    return (
        Resource.from_contents(
            each,
            default_specification=self._specification,
        )
        for each in self._specification.subresources_of(self.contents)
    )
121+
102122
def pointer(self, pointer: str, resolver: Resolver[D]) -> Resolved[D]:
103123
"""
104124
Resolve the given JSON pointer.
@@ -131,7 +151,8 @@ class Registry(Mapping[URI, Resource[D]]):
131151
registry with the additional resources added to them.
132152
"""
133153

134-
_resources: Mapping[URI, Resource[D]] = field(default=m(), converter=pmap) # type: ignore[reportUnknownArgumentType] # noqa: E501
154+
_resources: PMap[URI, Resource[D]] = field(default=m(), converter=pmap) # type: ignore[reportUnknownArgumentType] # noqa: E501
155+
_uncrawled: PSet[URI] = field(default=s()) # type: ignore[reportUnknownArgumentType] # noqa: E501
135156

136157
def __getitem__(self, uri: URI) -> Resource[D]:
137158
"""
@@ -154,7 +175,15 @@ def __len__(self) -> int:
154175
def __repr__(self) -> str:
    """
    Summarize the registry by its size and how much of it is uncrawled.

    The summary reads ``N resource(s)`` when nothing is pending a crawl,
    ``N uncrawled resource(s)`` when every resource is uncrawled, and
    ``N resource(s), M uncrawled`` otherwise.
    """
    size = len(self)
    pluralized = "resource" if size == 1 else "resources"
    uncrawled = len(self._uncrawled)
    if not uncrawled:
        summary = pluralized
    elif uncrawled == size:
        summary = f"uncrawled {pluralized}"
    else:
        summary = f"{pluralized}, {uncrawled} uncrawled"
    return f"<Registry ({size} {summary})>"
158187

159188
def contents(self, uri: URI) -> D:
160189
"""
@@ -166,7 +195,18 @@ def crawl(self) -> Registry[D]:
166195
"""
167196
Immediately crawl all added resources, discovering subresources.
168197
"""
169-
return self
198+
resources = self._resources.evolver()
199+
uncrawled = [(uri, resources[uri]) for uri in self._uncrawled]
200+
while uncrawled:
201+
uri, resource = uncrawled.pop()
202+
id = resource.id()
203+
if id is None:
204+
pass
205+
else:
206+
uri = urljoin(uri, id)
207+
resources[uri] = resource
208+
uncrawled.extend((uri, each) for each in resource.subresources())
209+
return evolve(self, resources=resources.persistent(), uncrawled=s())
170210

171211
def with_resource(self, uri: URI, resource: Resource[D]):
172212
"""
@@ -181,7 +221,16 @@ def with_resources(
181221
r"""
182222
Add the given `Resource`\ s to the registry, without crawling them.
183223
"""
184-
return evolve(self, resources=self._resources.update(pmap(pairs))) # type: ignore[reportUnknownArgumentType] # noqa: E501
224+
resources = self._resources.evolver()
225+
uncrawled = self._uncrawled.evolver()
226+
for uri, resource in pairs:
227+
uncrawled.add(uri)
228+
resources[uri] = resource
229+
return evolve(
230+
self,
231+
resources=resources.persistent(),
232+
uncrawled=uncrawled.persistent(),
233+
)
185234

186235
def with_contents(
187236
self,
@@ -251,11 +300,16 @@ def lookup(self, ref: URI) -> Resolved[D]:
251300
if the reference isn't resolvable
252301
"""
253302
uri, fragment = urldefrag(urljoin(self._base_uri, ref))
303+
resolver, registry = self, self._registry
304+
resource = registry.get(uri)
254305
try:
255-
resource = self._registry[uri]
306+
if resource is None:
307+
registry = registry.crawl()
308+
resource = registry[uri]
309+
resolver = evolve(resolver, registry=registry)
256310
if fragment.startswith("/"):
257-
return resource.pointer(pointer=fragment, resolver=self)
311+
return resource.pointer(pointer=fragment, resolver=resolver)
258312
except KeyError:
259313
raise Unresolvable(ref=ref) from None
260314

261-
return Resolved(contents=resource.contents, resolver=self)
315+
return Resolved(contents=resource.contents, resolver=resolver)

referencing/jsonschema.py

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
from __future__ import annotations
66

7-
from typing import Any, Union
7+
from collections.abc import Set
8+
from typing import Any, Iterable, Union
89

910
from referencing import Registry, Resource, Specification
1011
from referencing._attrs import frozen
@@ -38,16 +39,50 @@ def _legacy_id(contents: ObjectSchema) -> URI | None:
3839
return contents.get("id")
3940

4041

41-
DRAFT202012 = Specification(id_of=_dollar_id)
42-
DRAFT201909 = Specification(id_of=_dollar_id)
43-
DRAFT7 = Specification(id_of=_dollar_id)
44-
DRAFT6 = Specification(id_of=_dollar_id)
45-
DRAFT4 = Specification(id_of=_legacy_id)
46-
DRAFT3 = Specification(id_of=_legacy_id)
42+
def _subresources_of(values: Set[str] = frozenset()):
    """
    Create a callable returning JSON Schema specification-style subschemas.

    Relies on specifying the set of keywords containing subschemas in their
    values, in a subobject's values, or in a subarray.

    Only ``values`` is accepted here, so the returned callable yields just
    the direct value of each listed keyword that is present in the schema
    it is given; keywords that are absent are skipped. Results follow the
    iteration order of ``values``.
    """

    def subresources_of(resource: ObjectSchema) -> Iterable[ObjectSchema]:
        for each in values:
            if each in resource:
                yield resource[each]

    return subresources_of
56+
57+
58+
DRAFT202012 = Specification(
59+
id_of=_dollar_id,
60+
subresources_of=lambda contents: [],
61+
)
62+
DRAFT201909 = Specification(
63+
id_of=_dollar_id,
64+
subresources_of=lambda contents: [],
65+
)
66+
DRAFT7 = Specification(
67+
id_of=_dollar_id,
68+
subresources_of=_subresources_of(values={"if", "then", "else"}),
69+
)
70+
DRAFT6 = Specification(
71+
id_of=_dollar_id,
72+
subresources_of=lambda contents: [],
73+
)
74+
DRAFT4 = Specification(
75+
id_of=_legacy_id,
76+
subresources_of=lambda contents: [],
77+
)
78+
DRAFT3 = Specification(
79+
id_of=_legacy_id,
80+
subresources_of=lambda contents: [],
81+
)
4782

4883

4984
_SPECIFICATIONS: Registry[Specification[Schema]] = Registry(
50-
{
85+
{ # type: ignore[reportGeneralTypeIssues] # :/ internal vs external types
5186
dialect_id: Resource.opaque(specification)
5287
for dialect_id, specification in [
5388
("https://json-schema.org/draft/2020-12/schema", DRAFT202012),

0 commit comments

Comments
 (0)