5
5
from urllib .parse import unquote , urldefrag , urljoin
6
6
7
7
from attrs import evolve , field
8
- from pyrsistent import m , pmap
8
+ from pyrsistent import m , pmap , s
9
+ from pyrsistent .typing import PMap , PSet
9
10
10
11
from referencing ._attrs import frozen
11
12
from referencing .exceptions import CannotDetermineSpecification , Unresolvable
@@ -24,6 +25,10 @@ class Specification(Generic[D]):
24
25
#: Find the ID of a given document.
25
26
id_of : Callable [[D ], URI | None ]
26
27
28
+ #: Retrieve the subresources of the given document (without traversing into
29
+ #: the subresources themselves).
30
+ subresources_of : Callable [[D ], Iterable [D ]]
31
+
27
32
#: An opaque specification where resources have no subresources
28
33
#: nor internal identifiers.
29
34
OPAQUE : ClassVar [Specification [Any ]]
@@ -35,7 +40,10 @@ def create_resource(self, contents: D) -> Resource[D]:
35
40
return Resource (contents = contents , specification = self )
36
41
37
42
38
# The opaque specification: its resources never report an identifier and
# never contain any subresources.
Specification.OPAQUE = Specification(
    id_of=lambda _contents: None,
    subresources_of=lambda _contents: [],
)
39
47
40
48
41
49
@frozen
@@ -99,6 +107,18 @@ def id(self) -> URI | None:
99
107
"""
100
108
return self ._specification .id_of (self .contents )
101
109
110
def subresources(self) -> Iterable[Resource[D]]:
    """
    Lazily wrap each subresource of this resource's contents.

    The contents of the subresources are obtained from this resource's
    specification; each one is then turned into a `Resource` via
    `Resource.from_contents`, with this resource's specification passed
    as the default specification.
    """
    specification = self._specification
    found = specification.subresources_of(self.contents)
    return (
        Resource.from_contents(child, default_specification=specification)
        for child in found
    )
121
+
102
122
def pointer (self , pointer : str , resolver : Resolver [D ]) -> Resolved [D ]:
103
123
"""
104
124
Resolve the given JSON pointer.
@@ -131,7 +151,8 @@ class Registry(Mapping[URI, Resource[D]]):
131
151
registry with the additional resources added to them.
132
152
"""
133
153
134
- _resources : Mapping [URI , Resource [D ]] = field (default = m (), converter = pmap ) # type: ignore[reportUnknownArgumentType] # noqa: E501
154
+ _resources : PMap [URI , Resource [D ]] = field (default = m (), converter = pmap ) # type: ignore[reportUnknownArgumentType] # noqa: E501
155
+ _uncrawled : PSet [URI ] = field (default = s ()) # type: ignore[reportUnknownArgumentType] # noqa: E501
135
156
136
157
def __getitem__ (self , uri : URI ) -> Resource [D ]:
137
158
"""
@@ -154,7 +175,15 @@ def __len__(self) -> int:
154
175
def __repr__(self) -> str:
    """
    Summarize the registry by resource count and uncrawled count.
    """
    total = len(self)
    noun = "resource" if total == 1 else "resources"

    # With nothing left to crawl, the plain noun is the whole summary.
    description = noun
    if self._uncrawled:
        pending = len(self._uncrawled)
        if pending == total:
            # Every resource is still uncrawled.
            description = f"uncrawled {noun}"
        else:
            description = f"{noun}, {pending} uncrawled"
    return f"<Registry ({total} {description})>"
158
187
159
188
def contents (self , uri : URI ) -> D :
160
189
"""
def crawl(self) -> Registry[D]:
    """
    Immediately crawl all added resources, discovering subresources.

    Every uncrawled resource is visited along with, transitively, its
    subresources. Any visited resource that reports an ID is stored under
    that ID joined against the URI it was reached from. The returned
    registry has an empty set of uncrawled URIs.
    """
    discovered = self._resources.evolver()
    pending = [(uri, discovered[uri]) for uri in self._uncrawled]
    while pending:
        base_uri, current = pending.pop()
        identifier = current.id()
        if identifier is not None:
            # The resource's own ID becomes the base for its subresources.
            base_uri = urljoin(base_uri, identifier)
            discovered[base_uri] = current
        pending.extend(
            (base_uri, child) for child in current.subresources()
        )
    return evolve(self, resources=discovered.persistent(), uncrawled=s())
170
210
171
211
def with_resource (self , uri : URI , resource : Resource [D ]):
172
212
"""
@@ -181,7 +221,16 @@ def with_resources(
181
221
r"""
182
222
Add the given `Resource`\ s to the registry, without crawling them.
183
223
"""
184
- return evolve (self , resources = self ._resources .update (pmap (pairs ))) # type: ignore[reportUnknownArgumentType] # noqa: E501
224
+ resources = self ._resources .evolver ()
225
+ uncrawled = self ._uncrawled .evolver ()
226
+ for uri , resource in pairs :
227
+ uncrawled .add (uri )
228
+ resources [uri ] = resource
229
+ return evolve (
230
+ self ,
231
+ resources = resources .persistent (),
232
+ uncrawled = uncrawled .persistent (),
233
+ )
185
234
186
235
def with_contents (
187
236
self ,
@@ -251,11 +300,16 @@ def lookup(self, ref: URI) -> Resolved[D]:
251
300
if the reference isn't resolvable
252
301
"""
253
302
uri , fragment = urldefrag (urljoin (self ._base_uri , ref ))
303
+ resolver , registry = self , self ._registry
304
+ resource = registry .get (uri )
254
305
try :
255
- resource = self ._registry [uri ]
306
+ if resource is None :
307
+ registry = registry .crawl ()
308
+ resource = registry [uri ]
309
+ resolver = evolve (resolver , registry = registry )
256
310
if fragment .startswith ("/" ):
257
- return resource .pointer (pointer = fragment , resolver = self )
311
+ return resource .pointer (pointer = fragment , resolver = resolver )
258
312
except KeyError :
259
313
raise Unresolvable (ref = ref ) from None
260
314
261
- return Resolved (contents = resource .contents , resolver = self )
315
+ return Resolved (contents = resource .contents , resolver = resolver )
0 commit comments