11from __future__ import annotations
22
3- from collections .abc import Iterable , Mapping , Sequence
3+ from collections .abc import Mapping , Sequence
44from typing import TYPE_CHECKING , Any , Union
55from urllib .parse import unquote , urldefrag , urljoin
66
7- from pyrsistent import m
8- from pyrsistent .typing import PMap
9- import attrs
7+ from pyrsistent import m , s
8+ from pyrsistent .typing import PMap , PSet
109
1110try :
1211 Mapping [str , str ]
@@ -29,16 +28,17 @@ class UnidentifiedResource(Exception):
2928
3029
if TYPE_CHECKING:
    # Static type checkers are shown the unmodified attrs decorators.
    from attrs import define, evolve, field, frozen
else:
    # At runtime the attrs decorators are imported under private aliases and
    # re-exposed through wrappers which first replace ``__init_subclass__``.
    from attrs import define as _define, evolve, field, frozen as _frozen

    def define(cls):
        """
        Apply ``attrs.define`` to ``cls`` after installing
        ``UnsupportedSubclassing.complain`` as its ``__init_subclass__``.
        """
        # NOTE(review): presumably this makes subclassing fail loudly --
        # confirm against the definition of ``UnsupportedSubclassing``.
        cls.__init_subclass__ = UnsupportedSubclassing.complain
        return _define(cls)

    def frozen(cls):
        """
        Apply ``attrs.frozen`` to ``cls`` after installing
        ``UnsupportedSubclassing.complain`` as its ``__init_subclass__``.
        """
        cls.__init_subclass__ = UnsupportedSubclassing.complain
        return _frozen(cls)
4242
4343
4444Schema = Union [bool , Mapping [str , Any ]]
@@ -51,9 +51,6 @@ class Anchor:
5151 name : str
5252 resource : Schema
5353
54- def added_to (self , registry : Registry ):
55- return registry .with_anchor (anchor = self )
56-
5754
5855@frozen
5956class DynamicAnchor :
@@ -62,9 +59,6 @@ class DynamicAnchor:
6259 name : str
6360 resource : Schema
6461
65- def added_to (self , registry : Registry ):
66- return registry .with_anchor (anchor = self )
67-
6862
6963AnchorType = Union [Anchor , DynamicAnchor ]
7064
@@ -75,26 +69,15 @@ class IdentifiedResource:
7569 uri : str
7670 resource : Schema
7771
78- def added_to (self , registry : Registry ):
79- return registry .with_identified_resource (
80- uri = self .uri ,
81- resource = self .resource ,
82- )
83-
8472
8573@frozen
8674class Registry :
8775
88- _contents : PMap [str , tuple [Schema , PMap [str , AnchorType ]]] = attrs . field (
76+ _contents : PMap [str , tuple [Schema , PMap [str , AnchorType ]]] = field (
8977 default = m (),
9078 repr = lambda value : f"({ len (value )} entries)" ,
9179 )
92-
93- def resource_at (self , uri ) -> Schema :
94- return self ._contents [uri ][0 ]
95-
96- def anchor_at (self , uri , name ) -> AnchorType :
97- return self ._contents [uri ][1 ][name ]
80+ _uncrawled : PSet [str ] = field (default = s ())
9881
9982 def with_resource (self , resource ) -> Registry :
10083 uri = id_of (resource )
@@ -106,10 +89,16 @@ def with_identified_resource(self, uri, resource) -> Registry:
10689 return self .with_resources ([(uri , resource )])
10790
def update(self, *registries: Registry) -> Registry:
    """
    Return a new registry combining this one with ``registries``.

    Both the stored contents and the sets of not-yet-crawled URIs of every
    given registry are merged into copies of this registry's own. Passing
    no registries yields a registry equal to this one.
    """
    contents = self._contents.update(
        *(each._contents for each in registries),
    )
    # ``PSet.update`` accepts exactly one iterable (unlike ``PMap.update``,
    # which is variadic), so splatting one set per registry raises a
    # ``TypeError`` unless exactly one registry was given. Flatten every
    # registry's URIs into a single stream instead.
    uncrawled = self._uncrawled.update(
        uri for each in registries for uri in each._uncrawled
    )
    return evolve(self, contents=contents, uncrawled=uncrawled)
11199
112100 def with_resources (self , pairs ) -> Registry :
101+ uncrawled = self ._uncrawled
113102 contents = self ._contents
114103 for uri , resource in pairs :
115104 assert (
@@ -122,40 +111,92 @@ def with_resources(self, pairs) -> Registry:
122111 id = id_of (resource )
123112 if id is not None :
124113 contents = contents .set (id , (resource , m ()))
125- return attrs .evolve (self , contents = contents )
114+
115+ uncrawled = uncrawled .add (uri )
116+ return evolve (self , contents = contents , uncrawled = uncrawled )
126117
def with_anchor(self, anchor: Anchor | DynamicAnchor) -> Registry:
    """
    Return a new registry which additionally knows about ``anchor``.

    The anchor is stored by name alongside the resource already registered
    at ``anchor.uri``; that entry is looked up by subscript, so it must
    already be present.
    """
    key = anchor.uri
    schema, known_anchors = self._contents[key]
    entry = (schema, known_anchors.set(anchor.name, anchor))
    updated = self._contents.set(key, entry)
    return evolve(self, contents=updated)
122+
def resource_at(self, uri: str) -> tuple[Schema, Registry]:
    """
    Return the resource stored at ``uri`` and the registry it was read from.

    If ``uri`` has an entry with a non-empty anchor mapping, this registry
    is used as-is. Otherwise the registry is crawled first and the crawled
    registry is both consulted and returned, so callers can keep it.
    """
    entry = self._contents.get(uri)
    needs_crawl = entry is None or not entry[1]
    source = self.crawl() if needs_crawl else self
    return source._contents[uri][0], source
130+
def anchor_at(self, uri, name) -> AnchorType:
    """
    Return the anchor called ``name`` within the resource stored at ``uri``.

    Both lookups are plain subscripts, so a missing URI or anchor name
    propagates whatever error the underlying mappings raise.
    """
    _resource, anchors = self._contents[uri][0], self._contents[uri][1]
    return anchors[name]
133+
def crawl(self) -> Registry:
    """
    Walk every uncrawled resource and return a fully crawled registry.

    Starting from the resources whose URIs are in the uncrawled set, each
    schema and its subresources are visited. Subresources declaring an
    ``$id`` that changes the URI are registered under it, and ``$anchor``
    and ``$dynamicAnchor`` declarations are recorded as anchors. The
    returned registry has an empty uncrawled set.
    """
    crawled = self
    pending = [(uri, self._contents[uri][0]) for uri in self._uncrawled]
    anchor_keywords = (("$anchor", Anchor), ("$dynamicAnchor", DynamicAnchor))

    while pending:
        parent_uri, schema = pending.pop()
        # Boolean schemas have no keywords, hence nothing to discover.
        if schema is True or schema is False:
            continue

        uri = urljoin(parent_uri, schema.get("$id", ""))
        if uri != parent_uri:
            crawled = crawled.with_identified_resource(
                uri=uri,
                resource=schema,
            )

        for keyword, anchor_type in anchor_keywords:
            name = schema.get(keyword)
            if name is not None:
                crawled = crawled.with_anchor(
                    anchor_type(uri=uri, name=name, resource=schema),
                )

        # TODO: delay finding anchors in subresources...
        pending += [(uri, schema[k]) for k in SUBRESOURCE if k in schema]
        pending += [
            (uri, child)
            for k in SUBRESOURCE_VALUES
            if k in schema
            for child in schema[k].values()
        ]
        pending += [
            (uri, child)
            for k in SUBRESOURCE_ITEMS
            if k in schema
            for child in schema[k]
        ]

    return evolve(crawled, uncrawled=s())
131181
def resolver(self, root) -> Resolver:
    """
    Create a resolver whose base URI is that of ``root``.

    ``root`` is added to (a copy of) this registry under its own ID, or
    under the empty string when it declares none.
    """
    root_uri = id_of(root) or ""
    return Resolver(
        base_uri=root_uri,
        registry=self.with_identified_resource(uri=root_uri, resource=root),
    )
136186
137- def has_not_crawled (self , uri ) -> bool :
138- at_uri = self ._contents .get (uri )
139- return at_uri is None or not at_uri [1 ]
140-
141187
142188@define
143189class Resolver :
144190
145191 _base_uri : str
146192 _registry : Registry
147193
148- def lookup (self , ref : str ):
194+ def lookup (self , ref : str ) -> tuple [ Schema , Resolver ] :
149195 if ref .startswith ("#" ):
150196 uri , fragment = self ._base_uri , ref [1 :]
151197 else :
152198 uri , fragment = urldefrag (urljoin (self ._base_uri , ref ))
153- if self ._registry .has_not_crawled (uri ):
154- root = self ._registry .resource_at (self ._base_uri )
155- for each in find_subresources (base_uri = self ._base_uri , root = root ):
156- self ._registry = each .added_to (self ._registry )
157-
158- target = self ._registry .resource_at (uri )
199+ target , registry = self ._registry .resource_at (uri )
159200 if fragment .startswith ("/" ):
160201 segments = unquote (fragment [1 :]).split ("/" )
161202 for segment in segments :
@@ -165,21 +206,21 @@ def lookup(self, ref: str):
165206 segment = segment .replace ("~1" , "/" ).replace ("~0" , "~" )
166207 target = target [segment ] # type: ignore # this can't be a bool
167208 elif fragment :
168- target = self . _registry .anchor_at (uri = uri , name = fragment ).resource
209+ target = registry .anchor_at (uri = uri , name = fragment ).resource
169210
170- return target , attrs . evolve (self , base_uri = uri )
211+ return target , evolve (self , base_uri = uri , registry = registry )
171212
def with_root(self, root) -> Resolver:
    """
    Return a resolver appropriate for resolving references within ``root``.

    When ``root`` declares no ID this resolver is returned unchanged.
    Otherwise the ID is joined onto the current base URI, ``root`` is
    registered under the result, and a resolver using that URI as its new
    base is returned.
    """
    relative_id = id_of(root)
    if relative_id is not None:
        absolute = urljoin(self._base_uri, relative_id)
        extended = self._registry.with_identified_resource(
            uri=absolute,
            resource=root,
        )
        return evolve(self, base_uri=absolute, registry=extended)
    return self
183224
184225
185226SUBRESOURCE = {"items" , "not" }
@@ -191,46 +232,3 @@ def id_of(resource) -> str | None:
191232 if resource is True or resource is False :
192233 return None
193234 return resource .get ("$id" )
194-
195-
196- def find_subresources (
197- root : Schema ,
198- base_uri : str ,
199- ) -> Iterable [Anchor | DynamicAnchor | IdentifiedResource ]:
200- resources = [(base_uri , root )]
201- while resources :
202- base_uri , resource = resources .pop ()
203- if resource is True or resource is False :
204- continue
205-
206- uri = urljoin (base_uri , resource .get ("$id" , "" ))
207- if uri != base_uri :
208- yield IdentifiedResource (uri = uri , resource = resource )
209-
210- anchor = resource .get ("$anchor" )
211- if anchor is not None :
212- yield Anchor (uri = uri , name = anchor , resource = resource )
213-
214- dynamic_anchor = resource .get ("$dynamicAnchor" )
215- if dynamic_anchor is not None :
216- yield DynamicAnchor (
217- uri = uri ,
218- name = dynamic_anchor ,
219- resource = resource ,
220- )
221-
222- resources .extend ( # TODO: delay finding anchors in subresources...
223- (uri , resource [k ]) for k in SUBRESOURCE if k in resource
224- )
225- resources .extend (
226- (uri , subresource )
227- for k in SUBRESOURCE_VALUES
228- if k in resource
229- for subresource in resource [k ].values ()
230- )
231- resources .extend (
232- (uri , subresource )
233- for k in SUBRESOURCE_ITEMS
234- if k in resource
235- for subresource in resource [k ]
236- )
0 commit comments