1
1
from __future__ import annotations
2
2
3
- from collections .abc import Iterable , Mapping , Sequence
3
+ from collections .abc import Mapping , Sequence
4
4
from typing import TYPE_CHECKING , Any , Union
5
5
from urllib .parse import unquote , urldefrag , urljoin
6
6
7
- from pyrsistent import m
8
- from pyrsistent .typing import PMap
9
- import attrs
7
+ from pyrsistent import m , s
8
+ from pyrsistent .typing import PMap , PSet
10
9
11
10
try :
12
11
Mapping [str , str ]
@@ -29,16 +28,17 @@ class UnidentifiedResource(Exception):
29
28
30
29
31
30
if TYPE_CHECKING :
32
- from attrs import define , frozen
31
+ from attrs import define , evolve , field , frozen
33
32
else :
33
+ from attrs import define as _define , evolve , field , frozen as _frozen
34
34
35
35
def define (cls ):
36
36
cls .__init_subclass__ = UnsupportedSubclassing .complain
37
- return attrs . define (cls )
37
+ return _define (cls )
38
38
39
39
def frozen (cls ):
40
40
cls .__init_subclass__ = UnsupportedSubclassing .complain
41
- return attrs . frozen (cls )
41
+ return _frozen (cls )
42
42
43
43
44
44
Schema = Union [bool , Mapping [str , Any ]]
@@ -51,9 +51,6 @@ class Anchor:
51
51
name : str
52
52
resource : Schema
53
53
54
- def added_to (self , registry : Registry ):
55
- return registry .with_anchor (anchor = self )
56
-
57
54
58
55
@frozen
59
56
class DynamicAnchor :
@@ -62,9 +59,6 @@ class DynamicAnchor:
62
59
name : str
63
60
resource : Schema
64
61
65
- def added_to (self , registry : Registry ):
66
- return registry .with_anchor (anchor = self )
67
-
68
62
69
63
AnchorType = Union [Anchor , DynamicAnchor ]
70
64
@@ -75,26 +69,15 @@ class IdentifiedResource:
75
69
uri : str
76
70
resource : Schema
77
71
78
- def added_to (self , registry : Registry ):
79
- return registry .with_identified_resource (
80
- uri = self .uri ,
81
- resource = self .resource ,
82
- )
83
-
84
72
85
73
@frozen
86
74
class Registry :
87
75
88
- _contents : PMap [str , tuple [Schema , PMap [str , AnchorType ]]] = attrs . field (
76
+ _contents : PMap [str , tuple [Schema , PMap [str , AnchorType ]]] = field (
89
77
default = m (),
90
78
repr = lambda value : f"({ len (value )} entries)" ,
91
79
)
92
-
93
- def resource_at (self , uri ) -> Schema :
94
- return self ._contents [uri ][0 ]
95
-
96
- def anchor_at (self , uri , name ) -> AnchorType :
97
- return self ._contents [uri ][1 ][name ]
80
+ _uncrawled : PSet [str ] = field (default = s ())
98
81
99
82
def with_resource (self , resource ) -> Registry :
100
83
uri = id_of (resource )
@@ -106,10 +89,16 @@ def with_identified_resource(self, uri, resource) -> Registry:
106
89
return self .with_resources ([(uri , resource )])
107
90
108
91
def update(self, *registries: Registry) -> Registry:
    """
    Return a new registry combining this one with the given registries.

    Contents of later registries are merged over this registry's contents,
    and the URIs each registry has not yet crawled are carried along.

    Calling this with no registries returns a registry equal to this one.
    """
    contents = self._contents.update(
        *(each._contents for each in registries),
    )

    # ``PMap.update`` accepts any number of maps, but ``PSet.update`` takes
    # exactly one iterable, so the uncrawled sets are folded in one by one
    # (splatting them raises ``TypeError`` unless exactly one is given).
    uncrawled = self._uncrawled
    for each in registries:
        uncrawled = uncrawled.update(each._uncrawled)

    return evolve(self, contents=contents, uncrawled=uncrawled)
111
99
112
100
def with_resources (self , pairs ) -> Registry :
101
+ uncrawled = self ._uncrawled
113
102
contents = self ._contents
114
103
for uri , resource in pairs :
115
104
assert (
@@ -122,40 +111,92 @@ def with_resources(self, pairs) -> Registry:
122
111
id = id_of (resource )
123
112
if id is not None :
124
113
contents = contents .set (id , (resource , m ()))
125
- return attrs .evolve (self , contents = contents )
114
+
115
+ uncrawled = uncrawled .add (uri )
116
+ return evolve (self , contents = contents , uncrawled = uncrawled )
126
117
127
118
def with_anchor(self, anchor: Anchor | DynamicAnchor) -> Registry:
    """
    Return a new registry with the given anchor recorded under its URI.

    The anchor's URI must already be present in the registry; looking up
    an unknown URI raises ``KeyError``.
    """
    schema, existing = self._contents[anchor.uri]
    updated = existing.set(anchor.name, anchor)
    contents = self._contents.set(anchor.uri, (schema, updated))
    return evolve(self, contents=contents)
122
+
123
def resource_at(self, uri: str) -> tuple[Schema, Registry]:
    """
    Retrieve the resource stored at the given URI.

    If the URI is unknown, or is known but has no anchors recorded for it,
    the registry is crawled first and the crawled registry is consulted.

    Returns the resource together with the registry that was used to find
    it (this registry, or the crawled one), so callers can keep the result
    of the crawl.  Raises ``KeyError`` if the URI is still not present.
    """
    entry = self._contents.get(uri)
    needs_crawl = entry is None or not entry[1]
    source = self.crawl() if needs_crawl else self
    return source._contents[uri][0], source
130
+
131
def anchor_at(self, uri, name) -> AnchorType:
    """
    Retrieve the anchor with the given name recorded under the given URI.

    Raises ``KeyError`` if either the URI or the anchor name is unknown.
    """
    anchors = self._contents[uri][1]
    return anchors[name]
133
+
134
def crawl(self) -> Registry:
    """
    Walk every uncrawled resource, registering what is found inside it.

    Starting from each URI in the uncrawled set, the resource and its
    subresources (found under the ``SUBRESOURCE``, ``SUBRESOURCE_VALUES``
    and ``SUBRESOURCE_ITEMS`` keywords) are visited.  A schema whose
    ``$id`` resolves to a URI different from its parent's is added as an
    identified resource, and any ``$anchor`` or ``$dynamicAnchor`` is
    recorded under the schema's URI.  Boolean schemas are skipped.

    Returns a new registry whose uncrawled set is empty.
    """
    crawled = self
    # Used as a stack: the most recently discovered schema is visited next.
    pending = [(each, self._contents[each][0]) for each in self._uncrawled]
    while pending:
        parent_uri, schema = pending.pop()
        if schema is True or schema is False:
            continue

        uri = urljoin(parent_uri, schema.get("$id", ""))
        if uri != parent_uri:
            crawled = crawled.with_identified_resource(
                uri=uri,
                resource=schema,
            )

        name = schema.get("$anchor")
        if name is not None:
            found = Anchor(uri=uri, name=name, resource=schema)
            crawled = crawled.with_anchor(found)

        dynamic_name = schema.get("$dynamicAnchor")
        if dynamic_name is not None:
            found = DynamicAnchor(
                uri=uri,
                name=dynamic_name,
                resource=schema,
            )
            crawled = crawled.with_anchor(found)

        # TODO: delay finding anchors in subresources...
        for keyword in SUBRESOURCE:
            if keyword in schema:
                pending.append((uri, schema[keyword]))
        for keyword in SUBRESOURCE_VALUES:
            if keyword in schema:
                for subschema in schema[keyword].values():
                    pending.append((uri, subschema))
        for keyword in SUBRESOURCE_ITEMS:
            if keyword in schema:
                for subschema in schema[keyword]:
                    pending.append((uri, subschema))
    return evolve(crawled, uncrawled=s())
131
181
132
182
def resolver(self, root) -> Resolver:
    """
    Create a resolver rooted at the given schema.

    The root is added to (a copy of) this registry under the identifier
    reported by ``id_of``, or under the empty URI when it has none, and
    that same URI becomes the resolver's base URI.
    """
    identifier = id_of(root)
    base_uri = identifier if identifier else ""
    return Resolver(
        base_uri=base_uri,
        registry=self.with_identified_resource(uri=base_uri, resource=root),
    )
136
186
137
- def has_not_crawled (self , uri ) -> bool :
138
- at_uri = self ._contents .get (uri )
139
- return at_uri is None or not at_uri [1 ]
140
-
141
187
142
188
@define
143
189
class Resolver :
144
190
145
191
_base_uri : str
146
192
_registry : Registry
147
193
148
- def lookup (self , ref : str ):
194
+ def lookup (self , ref : str ) -> tuple [ Schema , Resolver ] :
149
195
if ref .startswith ("#" ):
150
196
uri , fragment = self ._base_uri , ref [1 :]
151
197
else :
152
198
uri , fragment = urldefrag (urljoin (self ._base_uri , ref ))
153
- if self ._registry .has_not_crawled (uri ):
154
- root = self ._registry .resource_at (self ._base_uri )
155
- for each in find_subresources (base_uri = self ._base_uri , root = root ):
156
- self ._registry = each .added_to (self ._registry )
157
-
158
- target = self ._registry .resource_at (uri )
199
+ target , registry = self ._registry .resource_at (uri )
159
200
if fragment .startswith ("/" ):
160
201
segments = unquote (fragment [1 :]).split ("/" )
161
202
for segment in segments :
@@ -165,21 +206,21 @@ def lookup(self, ref: str):
165
206
segment = segment .replace ("~1" , "/" ).replace ("~0" , "~" )
166
207
target = target [segment ] # type: ignore # this can't be a bool
167
208
elif fragment :
168
- target = self . _registry .anchor_at (uri = uri , name = fragment ).resource
209
+ target = registry .anchor_at (uri = uri , name = fragment ).resource
169
210
170
- return target , attrs . evolve (self , base_uri = uri )
211
+ return target , evolve (self , base_uri = uri , registry = registry )
171
212
172
213
def with_root(self, root) -> Resolver:
    """
    Return a resolver to use while inside the given (sub)schema.

    If ``id_of`` reports no identifier for the schema, this resolver is
    returned as-is.  Otherwise the identifier is resolved against the
    current base URI, the schema is registered under the resulting URI,
    and a new resolver with that base URI and registry is returned.
    """
    identifier = id_of(root)
    if identifier is not None:
        base_uri = urljoin(self._base_uri, identifier)
        return evolve(
            self,
            base_uri=base_uri,
            registry=self._registry.with_identified_resource(
                uri=base_uri,
                resource=root,
            ),
        )
    return self
183
224
184
225
185
226
SUBRESOURCE = {"items" , "not" }
@@ -191,46 +232,3 @@ def id_of(resource) -> str | None:
191
232
if resource is True or resource is False :
192
233
return None
193
234
return resource .get ("$id" )
194
-
195
-
196
- def find_subresources (
197
- root : Schema ,
198
- base_uri : str ,
199
- ) -> Iterable [Anchor | DynamicAnchor | IdentifiedResource ]:
200
- resources = [(base_uri , root )]
201
- while resources :
202
- base_uri , resource = resources .pop ()
203
- if resource is True or resource is False :
204
- continue
205
-
206
- uri = urljoin (base_uri , resource .get ("$id" , "" ))
207
- if uri != base_uri :
208
- yield IdentifiedResource (uri = uri , resource = resource )
209
-
210
- anchor = resource .get ("$anchor" )
211
- if anchor is not None :
212
- yield Anchor (uri = uri , name = anchor , resource = resource )
213
-
214
- dynamic_anchor = resource .get ("$dynamicAnchor" )
215
- if dynamic_anchor is not None :
216
- yield DynamicAnchor (
217
- uri = uri ,
218
- name = dynamic_anchor ,
219
- resource = resource ,
220
- )
221
-
222
- resources .extend ( # TODO: delay finding anchors in subresources...
223
- (uri , resource [k ]) for k in SUBRESOURCE if k in resource
224
- )
225
- resources .extend (
226
- (uri , subresource )
227
- for k in SUBRESOURCE_VALUES
228
- if k in resource
229
- for subresource in resource [k ].values ()
230
- )
231
- resources .extend (
232
- (uri , subresource )
233
- for k in SUBRESOURCE_ITEMS
234
- if k in resource
235
- for subresource in resource [k ]
236
- )
0 commit comments