|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +from collections.abc import Iterable, Mapping, Sequence |
| 4 | +from typing import Any |
| 5 | +from urllib.parse import unquote, urldefrag, urljoin |
| 6 | + |
| 7 | +from pyrsistent import m |
| 8 | +from pyrsistent.typing import PMap |
| 9 | +import attrs |
| 10 | + |
| 11 | + |
class UnsupportedSubclassing(Exception):
    """Raised when something attempts to subclass a class closed to it."""

    @classmethod
    def complain(cls, **kwargs):
        """
        Unconditionally raise `UnsupportedSubclassing`.

        This is installed as ``__init_subclass__`` on protected classes.
        Python forwards any keyword arguments given in a ``class``
        statement to that hook, so they are accepted (and ignored) here;
        otherwise such a subclassing attempt would surface as an
        unrelated `TypeError` instead of this exception.

        Raises:
            UnsupportedSubclassing: always.
        """
        raise UnsupportedSubclassing(
            "Subclassing is not part of referencing's public API. "
            "If no other suitable API exists for what you're trying to do, "
            "feel free to file an issue asking for one."
        )
| 20 | + |
| 21 | + |
class UnidentifiedResource(Exception):
    """Raised for a resource that declares no identifier of its own."""
| 24 | + |
| 25 | + |
def define(cls):
    """
    Class decorator: forbid subclassing of ``cls``, then apply `attrs.define`.

    The subclassing hook is installed before handing the class to attrs.
    """
    setattr(cls, "__init_subclass__", UnsupportedSubclassing.complain)
    decorated = attrs.define(cls)
    return decorated
| 29 | + |
| 30 | + |
def frozen(cls):
    """
    Class decorator: forbid subclassing of ``cls``, then apply `attrs.frozen`.

    The subclassing hook is installed before handing the class to attrs.
    """
    setattr(cls, "__init_subclass__", UnsupportedSubclassing.complain)
    decorated = attrs.frozen(cls)
    return decorated
| 34 | + |
| 35 | + |
# A schema is either a boolean or a mapping with string keys.
Schema = bool | Mapping[str, Any]
| 37 | + |
| 38 | + |
@frozen
class Anchor:
    """A named anchor pointing at a resource, recorded under a URI."""

    # URI under which the anchor is recorded.
    uri: str
    # Name the anchor is looked up by.
    name: str
    # The resource the anchor refers to.
    resource: Schema

    def added_to(self, registry: Registry) -> Registry:
        """Return the result of recording this anchor in ``registry``."""
        updated = registry.with_anchor(
            uri=self.uri,
            anchor=self.name,
            resource=self.resource,
        )
        return updated
| 52 | + |
| 53 | + |
@frozen
class DynamicAnchor:
    """
    A named dynamic anchor pointing at a resource, recorded under a URI.

    It is added to a registry through ``with_anchor``, exactly like a
    plain `Anchor`.
    """

    # URI under which the anchor is recorded.
    uri: str
    # Name the anchor is looked up by.
    name: str
    # The resource the anchor refers to.
    resource: Schema

    def added_to(self, registry: Registry) -> Registry:
        """Return the result of recording this anchor in ``registry``."""
        updated = registry.with_anchor(
            uri=self.uri,
            anchor=self.name,
            resource=self.resource,
        )
        return updated
| 67 | + |
| 68 | + |
@frozen
class IdentifiedResource:
    """A resource paired with the URI it should be registered under."""

    # URI the resource is registered under.
    uri: str
    # The resource itself.
    resource: Schema

    def added_to(self, registry: Registry) -> Registry:
        """Return the result of registering this resource in ``registry``."""
        updated = registry.with_identified_resource(
            uri=self.uri,
            resource=self.resource,
        )
        return updated
| 80 | + |
| 81 | + |
@frozen
class Registry:
    """
    An immutable collection of resources keyed by URI.

    Each entry pairs the resource stored at a URI with a map of the
    anchors recorded for it. Nothing here modifies the registry in place:
    `update`, `with_anchor` and the ``with_*resource*`` methods all return
    a new instance.
    """

    _contents: PMap[str, tuple[Schema, PMap[str, Schema]]] = attrs.field(
        default=m(),
        repr=lambda value: f"({len(value)} entries)",
    )

    def resource_at(self, uri):
        """Return the ``(resource, anchors)`` pair stored at ``uri``."""
        entry = self._contents[uri]
        return entry

    def with_resource(self, resource):
        """
        Return a registry with ``resource`` added under its own ``$id``.

        Raises:
            UnidentifiedResource: if `id_of` finds no identifier.
        """
        declared = id_of(resource)
        if declared is not None:
            return self.with_identified_resource(
                uri=declared,
                resource=resource,
            )
        raise UnidentifiedResource(resource)

    def with_identified_resource(self, uri, resource):
        """Return a registry with ``resource`` added under ``uri``."""
        pair = (uri, resource)
        return self.with_resources([pair])

    def update(self, *registries: Registry):
        """Return a registry merging in the contents of ``registries``."""
        others = [registry._contents for registry in registries]
        merged = self._contents.update(*others)
        return attrs.evolve(self, contents=merged)

    def with_resources(self, pairs):
        """
        Return a registry with every ``(uri, resource)`` pair added.

        Each resource is stored under the given URI with an empty anchor
        map and, when it declares an ``$id``, under that identifier too.
        Re-adding a non-empty URI that this registry already holds is
        asserted to carry an equal resource.
        """
        updated = self._contents
        for uri, resource in pairs:
            # Only a non-empty URI already present in this registry is
            # checked; the empty URI may be freely replaced.
            if uri != "" and uri in self._contents:
                assert self._contents[uri][0] == resource, (
                    uri,
                    self._contents[uri],
                    resource,
                )
            entry = (resource, m())
            updated = updated.set(uri, entry)

            declared = id_of(resource)
            if declared is not None:
                updated = updated.set(declared, entry)
        return attrs.evolve(self, contents=updated)

    def with_anchor(self, uri, anchor, resource):
        """
        Return a registry where ``anchor`` at ``uri`` maps to ``resource``.

        An entry for ``uri`` must already be present.
        """
        stored, anchors = self._contents[uri]
        entry = (stored, anchors.set(anchor, resource))
        return attrs.evolve(self, contents=self._contents.set(uri, entry))

    def resolver(self, root) -> Resolver:
        """
        Return a `Resolver` for ``root`` backed by this registry plus it.

        The base URI is the ``$id`` of ``root``, or the empty string when
        it has none (or a falsy one).
        """
        base_uri = id_of(root) or ""
        populated = self.with_identified_resource(uri=base_uri, resource=root)
        return Resolver(base_uri=base_uri, registry=populated)

    def has_not_crawled(self, uri):
        """
        Report whether ``uri`` is absent or has no anchors recorded.

        A stored resource for which no anchors have been recorded is
        reported as not crawled.
        """
        entry = self._contents.get(uri)
        if entry is None:
            return True
        return not entry[1]
| 133 | + |
| 134 | + |
@define
class Resolver:
    """
    Resolves references relative to a base URI using a `Registry`.

    The resolver is mutable only in that `lookup` may replace its
    registry with one extended by newly discovered subresources.
    """

    # URI against which relative references are resolved.
    _base_uri: str
    # Registry consulted (and extended) during lookups.
    _registry: Registry

    def lookup(self, ref: str):
        """
        Resolve ``ref`` and return ``(target, resolver)``.

        A reference starting with ``#`` is taken relative to the current
        base URI; anything else is joined to the base URI and split into
        URI and fragment. The returned resolver has the resolved URI as
        its base URI.

        A lookup failure propagates from the underlying mapping or
        sequence access (presumably `KeyError` / `IndexError`).
        """
        base_uri = self._base_uri
        if ref.startswith("#"):
            uri, fragment = base_uri, ref[1:]
        else:
            uri, fragment = urldefrag(urljoin(base_uri, ref))

        if self._registry.has_not_crawled(uri):
            # Crawl the resource at the *current* base URI, registering
            # each discovered subresource or anchor as it is produced.
            root, _ = self._registry.resource_at(base_uri)
            found = find_subresources(root=root, initial_base_uri=base_uri)
            for subresource in found:
                self._registry = subresource.added_to(self._registry)

        target, anchors = self._registry.resource_at(uri)
        if fragment:
            if fragment[0] == "/":
                # JSON-pointer style fragment: walk it token by token.
                for token in unquote(fragment[1:]).split("/"):
                    if isinstance(target, Sequence):
                        key = int(token)  # type: ignore
                    else:
                        # "~1" must be unescaped before "~0".
                        key = token.replace("~1", "/").replace("~0", "~")
                    target = target[key]
            else:
                # Any other non-empty fragment names an anchor.
                target = anchors[fragment]

        return target, self.with_base_uri(uri)

    def with_base_uri(self, base_uri):
        """Return a copy of this resolver using ``base_uri`` as its base."""
        return attrs.evolve(self, base_uri=base_uri)

    def with_root(self, root) -> Resolver:
        """
        Return a resolver rooted at ``root``.

        When ``root`` declares an ``$id``, it is joined to the current base
        URI, ``root`` is registered under the result, and that becomes the
        new base URI. Otherwise base URI and registry are carried over.
        """
        declared = id_of(root)
        if declared is None:
            return attrs.evolve(
                self,
                base_uri=self._base_uri,
                registry=self._registry,
            )

        absolute = urljoin(self._base_uri, declared)
        extended = self._registry.with_identified_resource(
            uri=absolute,
            resource=root,
        )
        return attrs.evolve(self, base_uri=absolute, registry=extended)
| 182 | + |
| 183 | + |
# Keywords whose value is itself a single subresource.
SUBRESOURCE = {"items", "not"}
# Keywords whose value is iterated, each element being a subresource.
SUBRESOURCE_ITEMS = {"allOf"}
# Keywords whose value is a mapping whose values are subresources.
SUBRESOURCE_VALUES = {"$defs", "properties"}
| 187 | + |
| 188 | + |
def id_of(resource) -> str | None:
    """
    Return the ``$id`` of ``resource``, or ``None`` if there is none.

    Boolean resources never carry an identifier.
    """
    if isinstance(resource, bool):
        return None
    identifier = resource.get("$id")
    return identifier
| 193 | + |
| 194 | + |
def find_subresources(
    root: Schema,
    initial_base_uri: str,
) -> Iterable[Anchor | DynamicAnchor | IdentifiedResource]:
    """
    Walk ``root`` and lazily yield what is found inside it.

    Starting from ``root`` with ``initial_base_uri`` as its base, every
    visited non-boolean resource may produce:

    * an `IdentifiedResource`, when its ``$id`` resolves to a URI other
      than the base it was visited with;
    * an `Anchor`, when it has an ``$anchor``;
    * a `DynamicAnchor`, when it has a ``$dynamicAnchor``.

    Children are found under the keywords in ``SUBRESOURCE``,
    ``SUBRESOURCE_VALUES`` and ``SUBRESOURCE_ITEMS`` and are visited with
    the parent's resolved URI as their base.
    """
    pending = [(initial_base_uri, root)]
    while pending:
        # Taking from the end makes the most recently queued child next.
        base_uri, resource = pending.pop()
        if isinstance(resource, bool):
            continue

        uri = urljoin(base_uri, resource.get("$id", ""))
        if uri != base_uri:
            yield IdentifiedResource(uri=uri, resource=resource)

        anchor = resource.get("$anchor")
        if anchor is not None:
            yield Anchor(uri=uri, name=anchor, resource=resource)

        dynamic_anchor = resource.get("$dynamicAnchor")
        if dynamic_anchor is not None:
            yield DynamicAnchor(
                uri=uri,
                name=dynamic_anchor,
                resource=resource,
            )

        # TODO: delay finding anchors in subresources...
        for keyword in SUBRESOURCE:
            if keyword in resource:
                pending.append((uri, resource[keyword]))

        for keyword in SUBRESOURCE_VALUES:
            if keyword in resource:
                for subresource in resource[keyword].values():
                    pending.append((uri, subresource))

        # TODO: delay finding anchors in subresources...
        for keyword in SUBRESOURCE_ITEMS:
            if keyword in resource:
                for subresource in resource[keyword]:
                    pending.append((uri, subresource))
0 commit comments