|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import os |
| 4 | +import itertools |
| 5 | +import json |
| 6 | +import warnings |
| 7 | + |
| 8 | +from pydantic import BaseModel, Field |
| 9 | +from dataclasses import dataclass |
| 10 | +from pathlib import Path |
| 11 | +from typing import Literal, Annotated, Union, Optional |
| 12 | + |
| 13 | + |
# Name of the environment variable that get_path_to_root() reads to find the
# project root.
ENV_PROJECT_ROOT: str = "QUARTO_PROJECT_ROOT"
| 15 | + |
| 16 | +# Errors ----------------------------------------------------------------------- |
| 17 | + |
| 18 | + |
class RefSyntaxError(Exception):
    """Raised when an interlinks reference string cannot be parsed."""
| 21 | + |
| 22 | + |
class InvLookupError(Exception):
    """Raised when a reference cannot be resolved against the inventory files."""
| 25 | + |
| 26 | + |
| 27 | +# Utility functions ----------------------------------------------------------- |
| 28 | + |
| 29 | + |
def get_path_to_root():
    """Return the project root as stored in the environment.

    The value is read from the environment variable named by
    ``ENV_PROJECT_ROOT`` and returned as-is (a string).

    Raises
    ------
    KeyError
        If the environment variable is not set.
    """
    # In lua filters you can use quarto.project.offset
    root = os.environ[ENV_PROJECT_ROOT]
    return root
| 33 | + |
| 34 | + |
def parse_rst_style_ref(full_text):
    """Split rst-style link text of the form ``title <ref>`` into its parts.

    Parameters
    ----------
    full_text:
        The raw text, either ``title <ref>`` or a bare reference.

    Returns
    -------
    tuple
        The reference, followed by the parsed title. When ``full_text`` does
        not contain a ``<ref>`` part, the whole text is returned as the
        reference and the title is None. The title is returned exactly as
        written, including any whitespace before the ``<``.
    """

    import re

    # Digits are allowed in the reference so that targets such as
    # ``numpy.log10`` are recognized.
    m = re.match(r"(?P<text>.+?)<(?P<ref>[a-zA-Z0-9\.\-: _]+)>", full_text)
    if m is None:
        # TODO: print a warning or something
        return full_text, None

    return m.group("ref"), m.group("text")
| 53 | + |
| 54 | + |
| 55 | +# Dataclasses representing pandoc elements ------------------------------------ |
| 56 | +# These classes are used to help indicate what elements the Interlinks class |
| 57 | +# would return in a pandoc filter. |
| 58 | + |
| 59 | + |
class Link(BaseModel):
    """Indicates a pandoc Link element."""

    # Fixed tag; TestSpecEntry.output_element uses "kind" as its discriminator.
    kind: Literal["link"] = "link"
    # Text shown for the link.
    content: str
    # Destination of the link.
    url: str
| 66 | + |
| 67 | + |
class Code(BaseModel):
    """Indicates a pandoc Code element."""

    # Fixed tag; TestSpecEntry.output_element uses "kind" as its discriminator.
    kind: Literal["code"] = "code"
    # Text placed inside the code element.
    content: str
| 73 | + |
| 74 | + |
class Unchanged(BaseModel):
    """Marker class for content that a function no-ops.

    The main purpose of this class is to indicate when a pandoc filter might
    return the original content element.
    """

    # Fixed tag; TestSpecEntry.output_element uses "kind" as its discriminator.
    kind: Literal["unchanged"] = "unchanged"
    # The content that was left as-is (pandoc_ref_to_anchor stores the ref it
    # was given here).
    content: str
| 84 | + |
| 85 | + |
class TestSpecEntry(BaseModel):
    # NOTE(review): presumably one entry of a test specification describing
    # the expected result for a reference -- confirm against the code that
    # loads these entries. Every field is optional and defaults to None.
    output_text: Optional[str] = None
    output_link: Optional[str] = None
    # One of Link, Code or Unchanged; the concrete model is selected by its
    # "kind" field.
    output_element: Optional[
        Annotated[Union[Link, Code, Unchanged], Field(discriminator="kind")]
    ] = None
    error: Optional[str] = None
    warning: Optional[str] = None
| 94 | + |
| 95 | + |
| 96 | +# Reference syntax ------------------------------------------------------------ |
| 97 | +# note that the classes above were made pydantic models so we could serialize |
| 98 | +# them from json. We could make these ones pydantic too, but there is not a |
| 99 | +# ton of benefit here. |
| 100 | + |
| 101 | + |
@dataclass
class Ref:
    """Represent a sphinx-style reference.

    These have this format

        :external+<invname>:<domain>:<role>:`<target>`

    Only the backtick-quoted target is required.
    """

    # The quoted part of the reference, kept exactly as written (including a
    # leading "~" if present).
    target: "str"
    # Optional pieces that precede the target.
    role: "None | str" = None
    domain: "None | str" = None
    invname: "None | str" = None

    # True when the reference starts with ":external".
    external: bool = False

    @classmethod
    def from_string(cls, ref: str):
        """Parse a reference string into a Ref.

        Parameters
        ----------
        ref:
            A reference such as "`target`", ":role:`target`" or
            ":external+invname:domain:role:`target`".

        Raises
        ------
        RefSyntaxError
            If the string does not start with ":" or "`", does not end with
            "`", or the target is not enclosed in a pair of "`".
        """
        if not ref.startswith((":", "`")):
            raise RefSyntaxError(
                'Ref must start with ":" or "`".\n' f"Received ref string: {ref}"
            )

        if not ref.endswith("`"):
            raise RefSyntaxError(
                'Ref must end with "`"\n' f"Received ref string: {ref}"
            )

        # Note that optional options after :external: go right-to-left.
        # e.g. :role:`target`
        # e.g. :external:role:`target`
        # e.g. :external:domain:role:`target`

        # Splitting from the right on the two closing-most backticks yields
        # (params, target, ""). Fewer pieces means the opening backtick is
        # missing, which is a syntax error rather than an unpacking failure.
        pieces = ref.rsplit("`", 2)
        if len(pieces) != 3:
            raise RefSyntaxError(
                'Ref target must be enclosed in a pair of "`".\n'
                f"Received ref string: {ref}"
            )

        params, target, _ = pieces
        kwargs = {"target": target}

        if params != "":
            parts = params.strip(":").split(":")

            if ref.startswith(":external"):
                external, *parts = parts

                kwargs["external"] = True
                if "+" in external:
                    kwargs["invname"] = external.split("+")[-1]
                else:
                    kwargs["invname"] = None

            else:
                kwargs["invname"] = None

            # The last option is the role, the one before it the domain.
            kwargs.update(zip(["role", "domain"], reversed(parts)))

        return cls(**kwargs)
| 157 | + |
| 158 | + |
| 159 | +# Hold all inventory items in a singleton ------------------------------------- |
| 160 | + |
| 161 | + |
@dataclass
class EnhancedItem:
    """An inventory entry together with the inventory it was loaded from."""

    # these are defined in the quarto config
    inv_name: str
    inv_url: str

    # these are defined in the inventory file itself
    name: str
    domain: str
    role: str
    priority: str
    uri: str
    dispname: str

    @property
    def full_uri(self):
        """The entry's uri prefixed with the inventory url.

        A trailing "$" in the uri is shorthand for the entry's name and is
        expanded; a "$" anywhere else is left untouched.
        """
        uri = self.uri
        if uri.endswith("$"):
            uri = uri[:-1] + self.name

        return self.inv_url + uri

    @classmethod
    def make_simple(cls, inv_name, full_url, name, role="function"):
        """Create an item whose full_uri is exactly ``full_url``.

        The inventory url is left empty, the domain is "py", the priority is
        "1" and the display name is "-" (meaning: same as ``name``).
        """
        return cls(
            inv_name=inv_name,
            inv_url="",
            name=name,
            domain="py",
            role=role,
            # priority is declared as a string on this class
            priority="1",
            uri=full_url,
            dispname="-",
        )
| 193 | + |
| 194 | + |
class Inventories:
    """A collection of inventory items, grouped by inventory name.

    Attributes
    ----------
    registry:
        Maps an inventory name to the list of items registered under it.
    """

    def __init__(self):
        self.registry: dict[str, list[EnhancedItem]] = {}

    def items(self):
        """Return an iterator over the items of every registered inventory."""
        return itertools.chain.from_iterable(self.registry.values())

    def load_inventory(self, inventory: dict, url: str, invname: str):
        """Register the entries of a parsed inventory under ``invname``.

        Parameters
        ----------
        inventory:
            Parsed inventory data. Each dict in ``inventory["items"]`` is
            passed as keyword arguments to EnhancedItem.
        url:
            Stored on each item as its ``inv_url``.
        invname:
            Name to register the items under. Items previously registered
            under this name are replaced.
        """
        # TODO: what are the rules for inventories with overlapping names?
        # it seems like this is where priority and using source name as an
        # optional prefix in references is useful (e.g. siuba:a.b.c).
        self.registry[invname] = [
            EnhancedItem(inv_name=invname, inv_url=url, **item)
            for item in inventory["items"]
        ]

    def lookup_reference(self, ref: "Ref") -> "EnhancedItem":
        """Return the item corresponding to a reference.

        Items are matched on the reference's target, and on its role, domain
        and invname whenever those are not None.

        Raises
        ------
        InvLookupError
            If no item, or more than one item, matches the reference.
        """

        crnt_items = self.items()
        for field in ["name", "role", "domain", "invname"]:
            if field == "name":
                # target may have ~ option in front, so we strip it off
                field_value = ref.target.lstrip("~")
            else:
                field_value = getattr(ref, field)

            if field == "role":
                # for some reason, things like :func: are short for :function:.
                field_value = self.normalize_role(field_value)

            crnt_items = self._filter_by_field(crnt_items, field, field_value)

        results = list(crnt_items)
        if not results:
            raise InvLookupError(
                f"Cross reference not found in an inventory file: `{ref}`"
            )

        if len(results) > 1:
            raise InvLookupError(
                f"Cross reference matches multiple entries.\n"
                f"Matching entries: {len(results)}\n"
                f"Reference: {ref}\n"
                f"Top 2 matches: \n  * {results[0]}\n  * {results[1]}"
            )

        return results[0]

    def normalize_role(self, role_name):
        """Normalize the role portion of a reference ("func" -> "function")."""

        if role_name == "func":
            return "function"

        return role_name

    def ref_to_anchor(self, ref: "str | Ref", text: "str | None" = None):
        """Return a Link element based on a reference in interlink format

        Parameters
        ----------
        ref:
            The interlink reference, either already parsed or as a string
            (e.g. ":py:func:`my_module.my_function`").
        text:
            The text to be displayed for the link. When empty or None, the
            entry's display name (or its name, if the display name is "-")
            is used, shortened to its last dotted component when the target
            starts with "~".

        Raises
        ------
        RefSyntaxError
            If ``ref`` is a string that cannot be parsed.
        InvLookupError
            If the reference does not match exactly one inventory entry.

        Examples
        --------

        >>> url = "https://example.org/functools.partial.html"
        >>> item = EnhancedItem.make_simple('someinv', url, name = 'functools.partial')
        >>> invs = Inventories.from_items([item])
        >>> invs.ref_to_anchor("`functools.partial`")
        Link(kind='link', content='functools.partial', url='https://example.org/functools.partial.html')

        >>> invs.ref_to_anchor("`~functools.partial`")
        Link(kind='link', content='partial', url='https://example.org/functools.partial.html')
        """

        if isinstance(ref, str):
            ref = Ref.from_string(ref)

        is_shortened = ref.target.startswith("~")

        # entries are dataclass instances, so fields are read as attributes
        entry = self.lookup_reference(ref)
        dst_url = entry.full_uri

        if text:
            return Link(content=text, url=dst_url)

        name = entry.name if entry.dispname == "-" else entry.dispname
        if is_shortened:
            # shorten names from module.sub_module.func_name -> func_name
            name = name.split(".")[-1]

        return Link(content=name, url=dst_url)

    def pandoc_ref_to_anchor(self, ref: str, text: str) -> "Link | Code | Unchanged":
        """Convert a ref to a Link, with special handling for pandoc filters.

        Note that this function is similar to ref_to_anchor, but handles pandoc's
        representation of ` as "%60", uses warnings instead of errors, and returns
        non-ref urls unchanged.

        Returns
        -------
        Link
            When the ref resolves to an inventory entry.
        Code
            When the lookup fails. A warning is emitted, and the content is
            ``text`` if given, otherwise the ref with every "%60" removed.
        Unchanged
            When ``ref`` does not look like an interlinks reference.
        """

        if ref.startswith(("%60", ":")) and ref.endswith("%60"):
            # Get URL ----
            try:
                return self.ref_to_anchor(ref.replace("%60", "`"), text)
            except InvLookupError as e:
                warnings.warn(str(e))
                # Assuming content is a ListContainer(Str(...))
                body = text if text else ref.replace("%60", "")
                return Code(content=body)

        return Unchanged(content=ref)

    @staticmethod
    def _filter_by_field(items, field_name: str, value: "str | None" = None):
        """Lazily keep the items whose ``field_name`` equals ``value``.

        A ``value`` of None means "do not filter", and ``items`` is returned
        as-is.
        """
        if value is None:
            return items

        # Ref calls the field invname, while EnhancedItem calls it inv_name
        attr_name = "inv_name" if field_name == "invname" else field_name

        return (item for item in items if getattr(item, attr_name) == value)

    @staticmethod
    def _read_json(path):
        """Parse the JSON file at ``path``, closing the file afterwards."""
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    @classmethod
    def from_items(cls, items: "list[EnhancedItem]"):
        """Create an instance from items, grouped by each item's inv_name."""
        invs = cls()
        for item in items:
            invs.registry.setdefault(item.inv_name, []).append(item)

        return invs

    @classmethod
    def from_quarto_config(cls, cfg: dict):
        """Create an instance from the inventories named in a quarto config.

        Parameters
        ----------
        cfg:
            The parsed config. ``cfg["interlinks"]["sources"]`` maps each
            source name to a dict with a "url" entry. The optional "cache"
            (default "_inv") and "site_inv" (default "objects.json") entries
            give paths relative to the project root.

        Returns
        -------
        Inventories
            Holds the site's own inventory (registered under the name "" with
            url "/") and one inventory per source, read from
            ``<root>/<cache>/<source name>_objects.json``.
        """
        invs = cls()
        # get_path_to_root() may hand back a plain string
        p_root = Path(get_path_to_root())

        interlinks = cfg["interlinks"]
        sources = interlinks["sources"]
        cache = interlinks.get("cache", "_inv")

        # load this site's inventory ----
        site_inv = interlinks.get("site_inv", "objects.json")
        invs.load_inventory(cls._read_json(p_root / site_inv), url="/", invname="")

        # load other inventories ----
        for doc_name, source_cfg in sources.items():
            inv_path = p_root / cache / (doc_name + "_objects.json")
            invs.load_inventory(
                cls._read_json(inv_path), url=source_cfg["url"], invname=doc_name
            )

        return invs
0 commit comments