Skip to content

Commit 0f38df2

Browse files
committed
tests: python interlinks
1 parent 65b4706 commit 0f38df2

File tree

8 files changed

+498
-62
lines changed

8 files changed

+498
-62
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,3 +141,5 @@ dmypy.json
141141

142142
# Local Netlify folder
143143
.netlify
144+
145+
/.luarc.json

quartodoc/interlinks.py

Lines changed: 356 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,356 @@
1+
from __future__ import annotations
2+
3+
import os
4+
import itertools
5+
import json
6+
import warnings
7+
8+
from pydantic import BaseModel, Field
9+
from dataclasses import dataclass
10+
from pathlib import Path
11+
from typing import Literal, Annotated, Union, Optional
12+
13+
14+
# Name of the environment variable that get_path_to_root() reads to locate the
# quarto project root.
ENV_PROJECT_ROOT: str = "QUARTO_PROJECT_ROOT"
15+
16+
# Errors -----------------------------------------------------------------------
17+
18+
19+
class RefSyntaxError(Exception):
    """Raised when an interlinks reference string cannot be parsed."""
21+
22+
23+
class InvLookupError(Exception):
    """Raised when a reference cannot be resolved against the inventory files."""
25+
26+
27+
# Utility functions -----------------------------------------------------------
28+
29+
30+
def get_path_to_root():
    """Return the project root read from the ENV_PROJECT_ROOT environment variable.

    The value is returned exactly as stored in the environment (a string).

    Raises
    ------
    KeyError
        If the environment variable is not set.
    """
    # In lua filters you can use quarto.project.offset
    root = os.environ[ENV_PROJECT_ROOT]
    return root
33+
34+
35+
def parse_rst_style_ref(full_text):
    """Split RST-style ``title <ref>`` text into its reference and its title.

    Returns
    -------
    tuple
        ``(ref, title)``. When ``full_text`` does not match the ``title <ref>``
        pattern, the whole text is returned as the reference and the title
        is None.
    """

    import re

    found = re.match(r"(?P<text>.+?)\<(?P<ref>[a-zA-Z\.\-: _]+)\>", full_text)
    if found is not None:
        # Note the order: the reference comes first, then the title text.
        return found.group("ref"), found.group("text")

    # TODO: print a warning or something
    return full_text, None
53+
54+
55+
# Dataclasses representing pandoc elements ------------------------------------
56+
# These classes are used to help indicate what elements the Interlinks class
57+
# would return in a pandoc filter.
58+
59+
60+
class Link(BaseModel):
    """Model of a pandoc Link element: display content plus a destination url."""

    # Tag identifying this model; TestSpecEntry.output_element discriminates on it.
    kind: Literal["link"] = "link"
    content: str
    url: str
65+
url: str
66+
67+
68+
class Code(BaseModel):
    """Model of a pandoc Code element holding its text content."""

    # Tag identifying this model; TestSpecEntry.output_element discriminates on it.
    kind: Literal["code"] = "code"
    content: str
73+
74+
75+
class Unchanged(BaseModel):
    """Marker for content that a function leaves as-is (a no-op).

    It exists mainly to signal the case where a pandoc filter might hand back
    the original content element rather than a new one.
    """

    # Tag identifying this model; TestSpecEntry.output_element discriminates on it.
    kind: Literal["unchanged"] = "unchanged"
    content: str
84+
85+
86+
class TestSpecEntry(BaseModel):
    """One entry of a test specification; every field is optional.

    NOTE(review): the fields presumably describe the expected outcome of
    resolving a reference (text, link, element, or an error/warning message) --
    confirm against the spec files that use this model.
    """

    output_text: Optional[str] = None
    output_link: Optional[str] = None
    # Parsed into Link, Code or Unchanged according to the value of "kind".
    output_element: Optional[
        Annotated[Union[Link, Code, Unchanged], Field(discriminator="kind")]
    ] = None
    error: Optional[str] = None
    warning: Optional[str] = None
94+
95+
96+
# Reference syntax ------------------------------------------------------------
97+
# note that the classes above were made pydantic models so we could serialize
98+
# them from json. We could make these ones pydantic too, but there is not a
99+
# ton of benefit here.
100+
101+
102+
@dataclass
class Ref:
    """Represent a sphinx-style reference.

    These have this format
      :external+<invname>:<domain>:<role>:`<target>`

    Everything before the backtick-quoted target is optional.

    """

    target: "str"
    role: "None | str" = None
    domain: "None | str" = None
    invname: "None | str" = None

    external: bool = False

    @classmethod
    def from_string(cls, ref: str):
        """Parse a reference string into a Ref.

        Parameters
        ----------
        ref:
            A reference such as "`target`", ":role:`target`",
            ":external:domain:role:`target`" or
            ":external+invname:domain:role:`target`".

        Raises
        ------
        RefSyntaxError
            If the string does not start with ":" or "`", does not end with
            "`", or the target is not enclosed in a pair of backticks.
        """
        if not ref.startswith((":", "`")):
            raise RefSyntaxError(
                'Ref must start with ":" or "`".\n' f"Received ref string: {ref}"
            )

        if not ref.endswith("`"):
            raise RefSyntaxError(
                'Ref must end with "`"\n' f"Received ref string: {ref}"
            )

        # Splitting from the right yields (params, target, "") when both
        # backticks are present. Fewer pieces means the opening backtick was
        # omitted, which previously surfaced as a bare unpacking ValueError.
        pieces = ref.rsplit("`", 2)
        if len(pieces) != 3:
            raise RefSyntaxError(
                "Ref target must be enclosed in a pair of backticks.\n"
                f"Received ref string: {ref}"
            )

        params, target, _ = pieces
        kwargs = {"target": target}

        # Note that optional options after :external: go right-to-left.
        # e.g. :role:`target`
        # e.g. :external:role:`target`
        # e.g. :external:domain:role:`target`
        if params != "":
            head, *rest = params.strip(":").split(":")

            # Only an exact "external" or "external+<invname>" segment marks an
            # external reference; a role that merely starts with "external"
            # is still treated as a role.
            if head == "external" or head.startswith("external+"):
                kwargs["external"] = True
                kwargs["invname"] = head.split("+")[-1] if "+" in head else None
                parts = rest
            else:
                kwargs["invname"] = None
                parts = [head, *rest]

            # The last segment is the role, the one before it the domain.
            kwargs.update(zip(["role", "domain"], reversed(parts)))

        return cls(**kwargs)
157+
158+
159+
# Hold all inventory items in a singleton -------------------------------------
160+
161+
162+
@dataclass
class EnhancedItem:
    """An inventory entry together with the inventory it was loaded from."""

    # these are defined in the quarto config
    inv_name: str
    inv_url: str

    # these are defined in the inventory file itself
    name: str
    domain: str
    role: str
    priority: str
    uri: str
    dispname: str

    @property
    def full_uri(self):
        """Return the inventory url joined with this entry's uri.

        A uri ending in "$" is shorthand: the trailing "$" is replaced with the
        entry's name. A "$" anywhere else in the uri is left untouched.
        """
        if self.uri.endswith("$"):
            return self.inv_url + self.uri[:-1] + self.name

        return self.inv_url + self.uri

    @classmethod
    def make_simple(cls, inv_name, full_url, name, role="function"):
        """Create an item in the "py" domain whose uri is already a complete url.

        The inventory url is left empty, so ``full_uri`` is ``full_url`` itself
        (subject to the trailing "$" shorthand).
        """
        return cls(
            inv_name=inv_name,
            inv_url="",
            name=name,
            domain="py",
            role=role,
            # priority is declared as a str field, so pass "1" rather than 1
            priority="1",
            uri=full_url,
            dispname="-",
        )
193+
194+
195+
class Inventories:
    """Collection of inventory items, grouped by the inventory they belong to."""

    def __init__(self):
        # Maps an inventory name to the list of items loaded from it.
        self.registry: "dict[str, list[EnhancedItem]]" = {}

    def items(self):
        """Return an iterator over the items of every registered inventory."""
        return itertools.chain(*self.registry.values())

    def load_inventory(self, inventory: dict, url: str, invname: str):
        """Register the entries of a parsed inventory under ``invname``.

        Parameters
        ----------
        inventory:
            Parsed inventory data. Its "items" key holds a list of dicts whose
            keys are passed to EnhancedItem as keyword arguments.
        url:
            Stored on each item as its ``inv_url``.
        invname:
            Registry key; anything already registered under it is replaced.
        """
        # TODO: what are the rules for inventories with overlapping names?
        # it seems like this is where priority and using source name as an
        # optional prefix in references is useful (e.g. siuba:a.b.c).
        self.registry[invname] = [
            EnhancedItem(inv_name=invname, inv_url=url, **item)
            for item in inventory["items"]
        ]

    def lookup_reference(self, ref: "Ref") -> "EnhancedItem":
        """Return the item corresponding to a reference.

        Raises
        ------
        InvLookupError
            If no item matches the reference, or more than one does.
        """

        crnt_items = self.items()
        for field in ["name", "role", "domain", "invname"]:
            if field == "name":
                # target may have ~ option in front, so we strip it off
                field_value = ref.target.lstrip("~")
            else:
                field_value = getattr(ref, field)

            if field == "role":
                # for some reason, things like :func: are short for :function:.
                field_value = self.normalize_role(field_value)

            crnt_items = self._filter_by_field(crnt_items, field, field_value)

        results = list(crnt_items)
        if not results:
            raise InvLookupError(
                f"Cross reference not found in an inventory file: `{ref}`"
            )

        if len(results) > 1:
            raise InvLookupError(
                f"Cross reference matches multiple entries.\n"
                f"Matching entries: {len(results)}\n"
                f"Reference: {ref}\n"
                f"Top 2 matches: \n  * {results[0]}\n  * {results[1]}"
            )

        return results[0]

    def normalize_role(self, role_name):
        """Normalize the role portion of a reference ("func" becomes "function")."""

        if role_name == "func":
            return "function"

        return role_name

    def ref_to_anchor(self, ref: "str | Ref", text: "str | None" = None):
        """Return a Link element based on a reference in interlink format

        Parameters
        ----------
        ref:
            The interlink reference (e.g. "`my_module.my_function`"), or an
            already parsed Ref.
        text:
            The text to be displayed for the link. When empty or None, the
            entry's display name is used (its name if the display name is "-").

        Examples
        --------

        >>> url = "https://example.org/functools.partial.html"
        >>> item = EnhancedItem.make_simple('someinv', url, name = 'functools.partial')
        >>> invs = Inventories.from_items([item])
        >>> invs.ref_to_anchor("`functools.partial`").content
        'functools.partial'

        >>> invs.ref_to_anchor("`~functools.partial`").content
        'partial'
        """

        if isinstance(ref, str):
            ref = Ref.from_string(ref)

        is_shortened = ref.target.startswith("~")

        # Items are dataclass instances, so use attribute access (not subscripts).
        entry = self.lookup_reference(ref)
        dst_url = entry.full_uri

        if text:
            return Link(content=text, url=dst_url)

        name = entry.name if entry.dispname == "-" else entry.dispname
        if is_shortened:
            # shorten names from module.sub_module.func_name -> func_name
            name = name.split(".")[-1]

        # pydantic models only accept keyword arguments
        return Link(content=name, url=dst_url)

    def pandoc_ref_to_anchor(self, ref: str, text: str) -> "Link | Code | Unchanged":
        """Convert a ref to a Link, with special handling for pandoc filters.

        Note that this function is similar to ref_to_anchor, but handles pandoc's
        representation of ` as "%60", warns instead of raising when the lookup
        fails, and returns non-ref urls unchanged.
        """

        looks_like_ref = ref.startswith(("%60", ":")) and ref.endswith("%60")
        if not looks_like_ref:
            return Unchanged(content=ref)

        # Get URL ----
        try:
            return self.ref_to_anchor(ref.replace("%60", "`"), text)
        except InvLookupError as e:
            warnings.warn(str(e))

        # Lookup failed: fall back to code, preferring the supplied text.
        # Assuming content is a ListContainer(Str(...))
        body = text if text else ref.replace("%60", "")
        return Code(content=body)

    @staticmethod
    def _filter_by_field(items, field_name: str, value: "str | None" = None):
        """Lazily keep the items whose ``field_name`` attribute equals ``value``.

        When ``value`` is None no filtering happens and ``items`` is returned
        as is.
        """
        if value is None:
            return items

        # Ref calls this field "invname", while EnhancedItem calls it "inv_name".
        attr_name = "inv_name" if field_name == "invname" else field_name

        return (item for item in items if getattr(item, attr_name) == value)

    @classmethod
    def from_items(cls, items: "list[EnhancedItem]"):
        """Build an Inventories instance, grouping items by their ``inv_name``."""
        invs = cls()
        for item in items:
            invs.registry.setdefault(item.inv_name, []).append(item)

        return invs

    @classmethod
    def from_quarto_config(cls, cfg: dict):
        """Build an Inventories instance from a quarto configuration.

        Reads ``cfg["interlinks"]``: the required "sources" mapping, plus the
        optional "cache" (default "_inv") and "site_inv" (default
        "objects.json") entries. All paths are resolved against the project
        root returned by get_path_to_root().
        """
        invs = cls()
        # get_path_to_root() returns a string; wrap it so "/" joins paths.
        p_root = Path(get_path_to_root())

        interlinks = cfg["interlinks"]
        sources = interlinks["sources"]
        cache = interlinks.get("cache", "_inv")

        # load this sites inventory ----
        site_inv = interlinks.get("site_inv", "objects.json")

        with open(p_root / site_inv, encoding="utf-8") as f:
            invs.load_inventory(json.load(f), url="/", invname="")

        # load other inventories ----
        for doc_name, source_cfg in sources.items():
            inv_path = p_root / Path(cache) / (doc_name + "_objects.json")

            with open(inv_path, encoding="utf-8") as f:
                json_data = json.load(f)

            invs.load_inventory(json_data, url=source_cfg["url"], invname=doc_name)

        return invs

0 commit comments

Comments
 (0)