Skip to content

Commit 92c656c

Browse files
committed
feat(codegen): add markdown renderers
Jinja2 templates and rendering logic for documentation pages:

- markdown_renderer: orchestrates page rendering for features, enums, NewTypes, primitives, and geometry. Recursively expands MODEL-kind fields inline with dot-notation.
- markdown_type_format: type string formatting with link-aware rendering via LinkContext.
- example_loader: loads examples from theme pyproject.toml, validates against Pydantic models, flattens to dot-notation.
- reverse_references: computes "Used By" cross-references between types and the features that reference them.

Templates: feature, enum, newtype, primitives, geometry pages. Golden-file snapshot tests verify rendered output stability. Adds renderer-specific fixtures to conftest.py (cli_runner, primitives_markdown, geometry_markdown).
1 parent 1e0ce22 commit 92c656c

30 files changed

+4219
-0
lines changed
Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
"""Load and process example data from theme pyproject.toml files."""
2+
3+
import logging
4+
import sys
5+
from dataclasses import dataclass
6+
from pathlib import Path
7+
from typing import Any
8+
9+
from pydantic import BaseModel, TypeAdapter, ValidationError
10+
from pydantic.fields import FieldInfo
11+
12+
from .model_extraction import resolve_field_alias
13+
from .type_analyzer import single_literal_value
14+
15+
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
16+
17+
# Names exported by a star-import of this module.
__all__ = ["ExampleRecord", "load_examples", "validate_example"]
18+
19+
# tomllib is stdlib from 3.11+; tomli is the backport for 3.10.
20+
try:
21+
import tomllib # type: ignore[import-not-found]
22+
except ModuleNotFoundError:
23+
import tomli as tomllib # type: ignore[import-not-found]
24+
25+
26+
@dataclass
class ExampleRecord:
    """One example, flattened for display.

    ``rows`` holds ``(key, value)`` pairs in documentation order, where
    each key is a flattened dot/bracket-notation path.
    """

    rows: list[tuple[str, Any]]
31+
32+
33+
def _inject_literal_fields(
    model_fields_dict: dict[str, FieldInfo], data: dict[str, Any]
) -> dict[str, Any]:
    """Fill in single-value ``Literal`` fields that *data* leaves out.

    Every field in *model_fields_dict* is looked up in *data* under the
    key returned by ``resolve_field_alias`` (``validation_alias`` first,
    then ``alias``, then the plain field name). When that key is absent
    and the field's annotation is a ``Literal`` with exactly one value,
    that value is stored under the key.

    The input dict is left untouched; a shallow copy is returned.
    """
    injected = data.copy()

    for name, info in model_fields_dict.items():
        alias = resolve_field_alias(name, info)
        if alias not in injected:
            constant = single_literal_value(info.annotation)
            # None means "not a single-value Literal": nothing to inject.
            if constant is not None:
                injected[alias] = constant

    return injected
57+
58+
59+
def _denull_value(value: object) -> object:
    """Return *value* with every ``"null"`` sentinel string turned into ``None``.

    Lists are rebuilt element by element and dicts are delegated to
    ``_denull``; any other value is handed back unchanged.
    """
    if value == "null":
        return None
    if isinstance(value, list):
        return [_denull_value(element) for element in value]
    if isinstance(value, dict):
        return _denull(value)
    return value
68+
69+
70+
def _denull(data: dict[str, Any]) -> dict[str, Any]:
    """Replace ``"null"`` sentinel strings in *data* with ``None``.

    TOML cannot express null, so example data spells it as the string
    ``"null"``. Each value is passed through ``_denull_value``, which
    also descends into nested dicts and lists.

    A new dict is built; *data* itself is not modified.
    """
    cleaned: dict[str, Any] = {}
    for key, value in data.items():
        cleaned[key] = _denull_value(value)
    return cleaned
81+
82+
83+
def _known_field_keys(model_fields_dict: dict[str, FieldInfo]) -> frozenset[str]:
    """Return the alias-resolved key of every field in *model_fields_dict*."""
    keys: set[str] = set()
    for name, info in model_fields_dict.items():
        keys.add(resolve_field_alias(name, info))
    return frozenset(keys)
88+
89+
90+
def _strip_null_unknown_fields(
    data: dict[str, Any], known_keys: frozenset[str]
) -> dict[str, Any]:
    """Return *data* without ``None``-valued entries whose key is unknown.

    With discriminated unions, *known_keys* covers only the common base
    fields. Flat parquet schemas carry null columns for the arms that
    were not selected; removing them keeps the selected arm's validator
    from rejecting them as unexpected extras.

    Entries with a non-null value are never removed, so the arm's own
    validator still decides whether to accept them.
    """
    kept: dict[str, Any] = {}
    for key, value in data.items():
        if value is None and key not in known_keys:
            continue
        kept[key] = value
    return kept
104+
105+
106+
def validate_example(
    validation_type: object,
    raw: dict[str, Any],
    *,
    model_fields: dict[str, FieldInfo] | None = None,
) -> dict[str, Any]:
    """Check example data against a model class or a union type.

    Validation goes through ``TypeAdapter``, so both concrete models and
    discriminated union aliases are supported.

    Before validating, a working copy of *raw* is prepared:

    1. ``"null"`` strings become ``None``.
    2. Missing single-value ``Literal`` fields are injected, based on
       *model_fields*.
    3. ``None``-valued entries whose key is not in *model_fields* are
       removed (flat-schema examples of discriminated unions carry nulls
       for fields of the arms that were not selected).

    When *model_fields* is omitted it is taken from *validation_type* if
    that is a ``BaseModel`` subclass, and is empty otherwise.

    The return value is the result of step 1 only; injected literals and
    stripped nulls are not reflected in it. A ``ValidationError`` raised
    by the adapter is not caught here.
    """
    denulled = _denull(raw)

    if model_fields is None:
        model_fields = (
            validation_type.model_fields
            if isinstance(validation_type, type)
            and issubclass(validation_type, BaseModel)
            else {}
        )

    known_keys = _known_field_keys(model_fields)
    with_literals = _inject_literal_fields(model_fields, denulled)
    candidate = _strip_null_unknown_fields(with_literals, known_keys)

    # Only the side effect (raising on invalid data) matters here.
    TypeAdapter(validation_type).validate_python(candidate)
    return denulled
140+
141+
142+
# Top-level example keys that flatten_example leaves out by default.
_DEFAULT_SKIP_KEYS: frozenset[str] = frozenset({"bbox"})
143+
144+
145+
def _flatten_value(prefix: str, value: object) -> list[tuple[str, Any]]:
    """Expand *value* into ``(path, leaf)`` rows rooted at *prefix*.

    Dict entries extend the path with ``.key``. A non-empty list whose
    first element is a dict or a list extends it with ``[index]`` for
    each element. Everything else, including empty lists and lists of
    scalars, is emitted as a single row.
    """
    if isinstance(value, dict):
        return [
            row
            for key, child in value.items()
            for row in _flatten_value(f"{prefix}.{key}", child)
        ]

    # Only the first element decides whether a list is expanded.
    if isinstance(value, list) and value and isinstance(value[0], (dict, list)):
        return [
            row
            for index, element in enumerate(value)
            for row in _flatten_value(f"{prefix}[{index}]", element)
        ]

    return [(prefix, value)]
158+
159+
160+
def flatten_example(
    raw: dict[str, Any],
    *,
    skip_keys: frozenset[str] = _DEFAULT_SKIP_KEYS,
) -> list[tuple[str, Any]]:
    """Turn a nested example dict into flat ``(path, value)`` pairs.

    A nested dict contributes ``"parent.child"`` paths, a list of dicts
    ``"parent[0].child"``, and a list of lists of dicts double-indexed
    paths such as ``"parent[0][1].child"``. Plain lists stay intact as
    values. Keys listed in *skip_keys* are omitted, but only when they
    appear at the top level.
    """
    return [
        row
        for key, value in raw.items()
        if key not in skip_keys
        for row in _flatten_value(key, value)
    ]
178+
179+
180+
def extract_base_field(key: str) -> str:
    """Extract the top-level field name from a flattened key.

    The base field is everything before the first ``.`` or ``[``,
    whichever comes first. A key that contains neither is returned
    unchanged.

    >>> extract_base_field("sources[0].dataset")
    'sources'
    >>> extract_base_field("names.primary")
    'names'
    >>> extract_base_field("names.rules[0].variant")
    'names'
    >>> extract_base_field("id")
    'id'
    """
    # Cut at "[" first, then at "."; the result ends at whichever
    # separator occurs earliest. Splitting on "[" alone would leave
    # "names.rules" for a key such as "names.rules[0].variant".
    return key.split("[", 1)[0].split(".", 1)[0]
195+
196+
197+
def order_example_rows(
    flat_rows: list[tuple[str, Any]],
    field_names: list[str],
) -> list[tuple[str, Any]]:
    """Arrange flattened rows to follow the documented field order.

    Each row is ranked by where its base field (see
    ``extract_base_field``) appears in *field_names*. The sort is
    stable, so rows sharing a base field keep their relative order.
    Rows whose base field is not listed are placed after all others.
    """
    rank = dict(zip(field_names, range(len(field_names))))
    unknown_rank = len(field_names)

    def rank_of(row: tuple[str, Any]) -> int:
        return rank.get(extract_base_field(row[0]), unknown_rank)

    ordered = list(flat_rows)
    ordered.sort(key=rank_of)
    return ordered
214+
215+
216+
def load_examples_from_toml(
217+
pyproject_path: Path,
218+
model_name: str,
219+
) -> list[dict[str, Any]]:
220+
"""Load ``[examples.<model_name>]`` from a pyproject.toml file."""
221+
with pyproject_path.open("rb") as f:
222+
data = tomllib.load(f)
223+
224+
examples: dict[str, list[dict[str, Any]]] = data.get("examples", {})
225+
return examples.get(model_name, [])
226+
227+
228+
def resolve_pyproject_path(model_class: type) -> Path | None:
229+
"""Find pyproject.toml by walking up from the model's module location."""
230+
module_name = getattr(model_class, "__module__", None)
231+
if not module_name:
232+
return None
233+
234+
module = sys.modules.get(module_name)
235+
if not module:
236+
return None
237+
238+
module_file = getattr(module, "__file__", None)
239+
if not module_file:
240+
return None
241+
242+
# Walk up from module directory
243+
current = Path(module_file).parent
244+
while current != current.parent: # Stop at filesystem root
245+
pyproject = current / "pyproject.toml"
246+
if pyproject.exists():
247+
return pyproject
248+
current = current.parent
249+
250+
return None
251+
252+
253+
def load_examples(
    validation_type: object,
    model_name: str,
    field_names: list[str],
    *,
    pyproject_source: type | None = None,
    model_fields: dict[str, FieldInfo] | None = None,
) -> list[ExampleRecord]:
    """Load, validate, flatten and order the examples of one model.

    Each raw example is validated against *validation_type*; an example
    that fails validation is logged as a warning and left out. An empty
    list is returned when the pyproject source is not a class, when no
    pyproject.toml can be located, when the file cannot be read or
    parsed, or when it holds no examples for *model_name*.

    Parameters
    ----------
    validation_type : type[BaseModel] | object
        Model class or union alias to validate against.
    model_name : str
        Name of the model whose examples are loaded.
    field_names : list[str]
        Field names that define the order of the output rows.
    pyproject_source : type or None
        Type used to locate pyproject.toml. If None, *validation_type*
        is used, provided it is a class.
    model_fields : dict[str, FieldInfo] or None
        Field info dict for Literal injection. If None, it is inferred
        from *validation_type* when that is a BaseModel class.
    """
    source_type = validation_type if pyproject_source is None else pyproject_source
    if not isinstance(source_type, type):
        return []

    pyproject_path = resolve_pyproject_path(source_type)
    if not pyproject_path:
        return []

    try:
        raw_examples = load_examples_from_toml(pyproject_path, model_name)
    except (OSError, tomllib.TOMLDecodeError):
        log.debug("Failed to load examples for %s", model_name, exc_info=True)
        return []

    if not raw_examples:
        return []

    records = []
    for raw_example in raw_examples:
        try:
            denulled = validate_example(
                validation_type, raw_example, model_fields=model_fields
            )
        except ValidationError as error:
            log.warning(
                "Skipping invalid example for %s in %s: %s",
                model_name,
                pyproject_path,
                error,
            )
        else:
            ordered_rows = order_example_rows(flatten_example(denulled), field_names)
            records.append(ExampleRecord(rows=ordered_rows))

    return records

packages/overture-schema-codegen/src/overture/schema/codegen/link_computation.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
from dataclasses import dataclass
44
from pathlib import PurePosixPath
55

6+
from .case_conversion import slug_filename
7+
68
__all__ = ["LinkContext", "relative_link"]
79

810

@@ -19,6 +21,14 @@ def resolve_link(self, name: str) -> str | None:
1921
return relative_link(self.page_path, self.registry[name])
2022
return None
2123

24+
def resolve_link_or_slug(self, name: str) -> str:
    """Return a link for *name*, using a slug filename as the fallback.

    The result of ``resolve_link`` is used when it is non-empty;
    otherwise ``slug_filename(name)`` is returned. A usable link string
    is therefore always produced, whether or not the type has a
    registered page.
    """
    link = self.resolve_link(name)
    if link:
        return link
    return slug_filename(name)
31+
2232

2333
def _is_normalized(path: PurePosixPath) -> bool:
2434
"""True when the path contains no '..' or '.' components (except root '.')."""

0 commit comments

Comments
 (0)