Skip to content

Commit 8cdcdd1

Browse files
committed
feat(codegen): add CLI and integration tests
Click-based CLI entry point (overture-codegen generate) that wires discovery → extraction → output layout → rendering:
- Discovers models via discover_models() entry points
- Filters themes, extracts specs, builds placement registry
- Renders markdown pages with field tables, examples, cross-references, and sidebar metadata
- Supports --theme filtering and --output-dir targeting

Integration tests verify extraction against real Overture models (Building, Division, Segment, etc.) to catch schema drift. CLI tests verify end-to-end generation, output structure, and link integrity.
1 parent 1fcce76 commit 8cdcdd1

File tree

10 files changed

+1113
-64
lines changed

10 files changed

+1113
-64
lines changed
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
"""CLI entrypoint for schema code generation."""
2+
3+
import json
4+
import logging
5+
from pathlib import Path, PurePosixPath
6+
7+
import click
8+
9+
from overture.schema.core.discovery import discover_models
10+
11+
from .markdown_pipeline import generate_markdown_pages
12+
from .model_extraction import extract_model
13+
from .module_layout import (
14+
OUTPUT_ROOT,
15+
compute_schema_root,
16+
entry_point_class,
17+
entry_point_module,
18+
)
19+
from .specs import (
20+
FeatureSpec,
21+
is_model_class,
22+
is_union_alias,
23+
)
24+
from .union_extraction import extract_union
25+
26+
log = logging.getLogger(__name__)
27+
28+
__all__ = ["cli"]
29+
30+
_OUTPUT_FORMATS = ("markdown",)
31+
32+
_FEATURE_FRONTMATTER = "---\nsidebar_position: 1\n---\n\n"
33+
34+
35+
def _write_output(
36+
content: str,
37+
output_dir: Path | None,
38+
output_path: PurePosixPath,
39+
) -> None:
40+
"""Write content to a file under output_dir, or stdout."""
41+
if output_dir:
42+
file_path = output_dir / output_path
43+
file_path.parent.mkdir(parents=True, exist_ok=True)
44+
file_path.write_text(content)
45+
else:
46+
click.echo(content)
47+
click.echo() # separate entries with a blank line in stdout mode
48+
49+
50+
@click.group()
def cli() -> None:
    """Overture Schema code generator.

    Generate documentation and code from Pydantic schema models.
    """
    # Intentionally empty: the group only hosts the subcommands attached to
    # it with ``@cli.command``. The docstring above is passed to click as the
    # group's help text, so edit it with that in mind.
56+
57+
58+
@cli.command("list")
def list_models() -> None:
    """List all discovered models."""
    discovered = discover_models()
    # Classes are listed by class name; any other entry by its str() form.
    labels = [
        entry.__name__ if isinstance(entry, type) else str(entry)
        for entry in discovered.values()
    ]
    labels.sort()
    for label in labels:
        click.echo(label)
68+
69+
70+
@cli.command()
@click.option(
    "--format",
    "output_format",
    required=True,
    type=click.Choice(_OUTPUT_FORMATS),
    help="Output format",
)
@click.option(
    "--theme",
    multiple=True,
    help="Filter to specific theme(s); repeatable (e.g., --theme buildings --theme places)",
)
@click.option(
    "--output-dir",
    type=click.Path(path_type=Path),
    default=None,
    help="Write output to directory (default: stdout)",
)
def generate(
    output_format: str,
    theme: tuple[str, ...],
    output_dir: Path | None,
) -> None:
    """Generate code/docs from discovered models."""
    # NOTE: ``output_format`` is validated by click against _OUTPUT_FORMATS
    # but is not consulted below; markdown is always what gets rendered.
    all_models = discover_models()

    # Schema root from ALL entry points (before theme filter).
    module_paths = [entry_point_module(k.entry_point) for k in all_models]
    schema_root = compute_schema_root(module_paths)

    if theme:
        models = {k: v for k, v in all_models.items() if k.theme in theme}
        # Surface requested themes that matched nothing (e.g. a typo) instead
        # of silently producing output without them.
        matched = [k.theme for k in models]
        unmatched = [t for t in dict.fromkeys(theme) if t not in matched]
        if unmatched:
            log.warning("No models found for theme(s): %s", ", ".join(unmatched))
    else:
        models = all_models

    if output_dir is not None:
        output_dir.mkdir(parents=True, exist_ok=True)

    feature_specs: list[FeatureSpec] = []
    for key, entry in models.items():
        if is_model_class(entry):
            feature_specs.append(extract_model(entry, entry_point=key.entry_point))
        elif is_union_alias(entry):
            feature_specs.append(
                extract_union(
                    entry_point_class(key.entry_point),
                    entry,
                    entry_point=key.entry_point,
                )
            )
        else:
            # Still skipped, but no longer silently: make the omission visible.
            log.warning(
                "Skipping %s: neither a model class nor a union alias",
                key.entry_point,
            )

    _generate_markdown(feature_specs, schema_root, output_dir)
124+
125+
126+
def _generate_markdown(
    feature_specs: list[FeatureSpec],
    schema_root: str,
    output_dir: Path | None,
) -> None:
    """Render every markdown page and emit it via ``_write_output``.

    Feature pages get ``_FEATURE_FRONTMATTER`` prepended; all other pages are
    emitted as rendered. When an output directory is given, category files
    are written afterwards for the directories that received pages.
    """
    pages = generate_markdown_pages(feature_specs, schema_root)

    for page in pages:
        if page.is_feature:
            body = f"{_FEATURE_FRONTMATTER}{page.content}"
        else:
            body = page.content
        _write_output(body, output_dir, page.path)

    # Category files only make sense on disk; nothing more to do for stdout.
    if not output_dir:
        return

    written = {page.path for page in pages}
    featured = {page.path for page in pages if page.is_feature}
    _write_category_files(output_dir, written, featured)
144+
145+
146+
def _ancestor_dirs(paths: set[PurePosixPath]) -> set[PurePosixPath]:
    """Collect all ancestor directories for a set of file paths.

    For each path, walks upward from its parent and records every directory
    visited until ``OUTPUT_ROOT`` is reached. ``OUTPUT_ROOT`` itself is never
    included.

    The walk also stops when a directory is its own parent (the top of a
    pure-path hierarchy). Without that guard, a path whose parent chain does
    not pass through ``OUTPUT_ROOT`` (an absolute path, for instance) would
    loop forever.
    """
    dirs: set[PurePosixPath] = set()
    for path in paths:
        parent = path.parent
        while parent != OUTPUT_ROOT and parent != parent.parent:
            dirs.add(parent)
            parent = parent.parent
    return dirs
155+
156+
157+
def _top_level_positions(
    dirs: set[PurePosixPath],
    feature_paths: set[PurePosixPath],
) -> dict[PurePosixPath, int]:
    """Number the top-level directories for the sidebar, starting at 1.

    Only directories directly under ``OUTPUT_ROOT`` are ranked. Those whose
    name is the first component of some feature path come first; the rest
    follow. Each group is in sorted order.
    """
    names_with_features = {path.parts[0] for path in feature_paths}
    roots = sorted(d for d in dirs if d.parent == OUTPUT_ROOT)
    # sorted() is stable, so keying on "has no feature" moves feature
    # directories to the front while keeping each group's sorted order.
    ordered = sorted(roots, key=lambda d: d.name not in names_with_features)
    return dict(zip(ordered, range(1, len(ordered) + 1)))
167+
168+
169+
def _write_category_files(
    output_dir: Path,
    all_paths: set[PurePosixPath],
    feature_paths: set[PurePosixPath],
) -> None:
    """Write _category_.json files for Docusaurus sidebar navigation.

    One file is written into every ancestor directory of ``all_paths``. Each
    holds a ``label`` derived from the directory name (underscores become
    spaces, then title-cased). Directories ranked by ``_top_level_positions``
    also get a ``position``.
    """
    directories = _ancestor_dirs(all_paths)
    sidebar_rank = _top_level_positions(directories, feature_paths)

    for directory in sorted(directories):
        category: dict[str, object] = {
            "label": directory.name.replace("_", " ").title(),
        }
        rank = sidebar_rank.get(directory)
        if rank is not None:
            category["position"] = rank

        target = output_dir / directory / "_category_.json"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(f"{json.dumps(category, indent=2)}\n")
187+
188+
189+
def main() -> None:
    """Invoke the ``cli`` command group."""
    cli()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()

packages/overture-schema-codegen/src/overture/schema/codegen/link_computation.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def resolve_link_or_slug(self, name: str) -> str:
3131

3232

3333
def _is_normalized(path: PurePosixPath) -> bool:
34-
"""True when the path contains no '..' or '.' components (except root '.')."""
34+
"""Check whether the path contains no '..' or '.' components (except root '.')."""
3535
return ".." not in path.parts and path.parts.count(".") <= 1
3636

3737

@@ -41,8 +41,12 @@ def relative_link(source: PurePosixPath, target: PurePosixPath) -> str:
4141
Both paths must be normalized (no ``..`` components) and relative
4242
to the same output root.
4343
"""
44-
assert _is_normalized(source), f"Source path not normalized: {source}"
45-
assert _is_normalized(target), f"Target path not normalized: {target}"
44+
if not _is_normalized(source):
45+
msg = f"Source path not normalized: {source}"
46+
raise ValueError(msg)
47+
if not _is_normalized(target):
48+
msg = f"Target path not normalized: {target}"
49+
raise ValueError(msg)
4650
source_dir = source.parent
4751
# Count how many levels up from source_dir to common ancestor,
4852
# then descend to target. PurePosixPath doesn't have os.path.relpath,
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
"""Markdown generation pipeline: render pages without I/O.
2+
3+
Orchestrates tree expansion, type collection, placement, reverse
4+
references, and rendering into a list of RenderedPage objects. The
5+
caller decides what to do with them (write to disk, add frontmatter,
6+
stream to stdout, etc.).
7+
"""
8+
9+
from collections.abc import Sequence
10+
from dataclasses import dataclass
11+
from pathlib import PurePosixPath
12+
13+
import overture.schema.system.primitive as _system_primitive
14+
from overture.schema.system.primitive import GeometryType
15+
16+
from .example_loader import ExampleRecord, load_examples
17+
from .link_computation import LinkContext
18+
from .markdown_renderer import (
19+
render_enum,
20+
render_feature,
21+
render_geometry_from_values,
22+
render_newtype,
23+
render_primitives_from_specs,
24+
)
25+
from .model_extraction import expand_model_tree
26+
from .path_assignment import (
27+
GEOMETRY_PAGE,
28+
PRIMITIVES_PAGE,
29+
build_placement_registry,
30+
resolve_output_path,
31+
)
32+
from .primitive_extraction import (
33+
extract_primitives,
34+
partition_primitive_and_geometry_names,
35+
)
36+
from .reverse_references import UsedByEntry, compute_reverse_references
37+
from .specs import (
38+
EnumSpec,
39+
FeatureSpec,
40+
ModelSpec,
41+
NewTypeSpec,
42+
SupplementarySpec,
43+
UnionSpec,
44+
)
45+
from .type_collection import collect_all_supplementary_types
46+
47+
__all__ = ["RenderedPage", "generate_markdown_pages"]
48+
49+
50+
@dataclass(frozen=True, slots=True)
51+
class RenderedPage:
52+
"""A rendered page with its content and output path."""
53+
54+
content: str
55+
path: PurePosixPath
56+
is_feature: bool = False
57+
58+
59+
def _load_model_examples(
    spec: FeatureSpec,
) -> list[ExampleRecord] | None:
    """Look up example records for *spec*.

    For a union spec, the first member is the pyproject source, the source
    annotation is the validation type, and fields come from the common base.
    For any other spec, the source type fills all three roles.

    Returns ``None`` when there is no pyproject source or when loading yields
    nothing.
    """
    if isinstance(spec, UnionSpec):
        origin = spec.members[0] if spec.members else None
        validator = spec.source_annotation
        declared_fields = spec.common_base.model_fields
    else:
        origin = spec.source_type
        validator = spec.source_type
        declared_fields = {} if not spec.source_type else spec.source_type.model_fields

    if not origin:
        return None

    records = load_examples(
        validator,
        spec.name,
        [field.name for field in spec.fields],
        pyproject_source=origin,
        model_fields=declared_fields,
    )
    return records if records else None
82+
83+
84+
def _render_supplement(
    name: str,
    spec: SupplementarySpec,
    registry: dict[str, PurePosixPath],
    reverse_refs: dict[str, list[UsedByEntry]],
) -> RenderedPage:
    """Render one supplementary page, choosing the renderer by spec type.

    Enum specs go to ``render_enum``, NewType specs to ``render_newtype``,
    and model specs to ``render_feature``.

    Raises:
        TypeError: If *spec* is none of the handled spec types.
    """
    page_path = resolve_output_path(name, registry)
    links = LinkContext(page_path, registry)
    referrers = reverse_refs.get(name)

    if isinstance(spec, EnumSpec):
        return RenderedPage(
            content=render_enum(spec, link_ctx=links, used_by=referrers),
            path=page_path,
        )
    if isinstance(spec, NewTypeSpec):
        return RenderedPage(
            content=render_newtype(spec, links, used_by=referrers),
            path=page_path,
        )
    if isinstance(spec, ModelSpec):
        return RenderedPage(
            content=render_feature(spec, links, used_by=referrers),
            path=page_path,
        )
    raise TypeError(f"Unhandled SupplementarySpec variant: {type(spec).__name__}")
105+
106+
107+
def generate_markdown_pages(
    feature_specs: Sequence[FeatureSpec],
    schema_root: str,
) -> list[RenderedPage]:
    """Render every markdown page for the given feature specs, without I/O.

    Pages are returned in this order: one per feature spec (flagged
    ``is_feature``), one per supplementary type, then the primitives page and
    the geometry page. Writing them out and adding frontmatter is left to
    the caller.
    """
    # Expand every feature's model tree first, sharing one cache across specs.
    expansion_cache: dict[type, ModelSpec] = {}
    for feature in feature_specs:
        expand_model_tree(feature, expansion_cache)

    primitive_names, geometry_names = partition_primitive_and_geometry_names(
        _system_primitive
    )
    supplementary = collect_all_supplementary_types(feature_specs)
    registry = build_placement_registry(
        feature_specs, supplementary, primitive_names, geometry_names, schema_root
    )
    reverse_refs = compute_reverse_references(feature_specs, supplementary)

    pages: list[RenderedPage] = []

    for feature in feature_specs:
        page_path = registry[feature.name]
        rendered = render_feature(
            feature,
            link_ctx=LinkContext(page_path, registry),
            examples=_load_model_examples(feature),
            used_by=reverse_refs.get(feature.name),
        )
        pages.append(RenderedPage(content=rendered, path=page_path, is_feature=True))

    pages.extend(
        _render_supplement(name, supp_spec, registry, reverse_refs)
        for name, supp_spec in supplementary.items()
    )

    pages += [
        RenderedPage(
            content=render_primitives_from_specs(
                extract_primitives(primitive_names, _system_primitive)
            ),
            path=PRIMITIVES_PAGE,
        ),
        RenderedPage(
            content=render_geometry_from_values(
                [member.value for member in GeometryType]
            ),
            path=GEOMETRY_PAGE,
        ),
    ]
    return pages

0 commit comments

Comments
 (0)