Skip to content

Commit 69dc977

Browse files
committed
feat(codegen): generate pages and links for Pydantic built-in types
Pydantic types like HttpUrl and EmailStr appear in field annotations but previously rendered as unlinked inline code. Each referenced Pydantic type now gets its own page under pydantic/<module>/ with a description, upstream Pydantic docs link, and Used By section. Discovery is reference-driven: the type collection visitor detects PRIMITIVE-kind types from pydantic modules in expanded feature trees. PydanticTypeSpec joins the SupplementarySpec union and flows through placement, reverse references, and rendering. Linking is registry-driven for all PRIMITIVE-kind types. Any primitive with a page in the placement registry gets linked, whether it's a Pydantic type (individual page) or a registered numeric primitive (aggregate page). This also links int32/float64 to the primitives page, which they weren't before. Shared is_pydantic_sourced() predicate gates collection and reverse reference tracking to pydantic-origin types without restricting the linking mechanism.
1 parent 216771f commit 69dc977

17 files changed

+435
-22
lines changed

packages/overture-schema-codegen/src/overture/schema/codegen/markdown_pipeline.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
render_geometry_from_values,
2222
render_newtype,
2323
render_primitives_from_specs,
24+
render_pydantic_type,
2425
)
2526
from .model_extraction import expand_model_tree
2627
from .path_assignment import (
@@ -39,6 +40,7 @@
3940
FeatureSpec,
4041
ModelSpec,
4142
NewTypeSpec,
43+
PydanticTypeSpec,
4244
SupplementarySpec,
4345
TypeIdentity,
4446
UnionSpec,
@@ -88,7 +90,7 @@ def _render_supplement(
8890
registry: dict[TypeIdentity, PurePosixPath],
8991
reverse_refs: dict[TypeIdentity, list[UsedByEntry]],
9092
) -> RenderedPage:
91-
"""Render a single supplementary page (enum, NewType, or sub-model)."""
93+
"""Render a single supplementary type page."""
9294
output_path = resolve_output_path(tid, registry)
9395
ctx = LinkContext(output_path, registry)
9496
used_by = reverse_refs.get(tid)
@@ -99,6 +101,8 @@ def _render_supplement(
99101
content = render_newtype(spec, ctx, used_by=used_by)
100102
elif isinstance(spec, ModelSpec):
101103
content = render_feature(spec, ctx, used_by=used_by)
104+
elif isinstance(spec, PydanticTypeSpec):
105+
content = render_pydantic_type(spec, link_ctx=ctx, used_by=used_by)
102106
else:
103107
raise TypeError(f"Unhandled SupplementarySpec variant: {type(spec).__name__}")
104108

packages/overture-schema-codegen/src/overture/schema/codegen/markdown_renderer.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
ModelSpec,
3030
NewTypeSpec,
3131
PrimitiveSpec,
32+
PydanticTypeSpec,
3233
TypeIdentity,
3334
UnionSpec,
3435
)
@@ -42,6 +43,7 @@
4243
"render_geometry_from_values",
4344
"render_newtype",
4445
"render_primitives_from_specs",
46+
"render_pydantic_type",
4547
]
4648

4749

@@ -496,6 +498,19 @@ def render_newtype(
496498
)
497499

498500

501+
def render_pydantic_type(
    spec: PydanticTypeSpec,
    link_ctx: LinkContext | None = None,
    used_by: list[UsedByEntry] | None = None,
) -> str:
    """Produce the Markdown page for a Pydantic built-in type.

    The ``pydantic_type.md.jinja2`` template is rendered with *spec*
    exposed as ``pydantic_type`` and with a ``used_by`` context built
    from *used_by* and *link_ctx*.
    """
    page_template = _get_jinja_env().get_template("pydantic_type.md.jinja2")
    used_by_context = _build_used_by_context(used_by, link_ctx)
    return page_template.render(pydantic_type=spec, used_by=used_by_context)
512+
513+
499514
# Matches the ge/le bounds of the int64 NewType in overture.schema.system.primitive.
500515
_INT64_MIN = -(2**63)
501516
_INT64_MAX = 2**63 - 1

packages/overture-schema-codegen/src/overture/schema/codegen/markdown_type_format.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,26 @@ def _linked_type_identity(ti: TypeInfo) -> TypeIdentity | None:
6262
return None
6363

6464

65+
def _try_primitive_link(
    ti: TypeInfo, display_name: str, ctx: LinkContext | None
) -> str | None:
    """Link a PRIMITIVE type to its registry page, if it has one.

    Both registered primitives (int32, Geometry) and Pydantic types
    (HttpUrl) may own a page in the registry. The link text is
    *display_name*, i.e. the type registry display name (``geometry``
    rather than ``Geometry``).

    Returns ``None`` when *ti* is not a PRIMITIVE, when *ctx* is falsy,
    when *ti* carries neither a ``newtype_ref`` nor a ``source_type``,
    or when the context resolves no link for the identity.
    """
    if ti.kind != TypeKind.PRIMITIVE:
        return None
    if not ctx:
        return None

    # Prefer the NewType reference; fall back to the underlying source type.
    target = ti.newtype_ref or ti.source_type
    if target is None:
        return None

    href = ctx.resolve_link(TypeIdentity(target, display_name))
    return _code_link(display_name, href) if href else None
83+
84+
6585
def _markdown_type_name(ti: TypeInfo) -> str:
6686
"""Return the markdown display name for a type.
6787
@@ -129,7 +149,12 @@ def format_type(
129149
display = _wrap_list_n(display, ti.list_depth)
130150
else:
131151
base = resolve_type_name(ti, "markdown")
132-
if ti.is_list:
152+
link = _try_primitive_link(ti, base, ctx)
153+
if link and ti.is_list:
154+
display = _wrap_list_n(link, ti.list_depth)
155+
elif link:
156+
display = link
157+
elif ti.is_list:
133158
display = _plain_list_type(base, ti.list_depth)
134159
else:
135160
display = f"`{base}`"

packages/overture-schema-codegen/src/overture/schema/codegen/path_assignment.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from .case_conversion import slug_filename
1111
from .module_layout import compute_output_dir, output_dir_for_entry_point
12-
from .specs import FeatureSpec, SupplementarySpec, TypeIdentity
12+
from .specs import FeatureSpec, PydanticTypeSpec, SupplementarySpec, TypeIdentity
1313

1414
__all__ = [
1515
"GEOMETRY_PAGE",
@@ -48,6 +48,13 @@ def build_placement_registry(
4848
for tid, supp_spec in all_specs.items():
4949
if tid in registry:
5050
continue
51+
if isinstance(supp_spec, PydanticTypeSpec):
52+
registry[tid] = (
53+
PurePosixPath("pydantic")
54+
/ supp_spec.source_module
55+
/ slug_filename(tid.name)
56+
)
57+
continue
5158
source_module = getattr(supp_spec.source_type, "__module__", None)
5259
if source_module is None:
5360
continue
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""Pydantic built-in type extraction."""
2+
3+
import re
4+
5+
from .docstring import first_docstring_line
6+
from .specs import PydanticTypeSpec
7+
8+
__all__ = ["extract_pydantic_type"]
9+
10+
# Matches bare admonition labels like "Info:" or "Note:" with no following text.
11+
_ADMONITION_LABEL = re.compile(r"^\w+:\s*$")
12+
13+
14+
def _usable_description(doc: str | None) -> str | None:
    """Pick the first docstring line unless it is only an admonition label.

    Returns ``None`` when there is no first line or when that line
    matches ``_ADMONITION_LABEL`` (a bare label such as ``Info:``).
    """
    summary = first_docstring_line(doc)
    if summary is not None and not _ADMONITION_LABEL.match(summary):
        return summary
    return None
20+
21+
22+
def extract_pydantic_type(cls: type) -> PydanticTypeSpec:
    """Build a PydanticTypeSpec describing a Pydantic built-in type class.

    The spec's ``source_module`` is the class's module with a leading
    ``pydantic.`` removed.

    Raises:
        ValueError: If the module of *cls* does not start with ``pydantic``.
    """
    module = getattr(cls, "__module__", "")
    if module.startswith("pydantic"):
        return PydanticTypeSpec(
            name=cls.__name__,
            description=_usable_description(cls.__doc__),
            source_type=cls,
            source_module=module.removeprefix("pydantic."),
        )
    msg = f"Expected a pydantic type, got {cls!r} from {module!r}"
    raise ValueError(msg)

packages/overture-schema-codegen/src/overture/schema/codegen/reverse_references.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
SupplementarySpec,
1515
TypeIdentity,
1616
UnionSpec,
17+
is_pydantic_type,
1718
)
1819
from .type_analyzer import TypeInfo, TypeKind, walk_type_info
1920

@@ -94,6 +95,11 @@ def _visit(node: TypeInfo) -> None:
9495
referrer_kind,
9596
)
9697

98+
if is_pydantic_type(node):
99+
add_reference(
100+
TypeIdentity.of(node.source_type), referrer, referrer_kind
101+
)
102+
97103
if node.union_members is not None:
98104
for member_cls in node.union_members:
99105
add_reference(

packages/overture-schema-codegen/src/overture/schema/codegen/specs.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,13 @@
2222
"ModelSpec",
2323
"NewTypeSpec",
2424
"PrimitiveSpec",
25+
"PydanticTypeSpec",
2526
"SupplementarySpec",
2627
"TypeIdentity",
2728
"filter_model_classes",
2829
"is_model_class",
30+
"is_pydantic_sourced",
31+
"is_pydantic_type",
2932
"is_union_alias",
3033
]
3134

@@ -59,8 +62,8 @@ def __hash__(self) -> int:
5962
class _SourceTypeIdentityMixin:
6063
"""Mixin providing ``identity`` from ``source_type`` and ``name``.
6164
62-
Shared by EnumSpec, ModelSpec, and NewTypeSpec -- each has a
63-
``source_type`` (the Python class/callable) and a ``name``.
65+
Shared by EnumSpec, ModelSpec, NewTypeSpec, and PydanticTypeSpec --
66+
each has a ``source_type`` (the Python class/callable) and a ``name``.
6467
UnionSpec uses ``source_annotation`` instead, so it defines its
6568
own ``identity``.
6669
"""
@@ -190,14 +193,46 @@ class PrimitiveSpec:
190193
float_bits: int | None = None
191194

192195

193-
SupplementarySpec = EnumSpec | NewTypeSpec | ModelSpec
196+
@dataclass
class PydanticTypeSpec(_SourceTypeIdentityMixin):
    """Describes one Pydantic built-in type (HttpUrl, EmailStr, etc.)."""

    name: str
    description: str | None
    source_type: type
    source_module: str

    @property
    def docs_url(self) -> str:
        """URL of this type's entry in the Pydantic API documentation."""
        module = self.source_module
        page = f"https://docs.pydantic.dev/latest/api/{module}"
        anchor = f"/#pydantic.{module}.{self.name}"
        return page + anchor
212+
213+
214+
SupplementarySpec = EnumSpec | NewTypeSpec | ModelSpec | PydanticTypeSpec
194215
"""Non-feature types referenced by feature models.
195216
196217
Excludes PrimitiveSpec and geometry types, which are extracted
197218
separately via dedicated functions.
198219
"""
199220

200221

222+
def is_pydantic_sourced(source_type: type | None) -> bool:
223+
"""Check whether *source_type* originates from the ``pydantic`` package."""
224+
return getattr(source_type, "__module__", "").startswith("pydantic")
225+
226+
227+
def is_pydantic_type(ti: TypeInfo) -> bool:
    """Tell whether *ti* describes a Pydantic built-in type.

    True only for PRIMITIVE-kind type infos that carry a ``source_type``
    accepted by ``is_pydantic_sourced``.
    """
    if ti.kind != TypeKind.PRIMITIVE:
        return False
    if ti.source_type is None:
        return False
    return is_pydantic_sourced(ti.source_type)
234+
235+
201236
def is_model_class(obj: object) -> TypeGuard[type[BaseModel]]:
202237
"""Check whether *obj* is a concrete BaseModel subclass (not a type alias)."""
203238
return isinstance(obj, type) and issubclass(obj, BaseModel)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# {{ pydantic_type.name }}
2+
{% if pydantic_type.description %}
3+
4+
{{ pydantic_type.description | linkify_urls }}
5+
{% endif %}
6+
7+
See: [Pydantic docs]({{ pydantic_type.docs_url }})
8+
{% include '_used_by.md.jinja2' %}

packages/overture-schema-codegen/src/overture/schema/codegen/type_collection.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,15 @@
1010
from .enum_extraction import extract_enum
1111
from .model_extraction import extract_model
1212
from .newtype_extraction import extract_newtype
13-
from .specs import FeatureSpec, FieldSpec, ModelSpec, SupplementarySpec, TypeIdentity
13+
from .pydantic_extraction import extract_pydantic_type
14+
from .specs import (
15+
FeatureSpec,
16+
FieldSpec,
17+
ModelSpec,
18+
SupplementarySpec,
19+
TypeIdentity,
20+
is_pydantic_type,
21+
)
1422
from .type_analyzer import TypeInfo, TypeKind, analyze_type, is_newtype, walk_type_info
1523
from .type_registry import is_semantic_newtype
1624

@@ -106,6 +114,12 @@ def _visit(node: TypeInfo) -> None:
106114
if newly_registered:
107115
_collect_inner_newtypes(node.newtype_ref)
108116

117+
if is_pydantic_type(node):
118+
assert node.source_type is not None # guaranteed by is_pydantic_type
119+
pid = TypeIdentity.of(node.source_type)
120+
if pid not in all_specs:
121+
all_specs[pid] = extract_pydantic_type(node.source_type)
122+
109123
walk_type_info(ti, _visit)
110124

111125
def _collect_from_fields(fields: list[FieldSpec]) -> None:

packages/overture-schema-codegen/tests/codegen_test_support.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import pytest
1515
from overture.schema.codegen.model_extraction import extract_model
16+
from overture.schema.codegen.pydantic_extraction import extract_pydantic_type
1617
from overture.schema.codegen.specs import (
1718
AnnotatedField,
1819
EnumMemberSpec,
@@ -37,7 +38,7 @@
3738
)
3839
from overture.schema.system.ref import Id, Identified, Reference, Relationship
3940
from overture.schema.system.string import HexColor, LanguageTag, StrippedString
40-
from pydantic import BaseModel, Field
41+
from pydantic import BaseModel, EmailStr, Field, HttpUrl
4142

4243
STR_TYPE = TypeInfo(base_type="str", kind=TypeKind.PRIMITIVE)
4344

@@ -196,6 +197,17 @@ class FeatureWithDict(
196197
metadata: dict[str, int] = Field(description="Numeric metadata")
197198

198199

200+
class FeatureWithUrl(FeatureBase[Literal["test"], Literal["linked"]]):
201+
"""A feature with Pydantic URL and email fields."""
202+
203+
website: HttpUrl | None = None
204+
emails: list[EmailStr] | None = None
205+
206+
207+
HTTP_URL_SPEC = extract_pydantic_type(HttpUrl)
208+
EMAIL_STR_SPEC = extract_pydantic_type(EmailStr)
209+
210+
199211
class SegmentBase(BaseModel):
200212
"""Common base for test segments."""
201213

0 commit comments

Comments
 (0)