Skip to content

Commit 887948f

Browse files
Add __repr__ and _repr_html_ to builder and template (TGSAI#719)
* Add string representations to core builder classes for better usability and debugging * Add error message hint for special case issue * Add html representations for key classes * pre-commit * pre-commit * Remove specific edge case error message * Migrate html reprs to isolated source file * pre-commit * Safety, accessibility, and styling * Isolate template registry html repr * DRY principles * pre-commit * Use correct type annotation * Reduce repeated html * Cleanup code * update type annotations in HTML formatting utilities * validate unit type in `add_units` method of abstract dataset template * refactor HTML formatting utilities with centralized CSS styles and improved table-building logic * update import path for AbstractDatasetTemplate to use `base` module * handle `None` case in `_update_template_units` to return template directly * handle early return for `None` case in `_update_template_units` for clarity * add unit addition to template before early return in `_update_template_units` --------- Co-authored-by: Altay Sansal <[email protected]>
1 parent 12dfee9 commit 887948f

File tree

5 files changed

+418
-2
lines changed

5 files changed

+418
-2
lines changed

src/mdio/builder/dataset_builder.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import Any
88

99
from mdio import __version__
10+
from mdio.builder.formatting_html import dataset_builder_repr_html
1011
from mdio.builder.schemas.compressors import ZFP
1112
from mdio.builder.schemas.compressors import Blosc
1213
from mdio.builder.schemas.dimension import NamedDimension
@@ -295,3 +296,21 @@ def build(self) -> Dataset:
295296
raise ValueError(msg)
296297

297298
return Dataset(variables=self._variables, metadata=self._metadata)
299+
300+
def __repr__(self) -> str:
    """Summarize the builder as a single-line, constructor-like string.

    The summary lists the dataset name, the current builder state, and the
    names of every dimension, coordinate and variable held by the builder.
    """
    summary = {
        "name": repr(self._metadata.name),
        "state": self._state.name,
        "dimensions": [dim.name for dim in self._dimensions],
        "coordinates": [coord.name for coord in self._coordinates],
        "variables": [var.name for var in self._variables],
    }
    body = ", ".join(f"{label}={value}" for label, value in summary.items())
    return f"MDIODatasetBuilder({body})"
313+
314+
def _repr_html_(self) -> str:
    """Render the builder as an HTML fragment for display in Jupyter notebooks."""
    # Rendering lives in the formatting module; this method only delegates.
    rendered = dataset_builder_repr_html(self)
    return rendered
Lines changed: 359 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,359 @@
1+
"""HTML formatting utilities for MDIO builder classes."""
2+
3+
from __future__ import annotations
4+
5+
import html
6+
from dataclasses import dataclass
7+
from typing import TYPE_CHECKING
8+
9+
from mdio.builder.schemas.v1.units import AllUnitModel
10+
11+
if TYPE_CHECKING:
12+
from mdio.builder.dataset_builder import MDIODatasetBuilder
13+
from mdio.builder.template_registry import TemplateRegistry
14+
from mdio.builder.templates.base import AbstractDatasetTemplate
15+
16+
17+
@dataclass(frozen=True)
class CSSStyles:
    """Immutable bundle of the inline CSS snippets shared by the HTML reprs.

    Every field holds a ready-to-use value for an HTML ``style`` attribute.
    The aligned / positioned variants are derived from their ``*_base`` field
    when the class body is executed.
    """

    # Outer container box.
    box: str = (
        "font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; "
        "border: 1px solid rgba(128, 128, 128, 0.3); "
        "border-radius: 6px; "
        "padding: 12px; "
        "max-width: 100%; "
        "box-sizing: border-box; "
        "background: rgba(255, 255, 255, 0.02);"
    )

    # Title bar; the negative margins cancel the 12px padding declared in ``box``.
    header: str = (
        "padding: 8px 12px; "
        "margin: -12px -12px 10px -12px; "
        "border-bottom: 1px solid rgba(128, 128, 128, 0.3); "
        "background: rgba(128, 128, 128, 0.05); "
        "border-radius: 6px 6px 0 0;"
    )

    # Table cells: common part first, then one variant per text alignment.
    td_base: str = (
        "padding: 6px 6px; "
        "border-bottom: 1px solid rgba(128, 128, 128, 0.2); "
        "font-family: 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, 'Courier New', monospace; "
        "font-size: 12px; "
        "line-height: 1.3;"
    )
    td_left: str = td_base + " text-align: left;"
    td_center: str = td_base + " text-align: center;"
    td_right: str = td_base + " text-align: right;"

    # <summary> elements: common part, then variants that differ only in margin.
    summary_base: str = (
        "cursor: pointer; "
        "font-weight: 600; "
        "padding: 6px 8px; "
        "border-radius: 4px; "
        "transition: background-color 0.2s;"
    )
    summary_first: str = summary_base + " margin-bottom: 6px;"
    summary_subsequent: str = summary_base + " margin: 10px 0 6px 0;"
45+
46+
47+
class TableBuilder:
    """Accumulate rows and render them as a styled HTML ``<table>``.

    Args:
        headers: ``(label, css)`` pairs, one per column. ``label`` is inserted
            into the ``<th>`` verbatim (it may contain markup, so callers must
            escape untrusted text themselves); ``css`` is placed at the start
            of the ``<th>`` inline style, e.g. ``"text-align: left"``.
        table_id: Value of the table's ``id`` attribute. It also scopes the
            striping / hover rules emitted by :meth:`build`.
    """

    def __init__(self, headers: list[tuple[str, str]], table_id: str):
        self.headers = headers
        self.table_id = table_id
        self.rows: list[str] = []

    def add_row(self, *cells: object, aligns: list[str] | None = None, center_last: bool = False) -> None:
        """Append one ``<tr>`` built from ``cells``.

        Each cell is converted with ``str()`` and HTML-escaped. Calling the
        method without cells is a no-op.

        Args:
            *cells: Cell values, in column order.
            aligns: Optional per-column alignment names. ``"left"`` and
                ``"center"`` select the matching style; any other value is
                rendered right-aligned. Columns beyond the end of ``aligns``
                are left-aligned. When given, ``center_last`` is ignored.
            center_last: Without ``aligns``, center the last cell instead of
                left-aligning it.
        """
        if not cells:
            return

        css = CSSStyles()
        style_by_align = {"left": css.td_left, "center": css.td_center}

        cell_aligns = ["left"] * len(cells)
        if aligns:
            shared = min(len(aligns), len(cells))
            cell_aligns[:shared] = aligns[:shared]
        elif center_last:
            cell_aligns[-1] = "center"

        cell_html_parts = [
            f' <td style="{style_by_align.get(align, css.td_right)}">{html.escape(str(cell))}</td>'
            for cell, align in zip(cells, cell_aligns)
        ]
        self.rows.append("\n<tr>\n" + "\n".join(cell_html_parts) + "\n</tr>\n")

    def _empty_row_html(self, message: str) -> str:
        """Return a single-cell ``<tr>`` spanning every column, showing ``message``."""
        return (
            f"\n<tr>\n"
            f' <td colspan="{len(self.headers)}" style="padding: 8px; opacity: 0.5; text-align: left;">'
            f"{html.escape(str(message))}</td>\n"
            f"</tr>\n"
        )

    def add_empty_row(self, message: str) -> None:
        """Append a placeholder row whose single cell spans all columns.

        ``message`` is treated as plain text and HTML-escaped, matching how
        :meth:`add_row` treats cell values.
        """
        self.rows.append(self._empty_row_html(message))

    def build(self) -> str:
        """Render the ``<style>`` rules and the complete ``<table>`` markup.

        A table without rows is rendered with a "No data available"
        placeholder row. Rendering never modifies ``self.rows``, so the method
        can be called repeatedly with identical results.
        """
        header_html = "\n".join(
            f' <th style="{align}; padding: 6px; font-weight: 600;" '
            f'role="columnheader" scope="col">{name}</th>'
            for name, align in self.headers
        )
        header_section = (
            f" <thead>\n"
            f' <tr style="border-bottom: 1px solid rgba(128, 128, 128, 0.4);" '
            f'role="row">\n'
            f"{header_html}\n"
            f" </tr>\n"
            f" </thead>"
        )
        # Build the placeholder as a string: add_empty_row() returns None, and
        # interpolating that would print the literal text "None" in the table.
        body_content = "".join(self.rows) if self.rows else self._empty_row_html("No data available")
        return (
            f"\n <style>\n"
            f" #{self.table_id} tbody tr:nth-child(odd) {{ "
            f"background: rgba(128, 128, 128, 0.03); }}\n"
            f" #{self.table_id} tbody tr:hover {{ "
            f"background: rgba(128, 128, 128, 0.06); }}\n"
            f" </style>\n"
            f' <table id="{self.table_id}" style="width: 100%; border-collapse: collapse;" '
            f'role="table" aria-labelledby="{self.table_id}">\n'
            f"{header_section}\n"
            f' <tbody role="rowgroup">\n'
            f" {body_content}\n"
            f" </tbody>\n"
            f" </table>\n"
        )
117+
118+
119+
def make_html_container(header_title: str, content: str, header_id: str = "header") -> str:
    """Wrap ``content`` in the styled outer box with a title header.

    Args:
        header_title: Title shown in the box header; HTML-escaped before use.
        content: Pre-rendered HTML placed below the header, inserted as-is.
        header_id: ``id`` given to the ``<header>`` element; the surrounding
            region refers to it through ``aria-labelledby``.

    Returns:
        The HTML fragment for the whole container.
    """
    styles = CSSStyles()
    title = html.escape(str(header_title))
    fragment_lines = [
        f'\n <div style="{styles.box}" role="region" aria-labelledby="{header_id}">',
        f' <header style="{styles.header}" id="{header_id}">',
        f' <h3 style="font-size: 1.1em; margin: 0;">{title}</h3>',
        " </header>",
        f" {content}",
        " </div>\n",
    ]
    return "\n".join(fragment_lines)
131+
132+
133+
def make_metadata_section(metadata_items: list[tuple[str, str]]) -> str:
    """Render ``(label, value)`` pairs as bold-labelled lines inside a ``<div>``.

    Labels and values are inserted verbatim, so callers are responsible for
    escaping any text that is not already safe HTML.
    """
    rendered_lines = [f" <strong>{label}:</strong> {text}<br>" for label, text in metadata_items]
    body = "\n".join(rendered_lines)
    return '<div style="margin-bottom: 10px;">\n' + body + "\n</div>"
137+
138+
139+
def make_details_section(
    summary_text: str,
    table_html: str,
    section_id: str,
    expanded: bool = True,
    is_first: bool = False,
) -> str:
    """Create a collapsible ``<details>`` section wrapping a rendered table.

    Args:
        summary_text: Text of the clickable ``<summary>``; inserted verbatim.
        table_html: Pre-rendered table markup; inserted verbatim.
        section_id: Prefix for the generated ids: the summary gets
            ``"{section_id}-summary"`` and the content wrapper gets
            ``"{section_id}-table"``.
        expanded: Render the section open when true.
        is_first: Use the first-section summary style instead of the
            subsequent-section one (they differ only in margins).

    Returns:
        The HTML fragment for the section.
    """
    css = CSSStyles()
    expanded_attr = 'open aria-expanded="true"' if expanded else 'aria-expanded="false"'
    style = css.summary_first if is_first else css.summary_subsequent
    summary_id = f"{section_id}-summary"
    # aria-controls must name an element that exists in the document, so the
    # wrapper <div> below carries this id.
    content_id = f"{section_id}-table"
    # NOTE(review): the callers in this module build tables whose id is also
    # f"{section_id}-summary", which duplicates ``summary_id`` -- confirm and
    # rename one of the two.
    return (
        f"\n <details {expanded_attr}>\n"
        f' <summary style="{style}" aria-controls="{content_id}" '
        f'id="{summary_id}">{summary_text}</summary>\n'
        f' <div id="{content_id}" style="margin-left: 12px;">\n'
        f" {table_html}\n"
        f" </div>\n"
        f" </details>\n"
    )
161+
162+
163+
def format_unit_for_display(unit_model: AllUnitModel | None) -> str:
    """Return the display text for a unit model, or an em dash when there is none.

    For an ``AllUnitModel`` instance the result is ``str()`` of the ``.value``
    attribute of the model's first declared field. ``None`` and any other
    object yield ``"—"``.
    """
    if unit_model is not None and isinstance(unit_model, AllUnitModel):
        first_field = tuple(unit_model.__fields__)[0]
        unit = getattr(unit_model, first_field)
        return str(unit.value)
    return "—"
169+
170+
171+
def format_int_or_dash(value: object) -> str:
    """Format a table value: group integers by thousands, show a dash for "unset".

    Booleans are returned via ``str()`` (they are checked before the integer
    case because ``bool`` is a subclass of ``int``). ``None``, ``"—"`` and
    ``"Not set"`` become an em dash; anything else is returned via ``str()``.
    """
    unset_markers = (None, "—", "Not set")
    is_bool = isinstance(value, bool)
    if isinstance(value, int) and not is_bool:
        return f"{value:,}"
    if not is_bool and value in unset_markers:
        return "—"
    return str(value)
180+
181+
182+
def dataset_builder_repr_html(builder: MDIODatasetBuilder) -> str:
    """Render a dataset builder as an HTML summary for Jupyter notebooks.

    The output is a container titled "MDIODatasetBuilder" holding a metadata
    block (name, state, API version, creation time) followed by collapsible
    tables of the builder's dimensions, coordinates and variables.
    """
    left = "text-align: left"

    # Metadata block: every value is stringified and HTML-escaped.
    meta = builder._metadata
    created_str = meta.created_on.strftime("%Y-%m-%d %H:%M:%S UTC")
    raw_items = [
        ("Name", meta.name),
        ("State", builder._state.name),
        ("API Version", meta.api_version),
        ("Created", created_str),
    ]
    parts = [make_metadata_section([(label, html.escape(str(value))) for label, value in raw_items])]

    # Dimensions: name and size only.
    dimensions = builder._dimensions
    dim_table = TableBuilder(headers=[("Name", left), ("Size", left)], table_id="builder-dimensions-summary")
    for dim in dimensions:
        dim_table.add_row(dim.name, dim.size)
    parts.append(
        make_details_section(
            f"Dimensions ({len(dimensions)})",
            dim_table.build(),
            "builder-dimensions",
            is_first=True,
        )
    )

    # Coordinates and variables share the same three-column layout.
    for title, entries, section_id in (
        ("Coordinates", builder._coordinates, "builder-coordinates"),
        ("Variables", builder._variables, "builder-variables"),
    ):
        table = TableBuilder(
            headers=[("Name", left), ("Dimensions", left), ("Type", left)],
            table_id=f"{section_id}-summary",
        )
        for entry in entries:
            table.add_row(entry.name, ", ".join(d.name for d in entry.dimensions), entry.data_type)
        parts.append(make_details_section(f"{title} ({len(entries)})", table.build(), section_id))

    return make_html_container("MDIODatasetBuilder", "".join(parts), "builder-header")
249+
250+
251+
def template_repr_html(template: AbstractDatasetTemplate) -> str:
    """Render a dataset template as an HTML summary for Jupyter notebooks.

    The output is a container titled with the template's class name. It holds
    a metadata block (template name, data domain, default variable and its
    units) followed by collapsible tables of dimensions and coordinates.

    Args:
        template: The template to describe.

    Returns:
        The HTML fragment for the whole summary.
    """
    # Read these once instead of on every loop iteration.
    dimension_names = template.dimension_names
    spatial_names = template.spatial_dimension_names
    chunk_sizes = template.full_chunk_size
    physical_names = template.physical_coordinate_names

    # Build dimension table
    dim_table = TableBuilder(
        headers=[
            ("Name", "text-align: left"),
            ("Size", "text-align: right"),
            ("Chunk Sizes", "text-align: right"),
            ("Units", "text-align: right"),
            ("Spatial", "text-align: center"),
        ],
        table_id="dimensions-summary",
    )
    for i, name in enumerate(dimension_names):
        dim_table.add_row(
            name,
            format_int_or_dash(template._dim_sizes[i]),
            format_int_or_dash(chunk_sizes[i]),
            format_unit_for_display(template.get_unit_by_key(name)),
            "✓" if name in spatial_names else "",
            aligns=["left", "right", "right", "right", "center"],
        )

    # Build coordinates table
    all_coords = template.coordinate_names
    coord_table = TableBuilder(
        headers=[
            ("Name", "text-align: left"),
            ("Type", "text-align: left"),
            ("Units", "text-align: left"),
        ],
        table_id="coordinates-summary",
    )
    for coord in all_coords:
        coord_table.add_row(
            coord,
            "Physical" if coord in physical_names else "Logical",
            format_unit_for_display(template.get_unit_by_key(coord)),
        )

    # Metadata section
    default_variable_name = getattr(template, "_default_variable_name", "")
    default_var_units = format_unit_for_display(template.get_unit_by_key(default_variable_name))
    raw_items = [
        ("Template Name", template.name),
        ("Data Domain", template._data_domain),
        ("Default Variable", template.default_variable_name),
        ("Default Variable Units", default_var_units),
    ]
    # make_metadata_section() inserts values verbatim, so escape them here, as
    # dataset_builder_repr_html() does for its own metadata.
    metadata_items = [(label, html.escape(str(value))) for label, value in raw_items]
    metadata_html = make_metadata_section(metadata_items)

    # Details sections
    dimensions_section = make_details_section(
        f"Dimensions ({len(dimension_names)})",
        dim_table.build(),
        "dimensions",
        is_first=True,
    )
    coordinates_section = make_details_section(
        f"Coordinates ({len(all_coords)})",
        coord_table.build(),
        "coordinates",
    )

    content = metadata_html + dimensions_section + coordinates_section
    return make_html_container(template.__class__.__name__, content, "template-header")
321+
322+
323+
def template_registry_repr_html(registry: TemplateRegistry) -> str:
    """Render a template registry as an HTML table for Jupyter notebooks.

    One row is emitted per registered template, sorted by name, showing its
    default variable, dimension names, chunk sizes and coordinate names. The
    number of templates is shown next to the first column header.
    """
    template_names = registry.list_all_templates()
    left, center = "text-align: left", "text-align: center"

    # The first header embeds markup on purpose: TableBuilder inserts header labels verbatim.
    count_badge = f'<span style="opacity: 0.6; font-weight: 500;">({len(template_names)})</span>'
    # NOTE(review): the table id below equals the header id passed to
    # make_html_container(), so two elements share "registry-header" -- confirm intent.
    table = TableBuilder(
        headers=[
            (f"Template {count_badge}", left),
            ("Default Var", center),
            ("Dimensions", left),
            ("Chunk Sizes", left),
            ("Coords", left),
        ],
        table_id="registry-header",
    )

    row_aligns = ["left", "center", "left", "left", "left"]
    for template_name in sorted(template_names):
        tmpl = registry.get(template_name)
        table.add_row(
            template_name,
            tmpl._default_variable_name,
            ", ".join(tmpl.dimension_names),
            "×".join(str(size) for size in tmpl.full_chunk_size),
            ", ".join(tmpl.coordinate_names),
            aligns=row_aligns,
        )

    return make_html_container("TemplateRegistry", table.build(), "registry-header")

0 commit comments

Comments
 (0)