Skip to content

Commit 4d1087e

Browse files
committed
Begin implementing a builder pattern for Datasets
1 parent 3b8dca5 commit 4d1087e

File tree

2 files changed

+520
-0
lines changed

2 files changed

+520
-0
lines changed
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
"""Builder pattern implementation for MDIO v1 schema models."""
2+
3+
from datetime import datetime, timezone
4+
from typing import Any, Optional, List, Dict, Union
5+
from enum import Enum, auto
6+
7+
from pydantic import AwareDatetime
8+
9+
from mdio.schema.dimension import NamedDimension
10+
from mdio.schema.compressors import Blosc, ZFP
11+
from mdio.schema.dtype import ScalarType, StructuredType
12+
from mdio.schema.metadata import UserAttributes
13+
from mdio.schema.v1.units import AllUnits
14+
from mdio.schema.v1.dataset import Dataset, DatasetMetadata
15+
from mdio.schema.v1.variable import Variable, Coordinate, VariableMetadata
16+
from mdio.schema.v1.template_factory import (
17+
make_named_dimension,
18+
make_coordinate,
19+
make_variable,
20+
make_dataset_metadata,
21+
make_dataset,
22+
)
23+
24+
25+
class _BuilderState(Enum):
26+
"""States for the template builder."""
27+
INITIAL = auto()
28+
HAS_DIMENSIONS = auto()
29+
HAS_COORDINATES = auto()
30+
HAS_VARIABLES = auto()
31+
32+
class TemplateBuilder:
    """Fluent builder for MDIO v1 datasets.

    Intended call order:

    1. Add one or more dimensions via ``add_dimension()``.
    2. Optionally add coordinates via ``add_coordinate()``.
    3. Add variables via ``add_variable()``.
    4. Call ``build()`` to create the dataset.

    Only the "dimensions first" rule is enforced: ``add_coordinate()``,
    ``add_variable()`` and ``build()`` raise ``ValueError`` while no dimension
    has been added. Every ``add_*`` method returns ``self`` so calls can be
    chained.

    NOTE(review): ``_state`` is overwritten unconditionally by each ``add_*``
    call, so it reflects the most recent call rather than the furthest
    progress. Only the ``INITIAL`` value is ever checked.
    """

    def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None):
        """Create an empty builder.

        Args:
            name: Dataset name, forwarded to the dataset metadata on build.
            attributes: Optional user attributes, forwarded to the dataset
                metadata on build.
        """
        self.name = name
        self.api_version = "1.0.0"  # TODO: Pull from package metadata
        # Timezone-aware creation timestamp, captured once at construction.
        self.created_on = datetime.now(timezone.utc)
        self.attributes = attributes
        self._dimensions: List[NamedDimension] = []
        self._coordinates: List[Coordinate] = []
        self._variables: List[Variable] = []
        self._state = _BuilderState.INITIAL
        # Used only to generate default names for unnamed variables.
        self._unnamed_variable_counter = 0

    def _resolve_dimensions(
        self,
        dimensions: Optional[List[NamedDimension | str]],
    ) -> List[NamedDimension]:
        """Turn a mixed list of dimension names/objects into dimension objects.

        Args:
            dimensions: Dimension objects and/or names of dimensions already
                registered on this builder. ``None`` or an empty list selects
                every registered dimension, in insertion order.

        Returns:
            A new list; the builder's internal list is never handed out.

        Raises:
            ValueError: If a name does not match any registered dimension.
        """
        if not dimensions:
            return list(self._dimensions)

        resolved: List[NamedDimension] = []
        for dim in dimensions:
            if not isinstance(dim, str):
                # Already a dimension object; pass it through untouched.
                resolved.append(dim)
                continue
            # First registered dimension with a matching name wins.
            match = next((d for d in self._dimensions if d.name == dim), None)
            if match is None:
                raise ValueError(f"Dimension '{dim}' not found")
            resolved.append(match)
        return resolved

    def add_dimension(self,
                      name: str,
                      size: int,
                      long_name: Optional[str] = None,
                      data_type: ScalarType | StructuredType = ScalarType.INT32,
                      metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None) -> 'TemplateBuilder':
        """Add a dimension. This must be called at least once before adding coordinates or variables.

        Besides registering the dimension, a variable with the same name,
        spanning only this dimension, is added to the builder's variables.

        Args:
            name: Name of the dimension
            size: Size of the dimension
            long_name: Optional long name for the dimension variable
            data_type: Data type for the dimension variable (defaults to INT32)
            metadata: Optional metadata for the dimension variable

        Returns:
            This builder, to allow call chaining.
        """
        # Create the dimension
        dimension = make_named_dimension(name, size)
        self._dimensions.append(dimension)

        # Create a variable for the dimension
        dim_var = make_variable(
            name=name,
            long_name=long_name,
            dimensions=[dimension],
            data_type=data_type,
            metadata=metadata,
        )
        self._variables.append(dim_var)

        self._state = _BuilderState.HAS_DIMENSIONS
        return self

    def add_coordinate(self,
                       name: str = "",
                       *,
                       long_name: Optional[str] = None,
                       dimensions: Optional[List[NamedDimension | str]] = None,
                       data_type: ScalarType | StructuredType = ScalarType.FLOAT32,
                       metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None) -> 'TemplateBuilder':
        """Add a coordinate after adding at least one dimension.

        Args:
            name: Coordinate name. An empty string yields ``coord_<k>``, where
                ``k`` is the number of coordinates added so far.
            long_name: Optional long name for the coordinate.
            dimensions: Dimension objects or names of registered dimensions.
                ``None`` or an empty list means all registered dimensions.
            data_type: Data type of the coordinate (defaults to FLOAT32).
            metadata: Optional metadata; a single dict is wrapped in a list.

        Returns:
            This builder, to allow call chaining.

        Raises:
            ValueError: If no dimension has been added yet, or if a dimension
                name is not registered on this builder.
        """
        if self._state == _BuilderState.INITIAL:
            raise ValueError("Must add at least one dimension before adding coordinates")

        if name == "":
            name = f"coord_{len(self._coordinates)}"
        if isinstance(metadata, dict):
            metadata = [metadata]

        dim_objects = self._resolve_dimensions(dimensions)

        self._coordinates.append(make_coordinate(
            name=name,
            long_name=long_name,
            dimensions=dim_objects,
            data_type=data_type,
            metadata=metadata,
        ))
        self._state = _BuilderState.HAS_COORDINATES
        return self

    def add_variable(self,
                     name: str = "",
                     *,
                     long_name: Optional[str] = None,
                     dimensions: Optional[List[NamedDimension | str]] = None,
                     data_type: ScalarType | StructuredType = ScalarType.FLOAT32,
                     compressor: Blosc | ZFP | None = None,
                     coordinates: Optional[List[Coordinate | str]] = None,
                     metadata: Optional[VariableMetadata] = None) -> 'TemplateBuilder':
        """Add a variable after adding at least one dimension.

        Args:
            name: Variable name. An empty string yields ``var_<k>``, where
                ``k`` counts the unnamed variables added so far.
            long_name: Optional long name for the variable.
            dimensions: Dimension objects or names of registered dimensions.
                ``None`` or an empty list means all registered dimensions.
            data_type: Data type of the variable (defaults to FLOAT32).
            compressor: Optional compressor configuration.
            coordinates: Optional coordinates, passed through unchanged.
            metadata: Optional variable metadata, passed through unchanged.

        Returns:
            This builder, to allow call chaining.

        Raises:
            ValueError: If no dimension has been added yet, or if a dimension
                name is not registered on this builder.
        """
        if self._state == _BuilderState.INITIAL:
            raise ValueError("Must add at least one dimension before adding variables")

        if name == "":
            name = f"var_{self._unnamed_variable_counter}"
            self._unnamed_variable_counter += 1

        dim_objects = self._resolve_dimensions(dimensions)

        self._variables.append(make_variable(
            name=name,
            long_name=long_name,
            dimensions=dim_objects,
            data_type=data_type,
            compressor=compressor,
            coordinates=coordinates,
            metadata=metadata,
        ))
        self._state = _BuilderState.HAS_VARIABLES
        return self

    def build(self) -> Dataset:
        """Build the final dataset.

        Each stored coordinate is converted to a variable and appended after
        the builder's own variables. The builder's internal lists are not
        modified, so ``build()`` can be called more than once.

        Returns:
            The dataset produced from all variables and the dataset metadata.

        Raises:
            ValueError: If no dimension has been added yet.
        """
        if self._state == _BuilderState.INITIAL:
            raise ValueError("Must add at least one dimension before building")

        metadata = make_dataset_metadata(
            self.name,
            self.api_version,
            self.created_on,
            self.attributes,
        )

        # Add coordinates as variables to the dataset.
        # Work on a copy so that coordinates are not duplicated if the builder is reused.
        all_variables = self._variables.copy()
        all_variables.extend(
            make_variable(
                name=coord.name,
                long_name=coord.long_name,
                dimensions=coord.dimensions,
                data_type=coord.data_type,
                metadata=coord.metadata,
            )
            for coord in self._coordinates
        )

        return make_dataset(all_variables, metadata)

0 commit comments

Comments
 (0)