1+ """Builder pattern implementation for MDIO v1 schema models."""
2+
3+ from datetime import datetime , timezone
4+ from typing import Any , Optional , List , Dict , Union
5+ from enum import Enum , auto
6+
7+ from pydantic import AwareDatetime
8+
9+ from mdio .schema .dimension import NamedDimension
10+ from mdio .schema .compressors import Blosc , ZFP
11+ from mdio .schema .dtype import ScalarType , StructuredType
12+ from mdio .schema .metadata import UserAttributes
13+ from mdio .schema .v1 .units import AllUnits
14+ from mdio .schema .v1 .dataset import Dataset , DatasetMetadata
15+ from mdio .schema .v1 .variable import Variable , Coordinate , VariableMetadata
16+ from mdio .schema .v1 .template_factory import (
17+ make_named_dimension ,
18+ make_coordinate ,
19+ make_variable ,
20+ make_dataset_metadata ,
21+ make_dataset ,
22+ )
23+
24+
25+ class _BuilderState (Enum ):
26+ """States for the template builder."""
27+ INITIAL = auto ()
28+ HAS_DIMENSIONS = auto ()
29+ HAS_COORDINATES = auto ()
30+ HAS_VARIABLES = auto ()
31+
class TemplateBuilder:
    """Builder for creating MDIO datasets with enforced build order.

    1. Must add dimensions first via add_dimension()
    2. Can optionally add coordinates via add_coordinate()
    3. Must add variables via add_variable()
    4. Must call build() to create the dataset

    Only the first rule is checked at runtime: ``add_coordinate``,
    ``add_variable`` and ``build`` raise ``ValueError`` until at least one
    dimension has been added. Every ``add_*`` method returns the builder
    itself so calls can be chained.
    """

    def __init__(self, name: str, attributes: Optional[Dict[str, Any]] = None):
        """Create an empty builder.

        Args:
            name: Name handed to the dataset metadata at build time.
            attributes: Optional attributes handed to the dataset metadata.
        """
        self.name = name
        self.api_version = "1.0.0"  # TODO: Pull from package metadata
        # Timezone-aware creation timestamp, captured once at construction.
        self.created_on = datetime.now(timezone.utc)
        self.attributes = attributes
        self._dimensions: List[NamedDimension] = []
        self._coordinates: List[Coordinate] = []
        self._variables: List[Variable] = []
        self._state = _BuilderState.INITIAL
        self._unnamed_variable_counter = 0

    def _require_dimensions(self, action: str) -> None:
        """Raise ``ValueError`` if no dimension has been added before ``action``."""
        if self._state == _BuilderState.INITIAL:
            raise ValueError(f"Must add at least one dimension before {action}")

    def _resolve_dimensions(
        self, dimensions: Optional[List[NamedDimension | str]]
    ) -> List[NamedDimension]:
        """Turn a mixed list of dimension names/objects into dimension objects.

        ``None`` or an empty list selects every dimension registered so far.
        Strings are looked up by name among the registered dimensions (first
        match wins); any non-string entry is passed through unchanged.

        Raises:
            ValueError: If a string does not name a registered dimension.
        """
        if dimensions is None or dimensions == []:
            # Return a copy so callers never alias the builder's own list.
            return list(self._dimensions)

        resolved = []
        for dim in dimensions:
            if not isinstance(dim, str):
                resolved.append(dim)
                continue
            match = next((d for d in self._dimensions if d.name == dim), None)
            if match is None:
                raise ValueError(f"Dimension '{dim}' not found")
            resolved.append(match)
        return resolved

    def add_dimension(
        self,
        name: str,
        size: int,
        long_name: Optional[str] = None,
        data_type: ScalarType | StructuredType = ScalarType.INT32,
        metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None,
    ) -> 'TemplateBuilder':
        """Add a dimension. This must be called at least once before adding coordinates or variables.

        Besides registering the dimension, a variable with the same name that
        spans only this dimension is added to the builder's variables.

        Args:
            name: Name of the dimension
            size: Size of the dimension
            long_name: Optional long name for the dimension variable
            data_type: Data type for the dimension variable (defaults to INT32)
            metadata: Optional metadata for the dimension variable

        Returns:
            This builder, to allow chaining.
        """
        # Create the dimension
        dimension = make_named_dimension(name, size)
        self._dimensions.append(dimension)

        # Create a variable for the dimension
        dim_var = make_variable(
            name=name,
            long_name=long_name,
            dimensions=[dimension],
            data_type=data_type,
            metadata=metadata,
        )
        self._variables.append(dim_var)

        self._state = _BuilderState.HAS_DIMENSIONS
        return self

    def add_coordinate(
        self,
        name: str = "",
        *,
        long_name: Optional[str] = None,
        dimensions: Optional[List[NamedDimension | str]] = None,
        data_type: ScalarType | StructuredType = ScalarType.FLOAT32,
        metadata: Optional[List[AllUnits | UserAttributes]] | Dict[str, Any] = None,
    ) -> 'TemplateBuilder':
        """Add a coordinate after adding at least one dimension.

        Args:
            name: Coordinate name. When empty, ``coord_<n>`` is used, where
                ``<n>`` is the number of coordinates added so far.
            long_name: Optional long name for the coordinate.
            dimensions: Dimensions of the coordinate, as objects or names.
                ``None`` or an empty list means all registered dimensions.
            data_type: Data type of the coordinate (defaults to FLOAT32).
            metadata: Optional metadata. A single dict is wrapped in a
                one-element list before being passed on.

        Returns:
            This builder, to allow chaining.

        Raises:
            ValueError: If no dimension has been added yet, or a dimension
                name is not registered.
        """
        self._require_dimensions("adding coordinates")

        if name == "":
            name = f"coord_{len(self._coordinates)}"
        if isinstance(metadata, dict):
            metadata = [metadata]

        self._coordinates.append(
            make_coordinate(
                name=name,
                long_name=long_name,
                dimensions=self._resolve_dimensions(dimensions),
                data_type=data_type,
                metadata=metadata,
            )
        )
        self._state = _BuilderState.HAS_COORDINATES
        return self

    def add_variable(
        self,
        name: str = "",
        *,
        long_name: Optional[str] = None,
        dimensions: Optional[List[NamedDimension | str]] = None,
        data_type: ScalarType | StructuredType = ScalarType.FLOAT32,
        compressor: Blosc | ZFP | None = None,
        coordinates: Optional[List[Coordinate | str]] = None,
        metadata: Optional[VariableMetadata] = None,
    ) -> 'TemplateBuilder':
        """Add a variable after adding at least one dimension.

        Args:
            name: Variable name. When empty, ``var_<n>`` is used, where
                ``<n>`` counts the unnamed variables added so far.
            long_name: Optional long name for the variable.
            dimensions: Dimensions of the variable, as objects or names.
                ``None`` or an empty list means all registered dimensions.
            data_type: Data type of the variable (defaults to FLOAT32).
            compressor: Optional compressor, passed through unchanged.
            coordinates: Optional coordinates, passed through unchanged.
            metadata: Optional variable metadata, passed through unchanged.

        Returns:
            This builder, to allow chaining.

        Raises:
            ValueError: If no dimension has been added yet, or a dimension
                name is not registered.
        """
        self._require_dimensions("adding variables")

        if name == "":
            name = f"var_{self._unnamed_variable_counter}"
            self._unnamed_variable_counter += 1

        self._variables.append(
            make_variable(
                name=name,
                long_name=long_name,
                dimensions=self._resolve_dimensions(dimensions),
                data_type=data_type,
                compressor=compressor,
                coordinates=coordinates,
                metadata=metadata,
            )
        )
        self._state = _BuilderState.HAS_VARIABLES
        return self

    def build(self) -> Dataset:
        """Build the final dataset.

        The dataset contains every variable added so far (including the
        per-dimension variables) followed by one variable per coordinate.

        Raises:
            ValueError: If no dimension has been added yet.
        """
        self._require_dimensions("building")

        metadata = make_dataset_metadata(
            self.name,
            self.api_version,
            self.created_on,
            self.attributes,
        )

        # Coordinates are emitted as variables. They are collected in a fresh
        # list, leaving self._variables untouched, so that calling build()
        # again does not duplicate them.
        coordinate_variables = [
            make_variable(
                name=coord.name,
                long_name=coord.long_name,
                dimensions=coord.dimensions,
                data_type=coord.data_type,
                metadata=coord.metadata,
            )
            for coord in self._coordinates
        ]

        return make_dataset([*self._variables, *coordinate_variables], metadata)
0 commit comments