1+ """Dataset template factory for MDIO v1. 
2+ 
3+ This module provides a factory for creating MDIO dataset templates, both canonical 
4+ and custom. It includes a builder pattern for flexible dataset creation. 
5+ """ 
6+ 
7+ from  __future__ import  annotations 
8+ 
9+ from  datetime  import  datetime 
10+ from  datetime  import  timezone 
11+ from  typing  import  Callable 
12+ from  typing  import  List 
13+ from  typing  import  Optional 
14+ from  typing  import  Union 
15+ 
16+ from  mdio .schema .base  import  BaseDataset 
17+ from  mdio .schema .compressors  import  Blosc 
18+ from  mdio .schema .compressors  import  ZFP 
19+ from  mdio .schema .v1 .dataset  import  Dataset 
20+ from  mdio .schema .v1 .dataset  import  DatasetMetadata 
21+ from  mdio .schema .chunk_grid  import  RegularChunkGrid 
22+ from  mdio .schema .chunk_grid  import  RectilinearChunkGrid 
23+ 
24+ 
class DatasetTemplateFactory:
    """Factory for creating MDIO dataset templates.

    The factory keeps a registry that maps template names to callables
    returning a ``Dataset``. Two canonical seismic templates are registered
    on construction; more can be added with ``register_template``.
    """

    def __init__(self):
        # Maps template name -> callable accepting **kwargs and returning a Dataset.
        self._templates = {}
        self._register_canonical_templates()

    def _register_canonical_templates(self):
        """Register built-in canonical templates."""
        # Register common seismic data templates
        self._templates["seismic_poststack"] = self._create_poststack_template
        self._templates["seismic_prestack"] = self._create_prestack_template
        # Add more canonical templates as needed

    def create_template(self, template_name: str, **kwargs) -> Dataset:
        """Create a dataset template by name with optional customization.

        Args:
            template_name: Name of the template to create
            **kwargs: Customization parameters forwarded to the template callable

        Returns:
            Whatever the registered template callable returns (a configured
            Dataset instance for the canonical templates)

        Raises:
            ValueError: If template_name is not registered
        """
        try:
            template_func = self._templates[template_name]
        except KeyError:
            # List the registered names so a typo is easy to spot.
            available = ", ".join(sorted(map(str, self._templates)))
            raise ValueError(
                f"Unknown template: {template_name}. "
                f"Available templates: {available}"
            ) from None

        return template_func(**kwargs)

    def register_template(self, name: str, template_func: Callable):
        """Register a custom template function.

        Registering under an existing name replaces the previous entry.

        Args:
            name: Name of the template
            template_func: Function that returns a configured Dataset

        Raises:
            TypeError: If template_func is not callable
        """
        # Fail at registration time rather than on the first create_template call.
        if not callable(template_func):
            raise TypeError(
                "template_func must be callable, got "
                f"{type(template_func).__name__}"
            )
        self._templates[name] = template_func

    def _create_poststack_template(self, **kwargs) -> Dataset:
        """Create a post-stack seismic dataset template.

        Args:
            **kwargs: ``variables`` replaces the default variable list,
                ``metadata`` (a dict) is merged over the default metadata, and
                any other keyword (e.g. ``name``) is treated as a metadata field.

        Returns:
            A Dataset with a float32 ``data`` variable over
            (inline, crossline, sample) unless ``variables`` is overridden
        """
        return self._build_seismic_template(
            "poststack_seismic",
            ["inline", "crossline", "sample"],
            [64, 64, 64],
            kwargs,
        )

    def _create_prestack_template(self, **kwargs) -> Dataset:
        """Create a pre-stack seismic dataset template.

        Args:
            **kwargs: ``variables`` replaces the default variable list,
                ``metadata`` (a dict) is merged over the default metadata, and
                any other keyword (e.g. ``name``) is treated as a metadata field.

        Returns:
            A Dataset with a float32 ``data`` variable over
            (inline, crossline, offset, sample) unless ``variables`` is overridden
        """
        return self._build_seismic_template(
            "prestack_seismic",
            ["inline", "crossline", "offset", "sample"],
            [32, 32, 32, 64],
            kwargs,
        )

    def _build_seismic_template(self, default_name, dimensions, chunk_shape, overrides) -> Dataset:
        """Build a single-variable seismic Dataset shared by the canonical templates.

        Args:
            default_name: Metadata ``name`` used when the caller gives none
            dimensions: Dimension names of the default ``data`` variable
            chunk_shape: Chunk shape of the default ``data`` variable
            overrides: Caller keyword arguments, passed as a plain dict so that
                keys cannot collide with this helper's own parameters

        Returns:
            A configured Dataset instance

        Raises:
            TypeError: If ``metadata`` is not a dict and extra metadata
                keywords are also supplied
        """
        # Work on a copy so the caller's dict is never mutated.
        overrides = dict(overrides)

        variables = overrides.pop("variables", None)
        if variables is None:
            variables = [
                {
                    "name": "data",
                    "data_type": "float32",
                    "dimensions": list(dimensions),
                    "compressor": Blosc(),
                    "chunk_grid": RegularChunkGrid(chunk_shape=list(chunk_shape)),
                }
            ]

        user_metadata = overrides.pop("metadata", None)
        if user_metadata is not None and not isinstance(user_metadata, dict):
            # A pre-built metadata object is passed through untouched; there is
            # no safe way to fold loose keywords into it.
            if overrides:
                unexpected = ", ".join(sorted(overrides))
                raise TypeError(
                    "Cannot combine a non-dict 'metadata' with metadata "
                    f"keywords: {unexpected}"
                )
            return Dataset(variables=variables, metadata=user_metadata)

        metadata = {
            "name": default_name,
            "api_version": "1.0",
            "created_on": datetime.now(timezone.utc),
        }
        if user_metadata:
            metadata.update(user_metadata)
        # NOTE(review): remaining keywords (name, description, ...) are routed
        # into metadata instead of being passed to Dataset as top-level
        # arguments. This assumes Dataset only takes `variables` and
        # `metadata`, as DatasetBuilder.build does -- confirm against the schema.
        metadata.update(overrides)

        return Dataset(variables=variables, metadata=metadata)
117+ 
118+ 
class DatasetBuilder:
    """Builder for creating custom MDIO datasets.

    Variables and metadata are accumulated through chainable calls and
    turned into a ``Dataset`` by ``build``.
    """

    def __init__(self):
        self._variables = []
        self._metadata = {}

    def add_variable(self, name: str, data_type: str, dimensions: List[str],
                     compressor: Optional[Union[Blosc, ZFP]] = None,
                     chunk_grid: Optional[Union[RegularChunkGrid, RectilinearChunkGrid]] = None) -> "DatasetBuilder":
        """Add a variable to the dataset.

        Args:
            name: Variable name
            data_type: Data type (from ScalarType)
            dimensions: List of dimension names
            compressor: Optional compressor configuration
            chunk_grid: Optional chunk grid configuration

        Returns:
            self for method chaining
        """
        variable = {
            "name": name,
            "data_type": data_type,
            # Copy so later mutation of the caller's list cannot alter the builder.
            "dimensions": list(dimensions),
        }

        # Compare against None explicitly: a configuration object that happens
        # to be falsy must still be recorded.
        if compressor is not None:
            variable["compressor"] = compressor
        if chunk_grid is not None:
            variable["chunk_grid"] = chunk_grid

        self._variables.append(variable)
        return self

    def set_metadata(self, **kwargs) -> "DatasetBuilder":
        """Set dataset metadata.

        Repeated calls accumulate; a later value for the same key replaces
        the earlier one.

        Args:
            **kwargs: Metadata key-value pairs

        Returns:
            self for method chaining
        """
        self._metadata.update(kwargs)
        return self

    def build(self) -> Dataset:
        """Build the dataset with configured variables and metadata.

        ``api_version`` defaults to ``"1.0"`` and ``created_on`` to the current
        UTC time. Values given through ``set_metadata`` take precedence, so
        supplying either key no longer fails with a duplicate-keyword error.

        Returns:
            A configured Dataset instance
        """
        metadata_fields = {
            "api_version": "1.0",
            "created_on": datetime.now(timezone.utc),
            **self._metadata,
        }
        return Dataset(
            # Shallow-copy each variable so the built dataset does not share
            # the builder's internal dicts.
            variables=[dict(variable) for variable in self._variables],
            metadata=DatasetMetadata(**metadata_fields),
        )
181+ 
182+ 
def create_dataset(template_name: Optional[str] = None, **kwargs) -> Dataset:
    """Create a new MDIO dataset.

    This is the main entry point for creating MDIO datasets. It can either:
    1. Create a dataset from a canonical template
    2. Create a custom dataset using the builder pattern

    Args:
        template_name: Optional name of a canonical template to use. When
            omitted (or empty), the dataset is assembled with DatasetBuilder.
        **kwargs: Additional configuration parameters. With a template they
            are forwarded to the template unchanged. Without one,
            ``variables`` is an iterable of dicts holding ``add_variable``
            keyword arguments, ``metadata`` is a mapping of metadata fields,
            and every other keyword is treated as a metadata field.

    Returns:
        A configured Dataset instance

    Raises:
        ValueError: If template_name is given but not registered
    """
    if template_name:
        return DatasetTemplateFactory().create_template(template_name, **kwargs)

    # No template requested: feed the keyword arguments to the builder rather
    # than silently dropping them.
    builder = DatasetBuilder()
    for variable in kwargs.pop("variables", None) or ():
        builder.add_variable(**variable)

    metadata = kwargs.pop("metadata", None)
    if metadata:
        builder.set_metadata(**metadata)
    # Loose keywords are applied last so they win over the ``metadata`` mapping.
    builder.set_metadata(**kwargs)

    return builder.build()
204+ 
205+ 
if __name__ == "__main__":
    # Examples 1 and 2: build the canonical post-stack and pre-stack templates.
    # Each entry is (heading, template name, dataset name, description).
    template_examples = (
        (
            "Post-stack dataset created:",
            "seismic_poststack",
            "my_survey",
            "A post-stack seismic dataset",
        ),
        (
            "Pre-stack dataset created:",
            "seismic_prestack",
            "my_prestack_survey",
            "A pre-stack seismic dataset",
        ),
    )
    for heading, template, survey_name, summary in template_examples:
        dataset = create_dataset(
            template_name=template,
            name=survey_name,
            description=summary,
        )
        print(heading)
        print(f"Name: {dataset.metadata.name}  ")
        print(f"Variables: {[var.name for var in dataset.variables]}  ")
        print(f"Dimensions: {dataset.variables[0].dimensions}  ")
        print()

    # Example 3: assemble a custom dataset step by step with the builder.
    custom_builder = DatasetBuilder()
    custom_builder.add_variable(
        name="data",
        data_type="float32",
        dimensions=["x", "y", "z"],
        compressor=Blosc(),
        chunk_grid=RegularChunkGrid(chunk_shape=[32, 32, 32]),
    )
    custom_builder.add_variable(
        name="quality",
        data_type="uint8",
        dimensions=["x", "y", "z"],
        compressor=ZFP(),
    )
    custom_builder.set_metadata(
        name="custom_survey",
        description="A custom seismic dataset with quality control",
        author="John Doe",
        date_acquired="2024-01-01",
    )
    custom_dataset = custom_builder.build()

    print("Custom dataset created:")
    print(f"Name: {custom_dataset.metadata.name}  ")
    print(f"Variables: {[var.name for var in custom_dataset.variables]}  ")
    print(f"Description: {custom_dataset.metadata.description}  ")
0 commit comments