Skip to content

Commit 1d27207

Browse files
committed
Begin brainstorm
1 parent 6b10729 commit 1d27207

File tree

1 file changed

+258
-0
lines changed

1 file changed

+258
-0
lines changed

src/mdio/core/_v1_factory.py

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
"""Dataset template factory for MDIO v1.
2+
3+
This module provides a factory for creating MDIO dataset templates, both canonical
4+
and custom. It includes a builder pattern for flexible dataset creation.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
from datetime import datetime
10+
from datetime import timezone
11+
from typing import Callable
12+
from typing import List
13+
from typing import Optional
14+
from typing import Union
15+
16+
from mdio.schema.base import BaseDataset
17+
from mdio.schema.compressors import Blosc
18+
from mdio.schema.compressors import ZFP
19+
from mdio.schema.v1.dataset import Dataset
20+
from mdio.schema.v1.dataset import DatasetMetadata
21+
from mdio.schema.chunk_grid import RegularChunkGrid
22+
from mdio.schema.chunk_grid import RectilinearChunkGrid
23+
24+
25+
class DatasetTemplateFactory:
    """Registry-backed factory for MDIO dataset templates.

    Template names map to callables that accept keyword arguments and return
    a ``Dataset``. Two canonical seismic templates are registered when the
    factory is constructed; additional ones can be added with
    ``register_template``.
    """

    def __init__(self):
        # Maps template name -> callable(**kwargs) returning a Dataset.
        self._templates = {}
        self._register_canonical_templates()

    def _register_canonical_templates(self):
        """Register the built-in canonical seismic templates."""
        self._templates["seismic_poststack"] = self._create_poststack_template
        self._templates["seismic_prestack"] = self._create_prestack_template
        # Add more canonical templates as needed

    def create_template(self, template_name: str, **kwargs) -> Dataset:
        """Create a dataset from a registered template.

        Args:
            template_name: Name of the template to create
            **kwargs: Customization parameters forwarded to the template callable

        Returns:
            Whatever the registered template callable returns (a Dataset for
            the canonical templates)

        Raises:
            ValueError: If template_name is not registered
        """
        if template_name not in self._templates:
            raise ValueError(f"Unknown template: {template_name}")

        return self._templates[template_name](**kwargs)

    def register_template(self, name: str, template_func: Callable):
        """Register (or replace) a template under ``name``.

        Args:
            name: Name of the template
            template_func: Callable taking keyword arguments and returning a
                configured Dataset

        Raises:
            TypeError: If template_func is not callable
        """
        # Fail at registration time rather than much later inside
        # create_template, where the origin of the bad entry is harder to trace.
        if not callable(template_func):
            raise TypeError(
                f"template_func must be callable, got {type(template_func).__name__}"
            )
        self._templates[name] = template_func

    def _create_seismic_template(self, default_name, dimensions, chunk_shape, overrides) -> Dataset:
        """Build a single-variable float32 seismic dataset.

        Shared implementation behind the canonical templates, which differ
        only in their default name, dimension names and chunk shape.

        Args:
            default_name: Metadata name used when ``overrides`` has no "name"
            dimensions: Dimension names of the "data" variable
            chunk_shape: Chunk shape handed to RegularChunkGrid
            overrides: Caller keyword arguments; a top-level key replaces the
                corresponding default wholesale (shallow merge)

        Returns:
            A configured Dataset instance
        """
        overrides = dict(overrides)
        # "name" belongs to the metadata; pop it so it is not also forwarded
        # to Dataset as a top-level field.
        name = overrides.pop("name", default_name)

        config = {
            "variables": [
                {
                    "name": "data",
                    "data_type": "float32",
                    "dimensions": list(dimensions),
                    "compressor": Blosc(),
                    "chunk_grid": RegularChunkGrid(chunk_shape=list(chunk_shape)),
                }
            ],
            "metadata": {
                "name": name,
                "api_version": "1.0",
                "created_on": datetime.now(timezone.utc),
            },
        }

        # NOTE(review): every remaining override (e.g. "description") is still
        # passed to Dataset as a top-level field -- confirm Dataset accepts it.
        config.update(overrides)
        return Dataset(**config)

    def _create_poststack_template(self, **kwargs) -> Dataset:
        """Create a post-stack seismic dataset template (inline, crossline, sample)."""
        return self._create_seismic_template(
            "poststack_seismic",
            ["inline", "crossline", "sample"],
            [64, 64, 64],
            kwargs,
        )

    def _create_prestack_template(self, **kwargs) -> Dataset:
        """Create a pre-stack seismic dataset template (inline, crossline, offset, sample)."""
        return self._create_seismic_template(
            "prestack_seismic",
            ["inline", "crossline", "offset", "sample"],
            [32, 32, 32, 64],
            kwargs,
        )
117+
118+
119+
class DatasetBuilder:
    """Fluent builder for assembling a custom MDIO dataset.

    Variables and metadata are accumulated through chained calls and turned
    into a ``Dataset`` by ``build``.
    """

    def __init__(self):
        self._variables = []  # variable specs, in insertion order
        self._metadata = {}  # user-supplied metadata key/value pairs

    def add_variable(self, name: str, data_type: str, dimensions: List[str],
                     compressor: Optional[Union[Blosc, ZFP]] = None,
                     chunk_grid: Optional[Union[RegularChunkGrid, RectilinearChunkGrid]] = None) -> "DatasetBuilder":
        """Add a variable to the dataset.

        Args:
            name: Variable name
            data_type: Data type (from ScalarType)
            dimensions: List of dimension names
            compressor: Optional compressor configuration
            chunk_grid: Optional chunk grid configuration

        Returns:
            self for method chaining
        """
        variable = {
            "name": name,
            "data_type": data_type,
            # Copy so later mutation of the caller's list cannot alter the
            # variable already recorded in the builder.
            "dimensions": list(dimensions),
        }

        # Only an explicit None means "not provided"; do not depend on the
        # truthiness of the configuration objects.
        if compressor is not None:
            variable["compressor"] = compressor
        if chunk_grid is not None:
            variable["chunk_grid"] = chunk_grid

        self._variables.append(variable)
        return self

    def set_metadata(self, **kwargs) -> "DatasetBuilder":
        """Set dataset metadata.

        Repeated calls merge into the existing metadata; later values win.

        Args:
            **kwargs: Metadata key-value pairs

        Returns:
            self for method chaining
        """
        self._metadata.update(kwargs)
        return self

    def _metadata_kwargs(self) -> dict:
        """Return the keyword arguments used to construct the dataset metadata.

        ``api_version`` defaults to "1.0" and ``created_on`` to the current
        UTC time. Values set through ``set_metadata`` take precedence, so
        supplying either key no longer triggers a duplicate-keyword TypeError.
        """
        fields = {
            "api_version": "1.0",
            "created_on": datetime.now(timezone.utc),
        }
        fields.update(self._metadata)
        return fields

    def build(self) -> Dataset:
        """Build the dataset with configured variables and metadata.

        Returns:
            A configured Dataset instance
        """
        return Dataset(
            variables=self._variables,
            metadata=DatasetMetadata(**self._metadata_kwargs()),
        )
181+
182+
183+
def create_dataset(template_name: Optional[str] = None, **kwargs) -> Dataset:
    """Create a new MDIO dataset.

    This is the main entry point for creating MDIO datasets. It can either:
    1. Create a dataset from a canonical template
    2. Create a custom dataset using the builder pattern

    Args:
        template_name: Optional name of a canonical template to use. A falsy
            value (None or "") selects the builder path.
        **kwargs: Additional configuration parameters. With a template they
            are forwarded to the template; without one they are recorded as
            dataset metadata on a builder that has no variables.

    Returns:
        A configured Dataset instance

    Raises:
        ValueError: If template_name is given but not registered
    """
    if template_name:
        return DatasetTemplateFactory().create_template(template_name, **kwargs)

    # Previously kwargs were silently discarded on this path; pass them to the
    # builder as metadata so caller-supplied configuration is not lost.
    return DatasetBuilder().set_metadata(**kwargs).build()
204+
205+
206+
if __name__ == "__main__":
    # Demo 1: post-stack dataset from the canonical template.
    poststack = create_dataset(
        "seismic_poststack",
        name="my_survey",
        description="A post-stack seismic dataset",
    )
    print("Post-stack dataset created:")
    print(f"Name: {poststack.metadata.name}")
    print(f"Variables: {[v.name for v in poststack.variables]}")
    print(f"Dimensions: {poststack.variables[0].dimensions}")
    print()

    # Demo 2: pre-stack dataset from the canonical template.
    prestack = create_dataset(
        "seismic_prestack",
        name="my_prestack_survey",
        description="A pre-stack seismic dataset",
    )
    print("Pre-stack dataset created:")
    print(f"Name: {prestack.metadata.name}")
    print(f"Variables: {[v.name for v in prestack.variables]}")
    print(f"Dimensions: {prestack.variables[0].dimensions}")
    print()

    # Demo 3: custom dataset assembled step by step with the builder.
    # Each builder call returns the builder itself, so separate statements
    # are equivalent to one chained expression.
    builder = DatasetBuilder()
    builder.add_variable(
        name="data",
        data_type="float32",
        dimensions=["x", "y", "z"],
        compressor=Blosc(),
        chunk_grid=RegularChunkGrid(chunk_shape=[32, 32, 32]),
    )
    builder.add_variable(
        name="quality",
        data_type="uint8",
        dimensions=["x", "y", "z"],
        compressor=ZFP(),
    )
    builder.set_metadata(
        name="custom_survey",
        description="A custom seismic dataset with quality control",
        author="John Doe",
        date_acquired="2024-01-01",
    )
    custom = builder.build()
    print("Custom dataset created:")
    print(f"Name: {custom.metadata.name}")
    print(f"Variables: {[v.name for v in custom.variables]}")
    print(f"Description: {custom.metadata.description}")

0 commit comments

Comments
 (0)