Skip to content

Commit 3c3dc22

Browse files
committed
Begin canonical dataset factory creation.
1 parent 12677f7 commit 3c3dc22

File tree

3 files changed

+353
-289
lines changed

3 files changed

+353
-289
lines changed

src/mdio/core/v1/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,18 @@
1313
make_named_dimension,
1414
make_variable,
1515
)
16-
from .factory import AbstractTemplateFactory
17-
16+
from .factory import MDIOSchemaType
17+
from .factory import SCHEMA_TEMPLATE_MAP
1818

1919
# Names re-exported as the public API of this package.
__all__ = [
    # Builder and serializer helpers.
    "MDIODatasetBuilder",
    "make_coordinate",
    "make_dataset",
    "make_dataset_metadata",
    "make_named_dimension",
    "make_variable",
    "mdio",
    "write_mdio_metadata",
    # Schema template registry from ``.factory``.
    "MDIOSchemaType",
    "SCHEMA_TEMPLATE_MAP",
]

src/mdio/core/v1/factory.py

Lines changed: 216 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,105 +1,238 @@
1-
"""Factory implementation for MDIO v1 datasets."""
1+
"""MDIO factories for seismic data."""
22

3+
from __future__ import annotations
4+
5+
import importlib
6+
from datetime import UTC
37
from datetime import datetime
4-
from datetime import timezone
8+
from enum import Enum
9+
from enum import auto
10+
from typing import Any
11+
from typing import Dict
512
from typing import List
613
from typing import Optional
714

8-
from mdio.schema.compressors import ZFP
15+
from mdio.core.v1.builder import MDIODatasetBuilder
916
from mdio.schema.compressors import Blosc
10-
from mdio.schema.dimension import NamedDimension
1117
from mdio.schema.dtype import ScalarType
1218
from mdio.schema.dtype import StructuredType
13-
from mdio.schema.metadata import UserAttributes
1419
from mdio.schema.v1.dataset import Dataset
1520
from mdio.schema.v1.units import AllUnits
16-
from mdio.schema.v1.variable import Coordinate
17-
from mdio.schema.v1.variable import Variable
18-
from mdio.schema.v1.variable import VariableMetadata
21+
from mdio.schema.v1.units import LengthUnitModel
22+
23+
24+
class MDIOSchemaType(Enum):
    """Identifiers for the dataset schema templates defined in this module.

    Member values are assigned by ``auto()``; only the member identity is
    meaningful to callers.
    """

    # Post-stack templates.
    SEISMIC_3D_POST_STACK_GENERIC = auto()
    SEISMIC_3D_POST_STACK_TIME = auto()
    SEISMIC_3D_POST_STACK_DEPTH = auto()

    # Pre-stack (CDP) templates.
    SEISMIC_3D_PRE_STACK_CDP_TIME = auto()
    SEISMIC_3D_PRE_STACK_CDP_DEPTH = auto()
2732

2833

29-
class AbstractTemplateFactory:
30-
"""Abstract factory for creating MDIO datasets."""
34+
class Seismic3DPostStackGeneric:
    """Template for a generic 3D post-stack seismic dataset.

    The instance stores the dimension names, default chunk sizes and the
    optional CDP coordinates; :meth:`create` feeds them to
    ``MDIODatasetBuilder`` to produce a ``Dataset`` schema. Subclasses adapt
    the template by reassigning ``_dim_names``, ``_chunks`` and ``_coords``
    after calling ``super().__init__()``.
    """

    def __init__(self):
        """Initialize generic post stack dataset."""
        # Order matters: shapes passed to ``create`` are matched positionally.
        self._dim_names = ["inline", "crossline", "sample"]
        # 128**3 samples * 4 bytes (the float32 default) = 8 MiB per chunk.
        self._chunks = [128, 128, 128]
        # coordinate name -> (sample format, units metadata, dimension names)
        self._coords = {
            "cdp-x": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-1]),
            "cdp-y": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-1]),
        }

    def create(
        self,
        name: str,
        shape: List[int],
        header_fields: Dict[str, str],
        create_coords: bool = False,
        sample_format: Optional[str] = None,
        chunks: Optional[List[int]] = None,
        sample_units: Optional[Dict[str, str]] = None,
        z_units: Optional[Dict[str, str]] = None,
        attributes: Optional[Dict[str, Any]] = None,
    ) -> Dataset:
        """Create a generic seismic dataset schema.

        Args:
            name: Name of the dataset
            shape: Size of each dimension, in the order of the template's
                dimension names; must have exactly one entry per dimension
            header_fields: Header fields to include as a dict of field_name: dtype
            create_coords: Whether to create the template's coordinates
            sample_format: Format of the samples; defaults to "float32"
            chunks: Chunk sizes (accepted but currently not applied, see note
                in the body)
            sample_units: Units metadata for the "seismic" variable
            z_units: Units metadata attached to the last dimension
            attributes: Additional attributes to include in the dataset metadata

        Returns:
            Dataset: The created dataset

        Raises:
            ValueError: If ``shape`` does not have one entry per dimension.
        """
        # Fail early: a rank mismatch would otherwise be silently truncated by
        # zip() and leave variables referring to dimensions never declared.
        if len(shape) != len(self._dim_names):
            raise ValueError(
                f"shape has {len(shape)} entries but the template defines "
                f"{len(self._dim_names)} dimensions: {self._dim_names}"
            )

        sample_format = sample_format or "float32"

        # NOTE(review): ``chunks`` (default ``self._chunks``) is accepted but
        # not forwarded anywhere; none of the builder calls below takes a
        # chunk argument. Wire it through once the builder exposes one.

        builder = MDIODatasetBuilder(
            name=name,
            attributes=attributes,
        )

        # Add dimensions. ``z_units`` belongs to the last axis; keying on the
        # position (not on the literal name "sample") keeps it working for
        # subclasses that rename that axis, e.g. to "time" or "depth".
        last_index = len(self._dim_names) - 1
        for index, (dim_name, dim_size) in enumerate(
            zip(self._dim_names, shape, strict=True)
        ):
            builder.add_dimension(
                name=dim_name,
                size=dim_size,
                data_type=ScalarType.UINT32,
                metadata=z_units if index == last_index else None,
            )

        # Add coordinates if requested
        if create_coords:
            for coord_name, (format_, unit, coord_dims) in self._coords.items():
                builder.add_coordinate(
                    name=coord_name,
                    data_type=ScalarType(format_),
                    dimensions=coord_dims,
                    metadata=unit,
                )

        # Add seismic variable spanning every dimension
        builder.add_variable(
            name="seismic",
            data_type=ScalarType(sample_format),
            dimensions=self._dim_names,
            compressor=Blosc(name="blosc", algorithm="zstd"),
            metadata=sample_units,
        )

        # Add header variable with structured dtype (all dimensions but the last)
        header_dtype = StructuredType(
            fields=[
                {"name": field_name, "format": field_type}
                for field_name, field_type in header_fields.items()
            ]
        )
        builder.add_variable(
            name="headers",
            data_type=header_dtype,
            dimensions=self._dim_names[:-1],
            compressor=Blosc(name="blosc"),
        )

        # Add trace mask on the same dimensions as the headers
        builder.add_variable(
            name="trace_mask",
            data_type=ScalarType.BOOL,
            dimensions=self._dim_names[:-1],
            compressor=Blosc(name="blosc"),
        )

        return builder.build()
131+
132+
133+
class Seismic3DPostStack(Seismic3DPostStackGeneric):
    """Post-stack 3D seismic template whose last axis is named after the domain."""

    def __init__(self, domain: str):
        """Set up the template for one domain.

        Args:
            domain: Name given to the last dimension (time/depth)
        """
        super().__init__()
        # Same layout as the generic template, with the last axis renamed.
        self._dim_names = ["inline", "crossline", domain]

    def create(
        self,
        name: str,
        shape: List[int],
        header_fields: Dict[str, str],
        create_coords: bool = False,
        sample_format: Optional[str] = None,
        chunks: Optional[List[int]] = None,
        sample_units: Optional[Dict[str, str]] = None,
        z_units: Optional[Dict[str, str]] = None,
        attributes: Optional[Dict[str, Any]] = None,
    ) -> Dataset:
        """Create a post-stack dataset schema with default seismic attributes.

        All arguments are forwarded unchanged to the parent ``create``, except
        ``attributes``: three default entries are supplied and any entry given
        by the caller takes precedence over a default with the same key.
        """
        merged_attrs = dict(
            surveyDimensionality="3D",
            ensembleType="line",
            processingStage="post-stack",
        )
        # Caller-supplied values are applied last, so they win on collisions.
        merged_attrs.update(attributes or {})

        return super().create(
            name=name,
            shape=shape,
            header_fields=header_fields,
            create_coords=create_coords,
            sample_format=sample_format,
            chunks=chunks,
            sample_units=sample_units,
            z_units=z_units,
            attributes=merged_attrs,
        )
178+
179+
180+
class Seismic3DPreStack(Seismic3DPostStackGeneric):
    """3D seismic pre stack dataset."""

    def __init__(self, domain: str):
        """Initialize pre stack dataset.

        Args:
            domain: Domain of the dataset (time/depth); used as the name of
                the last dimension
        """
        super().__init__()
        self._dim_names = ["inline", "crossline", "offset", domain]
        # 1 * 1 * 512 * 4096 samples * 4 bytes (float32 default) = 8 MiB per chunk.
        self._chunks = [1, 1, 512, 4096]
        # coordinate name -> (sample format, units metadata, dimension names).
        # The units are wrapped in "unitsV1" exactly as in the parent template
        # so both templates hand the builder the same metadata layout.
        # Coordinates cover inline/crossline only (last two axes dropped).
        self._coords = {
            "cdp-x": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-2]),
            "cdp-y": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-2]),
        }

    def create(
        self,
        name: str,
        shape: List[int],
        header_fields: Dict[str, str],
        create_coords: bool = False,
        sample_format: Optional[str] = None,
        chunks: Optional[List[int]] = None,
        sample_units: Optional[Dict[str, str]] = None,
        z_units: Optional[Dict[str, str]] = None,
        attributes: Optional[Dict[str, Any]] = None,
    ) -> Dataset:
        """Create a seismic dataset schema with pre-stack attributes.

        All arguments are forwarded unchanged to the parent ``create``, except
        ``attributes``: three default entries are supplied and any entry given
        by the caller overrides a default with the same key.

        Args:
            name: Name of the dataset
            shape: Size of each of the four dimensions, in template order
            header_fields: Header fields to include as a dict of field_name: dtype
            create_coords: Whether to create the template's coordinates
            sample_format: Format of the samples
            chunks: Chunk sizes
            sample_units: Units for samples
            z_units: Units for z-axis
            attributes: Additional attributes to include in the dataset metadata

        Returns:
            Dataset: The created dataset
        """
        # Add seismic-specific attributes
        seismic_attrs = {
            "surveyDimensionality": "3D",
            "ensembleType": "cdp",
            "processingStage": "pre-stack",
        }
        if attributes:
            # Applied after the defaults so caller values win on collisions.
            seismic_attrs.update(attributes)

        return super().create(
            name=name,
            shape=shape,
            header_fields=header_fields,
            create_coords=create_coords,
            sample_format=sample_format,
            chunks=chunks,
            sample_units=sample_units,
            z_units=z_units,
            attributes=seismic_attrs,
        )
84-
return self
85-
86-
def _compose_metadata(self):
87-
"""Compose the DatasetMetadata with the given name, api_version, and created_on."""
88-
return make_dataset_metadata(self.name, self.api_version, self.created_on)
89-
90-
def _compose_variables(self) -> List[Variable]:
91-
"""Compose the Variables with the given parameters."""
92-
return [
93-
make_variable(
94-
self.name,
95-
self.dimensions,
96-
self.data_type,
97-
self.compressor,
98-
self.coordinates,
99-
self.metadata,
100-
)
101-
]
102230

103-
def make_dataset(self, variables: List[Variable]) -> Dataset:
104-
"""Create a Dataset with the given variables and metadata."""
105-
return Dataset(variables=variables, metadata=self._compose_metadata())
231+
232+
# Registry from schema type to its template. The template objects are built
# once, when this module is imported, and the same instances are returned on
# every lookup.
SCHEMA_TEMPLATE_MAP: Dict[MDIOSchemaType, Seismic3DPostStackGeneric] = {
    # Post-stack
    MDIOSchemaType.SEISMIC_3D_POST_STACK_GENERIC: Seismic3DPostStackGeneric(),
    MDIOSchemaType.SEISMIC_3D_POST_STACK_TIME: Seismic3DPostStack("time"),
    MDIOSchemaType.SEISMIC_3D_POST_STACK_DEPTH: Seismic3DPostStack("depth"),
    # Pre-stack (CDP)
    MDIOSchemaType.SEISMIC_3D_PRE_STACK_CDP_TIME: Seismic3DPreStack("time"),
    MDIOSchemaType.SEISMIC_3D_PRE_STACK_CDP_DEPTH: Seismic3DPreStack("depth"),
}

0 commit comments

Comments
 (0)