|
1 | | -"""Factory implementation for MDIO v1 datasets.""" |
| 1 | +"""MDIO factories for seismic data.""" |
2 | 2 |
|
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import importlib |
| 6 | +from datetime import UTC |
3 | 7 | from datetime import datetime |
4 | | -from datetime import timezone |
| 8 | +from enum import Enum |
| 9 | +from enum import auto |
| 10 | +from typing import Any |
| 11 | +from typing import Dict |
5 | 12 | from typing import List |
6 | 13 | from typing import Optional |
7 | 14 |
|
8 | | -from mdio.schema.compressors import ZFP |
| 15 | +from mdio.core.v1.builder import MDIODatasetBuilder |
9 | 16 | from mdio.schema.compressors import Blosc |
10 | | -from mdio.schema.dimension import NamedDimension |
11 | 17 | from mdio.schema.dtype import ScalarType |
12 | 18 | from mdio.schema.dtype import StructuredType |
13 | | -from mdio.schema.metadata import UserAttributes |
14 | 19 | from mdio.schema.v1.dataset import Dataset |
15 | 20 | from mdio.schema.v1.units import AllUnits |
16 | | -from mdio.schema.v1.variable import Coordinate |
17 | | -from mdio.schema.v1.variable import Variable |
18 | | -from mdio.schema.v1.variable import VariableMetadata |
| 21 | +from mdio.schema.v1.units import LengthUnitModel |
| 22 | + |
| 23 | + |
class MDIOSchemaType(Enum):
    """Identifiers for the built-in MDIO dataset schema templates.

    Each member is used as a key of ``SCHEMA_TEMPLATE_MAP``, which resolves
    it to a ready-made template object. Values are assigned with ``auto()``
    in declaration order, so keep new members at the end if the numeric
    values matter to any caller.
    """

    # 3D post-stack, vertical axis left generic
    SEISMIC_3D_POST_STACK_GENERIC = auto()

    # 3D post-stack, vertical axis in a named domain
    SEISMIC_3D_POST_STACK_TIME = auto()
    SEISMIC_3D_POST_STACK_DEPTH = auto()

    # 3D pre-stack CDP, vertical axis in a named domain
    SEISMIC_3D_PRE_STACK_CDP_TIME = auto()
    SEISMIC_3D_PRE_STACK_CDP_DEPTH = auto()
27 | 32 |
|
28 | 33 |
|
29 | | -class AbstractTemplateFactory: |
30 | | - """Abstract factory for creating MDIO datasets.""" |
class Seismic3DPostStackGeneric:
    """Template for a generic 3D post-stack seismic dataset schema.

    The template declares one dimension per entry of ``_dim_names``, an
    optional set of coordinates described by ``_coords`` and three
    variables: ``seismic``, ``headers`` and ``trace_mask``. Subclasses
    customise the layout by overriding ``_dim_names``, ``_chunks`` and
    ``_coords`` in their ``__init__``; the last entry of ``_dim_names`` is
    always treated as the vertical (sample) axis.
    """

    def __init__(self):
        """Initialize generic post stack dataset."""
        self._dim_names = ["inline", "crossline", "sample"]
        self._chunks = [128, 128, 128]  # 8 MiB per chunk at float32
        # coordinate name -> (sample format, units metadata, dimension names)
        self._coords = {
            "cdp-x": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-1]),
            "cdp-y": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-1]),
        }

    def create(
        self,
        name: str,
        shape: List[int],
        header_fields: Dict[str, str],
        create_coords: bool = False,
        sample_format: Optional[str] = None,
        chunks: Optional[List[int]] = None,
        sample_units: Optional[Dict[str, str]] = None,
        z_units: Optional[Dict[str, str]] = None,
        attributes: Optional[Dict[str, Any]] = None,
    ) -> Dataset:
        """Create a generic seismic dataset schema.

        Args:
            name: Name of the dataset
            shape: Size of each dimension, in the same order as the
                template's dimension names
            header_fields: Header fields to include as a dict of field_name: dtype
            create_coords: Whether to create coordinates
            sample_format: Format of the samples; defaults to "float32"
            chunks: Chunk sizes. Accepted for interface compatibility but
                currently not applied to the built schema (see note below)
            sample_units: Units metadata attached to the seismic variable
            z_units: Units metadata attached to the last (vertical) dimension
            attributes: Additional attributes to include in the dataset metadata

        Returns:
            Dataset: The created dataset

        Raises:
            ValueError: If ``shape`` does not have one entry per dimension.
        """
        # zip() below would silently drop dimensions (or sizes) on a rank
        # mismatch, so reject it before anything is built.
        if len(shape) != len(self._dim_names):
            raise ValueError(
                f"shape has {len(shape)} entries but the template defines "
                f"{len(self._dim_names)} dimensions: {self._dim_names}"
            )

        # NOTE(review): `chunks` (falling back to `self._chunks`) was resolved
        # here but never handed to the builder, so it has no effect on the
        # result. Wire it in once the builder API for chunk grids is
        # confirmed.
        sample_format = sample_format or "float32"

        # Subclasses rename the last axis (e.g. "time" / "depth"), so identify
        # the vertical axis by position rather than by the literal "sample".
        vertical_dim = self._dim_names[-1]
        spatial_dims = self._dim_names[:-1]

        builder = MDIODatasetBuilder(
            name=name,
            attributes=attributes,
        )

        # Add dimensions
        for dim_name, dim_size in zip(self._dim_names, shape):
            builder.add_dimension(
                name=dim_name,
                size=dim_size,
                data_type=ScalarType.UINT32,
                metadata=z_units if dim_name == vertical_dim else None,
            )

        # Add coordinates if requested
        if create_coords:
            for coord_name, (format_, unit, coord_dims) in self._coords.items():
                builder.add_coordinate(
                    name=coord_name,
                    data_type=ScalarType(format_),
                    dimensions=coord_dims,
                    metadata=unit,
                )

        # Add seismic variable
        builder.add_variable(
            name="seismic",
            data_type=ScalarType(sample_format),
            # Pass a copy so the builder never holds the template's own list.
            dimensions=list(self._dim_names),
            compressor=Blosc(name="blosc", algorithm="zstd"),
            metadata=sample_units,
        )

        # Add header variable with structured dtype (one record per trace)
        header_dtype = StructuredType(
            fields=[
                {"name": field_name, "format": field_type}
                for field_name, field_type in header_fields.items()
            ]
        )
        builder.add_variable(
            name="headers",
            data_type=header_dtype,
            dimensions=list(spatial_dims),
            compressor=Blosc(name="blosc"),
        )

        # Add trace mask
        builder.add_variable(
            name="trace_mask",
            data_type=ScalarType.BOOL,
            dimensions=list(spatial_dims),
            compressor=Blosc(name="blosc"),
        )

        return builder.build()
| 131 | + |
| 132 | + |
class Seismic3DPostStack(Seismic3DPostStackGeneric):
    """3D post-stack seismic template whose vertical axis is a named domain.

    Behaves like the generic post-stack template, except that the last
    dimension is named after ``domain`` and the dataset attributes are
    seeded with post-stack survey defaults.
    """

    def __init__(self, domain: str):
        """Initialize post stack dataset.

        Args:
            domain: Domain of the dataset (time/depth); used as the name of
                the last dimension
        """
        super().__init__()
        self._dim_names = ["inline", "crossline", domain]

    def create(
        self,
        name: str,
        shape: List[int],
        header_fields: Dict[str, str],
        create_coords: bool = False,
        sample_format: Optional[str] = None,
        chunks: Optional[List[int]] = None,
        sample_units: Optional[Dict[str, str]] = None,
        z_units: Optional[Dict[str, str]] = None,
        attributes: Optional[Dict[str, Any]] = None,
    ) -> Dataset:
        """Create a post-stack dataset schema with survey attributes.

        All arguments are forwarded unchanged to the parent ``create``,
        except ``attributes``: the post-stack defaults are applied first and
        any caller-supplied keys override them.
        """
        merged_attrs = {
            "surveyDimensionality": "3D",
            "ensembleType": "line",
            "processingStage": "post-stack",
        }
        # Caller-provided entries win over the defaults above.
        merged_attrs.update(attributes or {})

        forwarded = {
            "name": name,
            "shape": shape,
            "header_fields": header_fields,
            "create_coords": create_coords,
            "sample_format": sample_format,
            "chunks": chunks,
            "sample_units": sample_units,
            "z_units": z_units,
            "attributes": merged_attrs,
        }
        return super().create(**forwarded)
| 178 | + |
| 179 | + |
class Seismic3DPreStack(Seismic3DPostStackGeneric):
    """3D pre-stack seismic template with an offset axis.

    Extends the generic template to four dimensions
    (inline, crossline, offset, domain) and seeds the dataset attributes
    with pre-stack CDP defaults.
    """

    def __init__(self, domain: str):
        """Initialize pre stack dataset.

        Args:
            domain: Domain of the dataset (time/depth); used as the name of
                the last dimension
        """
        super().__init__()
        self._dim_names = ["inline", "crossline", "offset", domain]
        self._chunks = [1, 1, 512, 4096]  # 8 MiB per chunk at float32
        # CDP coordinates span only inline/crossline, hence dropping the last
        # two dimension names. The units metadata uses the same "unitsV1"
        # wrapper as the parent template so both produce the same shape.
        self._coords = {
            "cdp-x": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-2]),
            "cdp-y": ("float32", {"unitsV1": {"length": "m"}}, self._dim_names[:-2]),
        }

    def create(
        self,
        name: str,
        shape: List[int],
        header_fields: Dict[str, str],
        create_coords: bool = False,
        sample_format: Optional[str] = None,
        chunks: Optional[List[int]] = None,
        sample_units: Optional[Dict[str, str]] = None,
        z_units: Optional[Dict[str, str]] = None,
        attributes: Optional[Dict[str, Any]] = None,
    ) -> Dataset:
        """Create a seismic dataset schema with pre-stack attributes.

        All arguments are forwarded unchanged to the parent ``create``,
        except ``attributes``: the pre-stack defaults are applied first and
        any caller-supplied keys override them.
        """
        # Add seismic-specific attributes
        seismic_attrs = {
            "surveyDimensionality": "3D",
            "ensembleType": "cdp",
            "processingStage": "pre-stack",
        }
        if attributes:
            # Caller-provided entries win over the defaults above.
            seismic_attrs.update(attributes)

        return super().create(
            name=name,
            shape=shape,
            header_fields=header_fields,
            create_coords=create_coords,
            sample_format=sample_format,
            chunks=chunks,
            sample_units=sample_units,
            z_units=z_units,
            attributes=seismic_attrs,
        )
84 | | - return self |
85 | | - |
86 | | - def _compose_metadata(self): |
87 | | - """Compose the DatasetMetadata with the given name, api_version, and created_on.""" |
88 | | - return make_dataset_metadata(self.name, self.api_version, self.created_on) |
89 | | - |
90 | | - def _compose_variables(self) -> List[Variable]: |
91 | | - """Compose the Variables with the given parameters.""" |
92 | | - return [ |
93 | | - make_variable( |
94 | | - self.name, |
95 | | - self.dimensions, |
96 | | - self.data_type, |
97 | | - self.compressor, |
98 | | - self.coordinates, |
99 | | - self.metadata, |
100 | | - ) |
101 | | - ] |
102 | 230 |
|
103 | | - def make_dataset(self, variables: List[Variable]) -> Dataset: |
104 | | - """Create a Dataset with the given variables and metadata.""" |
105 | | - return Dataset(variables=variables, metadata=self._compose_metadata()) |
| 231 | + |
# Registry resolving each schema type to its template object. The templates
# are instantiated once, at import time, and the same instance is returned
# for every lookup.
SCHEMA_TEMPLATE_MAP: Dict[MDIOSchemaType, Seismic3DPostStackGeneric] = dict(
    (
        (MDIOSchemaType.SEISMIC_3D_POST_STACK_GENERIC, Seismic3DPostStackGeneric()),
        (MDIOSchemaType.SEISMIC_3D_POST_STACK_TIME, Seismic3DPostStack("time")),
        (MDIOSchemaType.SEISMIC_3D_POST_STACK_DEPTH, Seismic3DPostStack("depth")),
        (MDIOSchemaType.SEISMIC_3D_PRE_STACK_CDP_TIME, Seismic3DPreStack("time")),
        (MDIOSchemaType.SEISMIC_3D_PRE_STACK_CDP_DEPTH, Seismic3DPreStack("depth")),
    )
)
0 commit comments