Skip to content

Commit 992eeed

Browse files
committed
Merge branch 'main' into zarrv3_with_uv -- Probably broken
2 parents 6c36189 + 7a8b751 commit 992eeed

File tree

15 files changed

+1522
-189
lines changed

15 files changed

+1522
-189
lines changed

docs/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ maxdepth: 1
1717
---
1818
installation
1919
notebooks/quickstart
20+
notebooks/creation
2021
notebooks/compression
2122
notebooks/rechunking
2223
usage

docs/notebooks/creation.ipynb

Lines changed: 815 additions & 0 deletions
Large diffs are not rendered by default.

src/mdio/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77
from mdio.api.convenience import copy_mdio
88
from mdio.converters import mdio_to_segy
99
from mdio.converters import segy_to_mdio
10+
from mdio.core.dimension import Dimension
11+
from mdio.core.factory import MDIOCreateConfig
12+
from mdio.core.factory import MDIOVariableConfig
13+
from mdio.core.factory import create_empty
14+
from mdio.core.factory import create_empty_like
15+
from mdio.core.grid import Grid
1016

1117

1218
__all__ = [
@@ -15,6 +21,12 @@
1521
"copy_mdio",
1622
"mdio_to_segy",
1723
"segy_to_mdio",
24+
"Dimension",
25+
"MDIOCreateConfig",
26+
"MDIOVariableConfig",
27+
"create_empty",
28+
"create_empty_like",
29+
"Grid",
1830
]
1931

2032

src/mdio/api/accessor.py

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ class MDIOAccessor:
129129
we can directly unpack it and use it further down our code.
130130
"""
131131

132+
_stats_keys = {"mean", "std", "rms", "min", "max"}
133+
132134
_array_load_function_mapper = {
133135
"zarr": open_zarr_array,
134136
"dask": open_zarr_array_dask,
@@ -154,16 +156,13 @@ def __init__(
154156

155157
# Set private attributes for public interface.
156158
# Pep8 complains because they are defined outside __init__
157-
self._binary_header = None
158159
self._chunks = None
159160
self._live_mask = None
160161
self._root = None
161162
self._n_dim = None
162163
self._orig_chunks = None
163164
self._store = None
164165
self._shape = None
165-
self._stats = None
166-
self._text_header = None
167166
self._trace_count = None
168167

169168
# Private attributes
@@ -205,13 +204,25 @@ def _validate_store(self, storage_options):
205204

206205
def _connect(self):
207206
"""Open the zarr root."""
208-
if self.mode in {"r", "r+"}:
209-
self.root = zarr.open_consolidated(store=self.store, mode=self.mode)
210-
elif self.mode == "w":
211-
self.root = zarr.open(store=self.store, mode="r+")
212-
else:
213-
msg = f"Invalid mode: {self.mode}"
214-
raise ValueError(msg)
207+
try:
208+
if self.mode in {"r", "r+"}:
209+
self.root = zarr.open_consolidated(store=self.store, mode=self.mode)
210+
elif self.mode == "w":
211+
self.root = zarr.open(store=self.store, mode="r+")
212+
else:
213+
msg = f"Invalid mode: {self.mode}"
214+
raise ValueError(msg)
215+
except KeyError as e:
216+
msg = (
217+
f"MDIO file not found or corrupt at {self.store.path}. "
218+
"Please check the URL or ensure it is not a deprecated "
219+
"version of MDIO file."
220+
)
221+
raise MDIONotFoundError(msg) from e
222+
223+
def _consolidate_metadata(self) -> None:
224+
"""Flush optimized MDIO metadata, run after modifying it."""
225+
zarr.consolidate_metadata(self.root.store)
215226

216227
def _deserialize_grid(self):
217228
"""Deserialize grid from Zarr metadata."""
@@ -220,12 +231,6 @@ def _deserialize_grid(self):
220231
def _set_attributes(self):
221232
"""Deserialize attributes from Zarr metadata."""
222233
self.trace_count = self.root.attrs["trace_count"]
223-
self.stats = {
224-
key: self.root.attrs[key] for key in ["mean", "std", "rms", "min", "max"]
225-
}
226-
227-
self.text_header = self._metadata_group.attrs["text_header"]
228-
self.binary_header = self._metadata_group.attrs["binary_header"]
229234

230235
# Grid based attributes
231236
self.shape = self.grid.shape
@@ -332,26 +337,28 @@ def trace_count(self, value: int) -> None:
332337
@property
333338
def text_header(self) -> list:
334339
"""Get seismic text header."""
335-
return self._text_header
340+
return self._metadata_group.attrs["text_header"]
336341

337342
@text_header.setter
338343
def text_header(self, value: list) -> None:
339344
"""Validate and set seismic text header."""
340-
if not isinstance(value, list):
345+
if not isinstance(value, list) or len(value) != 40:
341346
raise AttributeError("Text header must be a list of str with 40 elements")
342-
self._text_header = value
347+
self._metadata_group.attrs["text_header"] = value
348+
self._consolidate_metadata()
343349

344350
@property
345351
def binary_header(self) -> dict:
346352
"""Get seismic binary header metadata."""
347-
return self._binary_header
353+
return self._metadata_group.attrs["binary_header"]
348354

349355
@binary_header.setter
350356
def binary_header(self, value: dict) -> None:
351357
"""Validate and set seismic binary header metadata."""
352358
if not isinstance(value, dict):
353359
raise AttributeError("Binary header has to be a dictionary type collection")
354-
self._binary_header = value
360+
self._metadata_group.attrs["binary_header"] = value
361+
self._consolidate_metadata()
355362

356363
@property
357364
def chunks(self) -> tuple[int, ...]:
@@ -366,12 +373,16 @@ def chunks(self, value: tuple[int, ...]) -> None:
366373
@property
367374
def stats(self) -> dict:
368375
"""Get global statistics like min/max/rms/std."""
369-
return self._stats
376+
return {key: self.root.attrs[key] for key in self._stats_keys}
370377

371378
@stats.setter
372379
def stats(self, value: dict) -> None:
373380
"""Set global statistics like min/max/rms/std."""
374-
self._stats = value
381+
if not isinstance(value, dict) or not self._stats_keys.issubset(value.keys()):
382+
msg = f"For settings status, you must provide keys: {self._stats_keys}"
383+
raise AttributeError(msg)
384+
self.root.attrs.update(value)
385+
self._consolidate_metadata()
375386

376387
@property
377388
def _metadata_group(self) -> zarr.Group:
@@ -404,6 +415,7 @@ def __getitem__(self, item: int | tuple) -> npt.ArrayLike | da.Array | tuple:
404415
def __setitem__(self, key: int | tuple, value: npt.ArrayLike) -> None:
405416
"""Data setter."""
406417
self._traces[key] = value
418+
self._live_mask[key] = True
407419

408420
def coord_to_index(
409421
self,
@@ -644,10 +656,10 @@ def __init__(
644656
memory_cache_size: int = 0,
645657
disk_cache: bool = False,
646658
): # TODO: Disabled all caching by default, sometimes causes performance issues
647-
"""Initialize super class with `r+` permission."""
659+
"""Initialize accessor class with `w` permission."""
648660
super().__init__(
649661
mdio_path_or_buffer=mdio_path_or_buffer,
650-
mode="r+",
662+
mode="w",
651663
access_pattern=access_pattern,
652664
storage_options=storage_options,
653665
return_metadata=return_metadata,

src/mdio/constants.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,26 @@
1212
FLOAT64_MIN = np.finfo("float64").min
1313
FLOAT64_MAX = np.finfo("float64").max
1414

15-
INT8_MIN = -0x80
16-
INT8_MAX = 0x7F
15+
INT8_MIN = np.iinfo("int8").min
16+
INT8_MAX = np.iinfo("int8").max
1717

18-
INT16_MIN = -0x8000
19-
INT16_MAX = 0x7FFF
18+
INT16_MIN = np.iinfo("int16").min
19+
INT16_MAX = np.iinfo("int16").max
2020

21-
INT32_MIN = -0x80000000
22-
INT32_MAX = 0x7FFFFFFF
21+
INT32_MIN = np.iinfo("int32").min
22+
INT32_MAX = np.iinfo("int32").max
2323

24-
UINT8_MIN = 0x0
25-
UINT8_MAX = 0xFF
24+
INT64_MIN = np.iinfo("int64").min
25+
INT64_MAX = np.iinfo("int64").max
2626

27-
UINT16_MIN = 0x0
28-
UINT16_MAX = 0xFFFF
27+
UINT8_MIN = 0
28+
UINT8_MAX = np.iinfo("uint8").max
2929

30-
UINT32_MIN = 0x0
31-
UINT32_MAX = 0xFFFFFFFF
30+
UINT16_MIN = 0
31+
UINT16_MAX = np.iinfo("uint16").max
32+
33+
UINT32_MIN = 0
34+
UINT32_MAX = np.iinfo("uint32").max
35+
36+
UINT64_MIN = 0
37+
UINT64_MAX = np.iinfo("uint64").max

0 commit comments

Comments
 (0)