Skip to content

Commit 8c6ef94

Browse files
Altay Sansal authored and BrianMichell committed
Refactor MDIO metadata handling and test structure.
1 parent fa71f0f commit 8c6ef94

File tree

7 files changed: 60 additions (+60) and 51 deletions (-51)

7 files changed: 60 additions (+60) and 51 deletions (-51)

src/mdio/api/accessor.py

Lines changed: 15 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -190,28 +190,29 @@ def _validate_store(self, storage_options):
190190
url=self.url,
191191
disk_cache=self._disk_cache,
192192
)
193-
self.store = zarr.open(
194-
self.url, mode=self.mode, storage_options=storage_options
195-
).store
196193

197-
def _connect(self):
198-
"""Open the zarr root."""
199194
try:
200-
if self.mode in {"r", "r+"}:
201-
self.root = zarr.open_consolidated(store=self.store, mode=self.mode)
202-
elif self.mode == "w":
203-
self.root = zarr.open(store=self.store, mode="r+")
204-
else:
205-
msg = f"Invalid mode: {self.mode}"
206-
raise ValueError(msg)
207-
except KeyError as e:
195+
self.store = zarr.open(
196+
self.url, mode=self.mode, storage_options=storage_options
197+
).store
198+
except FileNotFoundError as e:
208199
msg = (
209-
f"MDIO file not found or corrupt at {self.store.path}. "
200+
f"MDIO file not found or corrupt at {self.url}. "
210201
"Please check the URL or ensure it is not a deprecated "
211202
"version of MDIO file."
212203
)
213204
raise MDIONotFoundError(msg) from e
214205

206+
def _connect(self):
207+
"""Open the zarr root."""
208+
if self.mode in {"r", "r+"}:
209+
self.root = zarr.open_consolidated(store=self.store, mode=self.mode)
210+
elif self.mode == "w":
211+
self.root = zarr.open(store=self.store, mode="r+")
212+
else:
213+
msg = f"Invalid mode: {self.mode}"
214+
raise ValueError(msg)
215+
215216
def _deserialize_grid(self):
216217
"""Deserialize grid from Zarr metadata."""
217218
self.grid = Grid.from_zarr(self.root)

src/mdio/api/io_utils.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44

55
import dask.array as da
66
import zarr
7-
from zarr.storage._utils import normalize_path
87

98

109
def process_url(
@@ -69,7 +68,7 @@ def process_url(
6968
if disk_cache is True:
7069
url = "::".join(["simplecache", url])
7170

72-
return normalize_path(url)
71+
return url
7372

7473

7574
def open_zarr_array(group_handle: zarr.Group, name: str) -> zarr.Array:

src/mdio/segy/helpers_segy.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def create_zarr_hierarchy(root_group: "Group", overwrite: bool) -> "Group":
2929
root_group.create_group(name="metadata", overwrite=overwrite)
3030
except ContainsGroupError as e:
3131
msg = (
32-
f"An MDIO file with data already exists at {root_group.store.path}. "
32+
f"An MDIO file with data already exists at {root_group.store}. "
3333
"If this is intentional, please specify 'overwrite=True'."
3434
)
3535
raise MDIOAlreadyExistsError(msg) from e

tests/integration/test_segy_import_export.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -259,7 +259,7 @@ def test_3d_import(segy_input, zarr_tmp, index_bytes, index_names):
259259
"""Test importing a SEG-Y file to MDIO."""
260260
segy_to_mdio(
261261
segy_path=segy_input.__str__(),
262-
mdio_path_or_buffer=zarr_tmp.__str__(),
262+
mdio_path_or_buffer=(zarr_tmp / "teapot.mdio").__str__(),
263263
index_bytes=index_bytes,
264264
index_names=index_names,
265265
overwrite=True,
@@ -272,13 +272,13 @@ class TestReader:
272272

273273
def test_meta_read(self, zarr_tmp):
274274
"""Metadata reading tests."""
275-
mdio = MDIOReader(zarr_tmp.__str__())
275+
mdio = MDIOReader((zarr_tmp / "teapot.mdio").__str__())
276276
assert mdio.binary_header["samples_per_trace"] == 1501
277277
assert mdio.binary_header["sample_interval"] == 2000
278278

279279
def test_grid(self, zarr_tmp):
280280
"""Grid reading tests."""
281-
mdio = MDIOReader(zarr_tmp.__str__())
281+
mdio = MDIOReader((zarr_tmp / "teapot.mdio").__str__())
282282
grid = mdio.grid
283283

284284
assert grid.select_dim("inline") == Dimension(range(1, 346), "inline")
@@ -287,7 +287,7 @@ def test_grid(self, zarr_tmp):
287287

288288
def test_get_data(self, zarr_tmp):
289289
"""Data retrieval tests."""
290-
mdio = MDIOReader(zarr_tmp.__str__())
290+
mdio = MDIOReader((zarr_tmp / "teapot.mdio").__str__())
291291

292292
assert mdio.shape == (345, 188, 1501)
293293
assert mdio[0, :, :].shape == (188, 1501)
@@ -296,7 +296,7 @@ def test_get_data(self, zarr_tmp):
296296

297297
def test_inline(self, zarr_tmp):
298298
"""Read and compare every 75 inlines' mean and std. dev."""
299-
mdio = MDIOReader(zarr_tmp.__str__())
299+
mdio = MDIOReader((zarr_tmp / "teapot.mdio").__str__())
300300

301301
inlines = mdio[::75, :, :]
302302
mean, std = inlines.mean(), inlines.std()
@@ -305,7 +305,7 @@ def test_inline(self, zarr_tmp):
305305

306306
def test_crossline(self, zarr_tmp):
307307
"""Read and compare every 75 crosslines' mean and std. dev."""
308-
mdio = MDIOReader(zarr_tmp.__str__())
308+
mdio = MDIOReader((zarr_tmp / "teapot.mdio").__str__())
309309

310310
xlines = mdio[:, ::75, :]
311311
mean, std = xlines.mean(), xlines.std()
@@ -314,7 +314,7 @@ def test_crossline(self, zarr_tmp):
314314

315315
def test_zslice(self, zarr_tmp):
316316
"""Read and compare every 225 z-slices' mean and std. dev."""
317-
mdio = MDIOReader(zarr_tmp.__str__())
317+
mdio = MDIOReader((zarr_tmp / "teapot.mdio").__str__())
318318

319319
slices = mdio[:, :, ::225]
320320
mean, std = slices.mean(), slices.std()
@@ -329,7 +329,7 @@ class TestExport:
329329
def test_3d_export(self, zarr_tmp, segy_export_tmp):
330330
"""Test 3D export to IBM and IEEE."""
331331
mdio_to_segy(
332-
mdio_path_or_buffer=zarr_tmp.__str__(),
332+
mdio_path_or_buffer=(zarr_tmp / "teapot.mdio").__str__(),
333333
output_segy_path=segy_export_tmp.__str__(),
334334
)
335335

tests/unit/conftest.py

Lines changed: 24 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,12 @@
44

55
from datetime import datetime
66
from importlib import metadata
7+
from pathlib import Path
78

89
import numpy as np
910
import pytest
1011
import zarr
1112
from numpy.typing import NDArray
12-
from zarr import Group
1313
from zarr import consolidate_metadata
1414
from zarr import open_group
1515

@@ -30,11 +30,9 @@
3030

3131

3232
@pytest.fixture(scope="module")
33-
def mock_root_group(tmp_path_factory) -> Group:
33+
def mock_mdio_path(tmp_path_factory):
3434
"""Make a mocked MDIO store for writing."""
35-
zarr.config.set({"default_zarr_format": 2, "write_empty_chunks": False})
36-
tmp_dir = tmp_path_factory.mktemp("mdio")
37-
return open_group(tmp_dir.name, mode="w")
35+
return tmp_path_factory.mktemp("mdio") / "mock.mdio"
3836

3937

4038
@pytest.fixture
@@ -75,16 +73,19 @@ def mock_data(mock_coords):
7573

7674
@pytest.fixture
7775
def mock_mdio(
78-
mock_root_group: Group,
76+
mock_mdio_path: Path,
7977
mock_dimensions: list[Dimension],
8078
mock_coords: tuple[NDArray],
8179
mock_data: NDArray,
8280
mock_text: list[str],
8381
mock_bin: dict[str, int],
8482
):
8583
"""This mocks most of mdio.converters.segy in memory."""
86-
zarr_root = create_zarr_hierarchy(
87-
root_group=mock_root_group,
84+
zarr.config.set({"default_zarr_format": 2, "write_empty_chunks": False})
85+
zarr_root = open_group(mock_mdio_path, mode="w")
86+
87+
create_zarr_hierarchy(
88+
root_group=zarr_root,
8889
overwrite=True,
8990
)
9091

@@ -114,6 +115,7 @@ def mock_mdio(
114115
shape=grid.shape[:-1],
115116
chunks=grid.shape[:-1],
116117
chunk_key_encoding={"name": "v2", "separator": "/"},
118+
overwrite=True,
117119
)
118120
live_mask_arr[...] = grid.live_mask[...]
119121

@@ -135,34 +137,39 @@ def mock_mdio(
135137

136138
data_arr = data_grp.create_array(
137139
"chunked_012",
138-
data=mock_data,
140+
dtype=mock_data.dtype,
141+
shape=mock_data.shape,
139142
chunk_key_encoding={"name": "v2", "separator": "/"},
143+
overwrite=True,
140144
)
145+
data_arr[...] = mock_data
141146

142-
metadata_grp.create_array(
143-
data=il_grid * xl_grid,
147+
metadata_arr = metadata_grp.create_array(
144148
name="_".join(["chunked_012", "trace_headers"]),
149+
dtype=il_grid.dtype,
145150
shape=grid.shape[:-1], # Same spatial shape as data
146151
chunks=data_arr.chunks[:-1], # Same spatial chunks as data
147152
chunk_key_encoding={"name": "v2", "separator": "/"},
153+
overwrite=True,
148154
)
155+
metadata_arr[...] = il_grid * xl_grid
149156

150-
consolidate_metadata(mock_root_group.store)
157+
consolidate_metadata(zarr_root.store)
151158

152-
return zarr_root
159+
return mock_mdio_path
153160

154161

155162
@pytest.fixture
156-
def mock_reader(mock_mdio: Group) -> MDIOReader:
163+
def mock_reader(mock_mdio: Path) -> MDIOReader:
157164
"""Reader that points to the mocked data to be used later."""
158-
return MDIOReader(mock_mdio.store.path)
165+
return MDIOReader(mock_mdio.__str__())
159166

160167

161168
@pytest.fixture
162-
def mock_reader_cached(mock_mdio: Group) -> MDIOReader:
169+
def mock_reader_cached(mock_mdio: Path) -> MDIOReader:
163170
"""Reader that points to the mocked data to be used later. (with local caching)."""
164171
return MDIOReader(
165-
mock_mdio.store.path,
172+
mock_mdio.__str__(),
166173
disk_cache=True,
167174
storage_options={"simplecache": {"cache_storage": "./mdio_test_cache"}},
168175
)

tests/unit/test_accessor.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,6 @@ def test_wrong_index(self, mock_reader: MDIOReader) -> None:
132132

133133
def test_mdio_exists(self, mock_reader: MDIOReader) -> None:
134134
"""MDIO doesn't exist or corrupt."""
135-
mock_store = mock_reader.store
135+
mock_root = mock_reader.root
136136
with pytest.raises(MDIOAlreadyExistsError):
137-
create_zarr_hierarchy(mock_store, overwrite=False)
137+
create_zarr_hierarchy(mock_root, overwrite=False)

tests/unit/test_compat.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -27,18 +27,20 @@
2727
def update_mdio_for_version_0_7_4(root):
2828
"""Update MDIO metadata to mimic version 0.7.4."""
2929
# Update binary header revision keys
30-
bin_hdr = root.metadata.attrs[BINARY_HEADER_KEY]
30+
bin_hdr = root["metadata"].attrs[BINARY_HEADER_KEY]
3131
bin_hdr["SEGYRevision"] = bin_hdr.pop("segy_revision_major")
3232
bin_hdr["SEGYRevisionMinor"] = bin_hdr.pop("segy_revision_minor")
33-
root.metadata.attrs[BINARY_HEADER_KEY] = bin_hdr
33+
root["metadata"].attrs[BINARY_HEADER_KEY] = bin_hdr
3434

3535
# Remove trace headers past field 232 (pre-0.8 schema)
36-
orig_hdr = root.metadata[CHUNKED_TRACE_HEADERS_KEY]
36+
orig_hdr = root["metadata"][CHUNKED_TRACE_HEADERS_KEY]
3737
new_dtype = np.dtype(orig_hdr.dtype.descr[:-1])
38-
new_hdr = zarr.zeros_like(orig_hdr, dtype=new_dtype)
39-
root.metadata.create_dataset(
40-
CHUNKED_TRACE_HEADERS_KEY,
41-
data=new_hdr,
38+
root["metadata"].create_array(
39+
name=CHUNKED_TRACE_HEADERS_KEY,
40+
shape=orig_hdr.shape,
41+
dtype=new_dtype,
42+
chunks=orig_hdr.chunks,
43+
chunk_key_encoding={"name": "v2", "separator": "/"},
4244
overwrite=True,
4345
)
4446
zarr.consolidate_metadata(root.store)

0 commit comments

Comments
 (0)