Skip to content

Commit 5f1b3c3

Browse files
dmitriyrepin and Altay Sansal authored
Fix or suppress known warnings (TGSAI#655)
* Fix warnings
* Address PR review comments
* remove tests irrelevant to MDIO
* refactor warning handling and remove unnecessary ones
* remove warning filter from quickstart

---------

Co-authored-by: Altay Sansal <[email protected]>
1 parent d83718f commit 5f1b3c3

File tree

9 files changed

+54
-90
lines changed

9 files changed

+54
-90
lines changed

docs/tutorials/quickstart.ipynb

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,25 +46,18 @@
4646
]
4747
},
4848
{
49+
"metadata": {},
4950
"cell_type": "code",
50-
"metadata": {
51-
"pycharm": {
52-
"name": "#%%\n"
53-
}
54-
},
51+
"outputs": [],
52+
"execution_count": null,
5553
"source": [
5654
"import os\n",
57-
"import warnings\n",
58-
"\n",
59-
"warnings.filterwarnings(\"ignore\")\n",
6055
"\n",
6156
"os.environ[\"MDIO__IMPORT__CLOUD_NATIVE\"] = \"true\"\n",
6257
"os.environ[\"MDIO__IMPORT__SAVE_SEGY_FILE_HEADER\"] = \"true\"\n",
6358
"\n",
6459
"input_url = \"http://s3.amazonaws.com/teapot/filt_mig.sgy\""
65-
],
66-
"outputs": [],
67-
"execution_count": null
60+
]
6861
},
6962
{
7063
"cell_type": "markdown",

src/mdio/__main__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
KNOWN_MODULES = ["segy.py", "copy.py", "info.py"]
1717

1818

19-
class MyCLI(click.MultiCommand):
19+
class MyCLI(click.Group):
2020
"""CLI generator via plugin design pattern.
2121
2222
This class dynamically loads command modules from the specified `plugin_folder`. If the
@@ -26,8 +26,8 @@ class MyCLI(click.MultiCommand):
2626
2727
Args:
2828
plugin_folder: Path to the directory containing command modules
29-
*args: Variable length argument list passed to the click.MultiCommand.
30-
**kwargs: Arbitrary keyword arguments passed to the click.MultiCommand.
29+
*args: Variable length argument list passed to the click.Group.
30+
**kwargs: Arbitrary keyword arguments passed to the click.Group.
3131
"""
3232

3333
def __init__(self, plugin_folder: Path, *args: Any, **kwargs: Any): # noqa: ANN401

src/mdio/api/io.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from xarray.backends.api import to_zarr as xr_to_zarr
1414

1515
from mdio.constants import ZarrFormat
16+
from mdio.core.zarr_io import zarr_warnings_suppress_unstable_structs_v3
1617

1718
if TYPE_CHECKING:
1819
from collections.abc import Mapping
@@ -51,11 +52,13 @@ def open_mdio(input_path: UPath | Path | str, chunks: T_Chunks = None) -> xr_Dat
5152
input_path = _normalize_path(input_path)
5253
storage_options = _normalize_storage_options(input_path)
5354
zarr_format = zarr.config.get("default_zarr_format")
55+
5456
return xr_open_zarr(
5557
input_path.as_posix(),
5658
chunks=chunks,
5759
storage_options=storage_options,
5860
mask_and_scale=zarr_format == ZarrFormat.V3, # off for v2, on for v3
61+
consolidated=zarr_format == ZarrFormat.V2, # on for v2, off for v3
5962
)
6063

6164

@@ -86,13 +89,15 @@ def to_mdio( # noqa: PLR0913
8689
output_path = _normalize_path(output_path)
8790
storage_options = _normalize_storage_options(output_path)
8891
zarr_format = zarr.config.get("default_zarr_format")
89-
xr_to_zarr(
90-
dataset,
91-
store=output_path.as_posix(), # xarray doesn't like URI when file:// is protocol
92-
mode=mode,
93-
compute=compute,
94-
consolidated=zarr_format == ZarrFormat.V2, # off for v3, on for v2
95-
region=region,
96-
storage_options=storage_options,
97-
write_empty_chunks=False,
98-
)
92+
93+
with zarr_warnings_suppress_unstable_structs_v3():
94+
xr_to_zarr(
95+
dataset,
96+
store=output_path.as_posix(), # xarray doesn't like URI when file:// is protocol
97+
mode=mode,
98+
compute=compute,
99+
consolidated=zarr_format == ZarrFormat.V2, # on for v2, off for v3
100+
region=region,
101+
storage_options=storage_options,
102+
write_empty_chunks=False,
103+
)

src/mdio/builder/schemas/v1/units.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from mdio.builder.schemas.units import create_unit_model
1111

1212
ureg = UnitRegistry()
13-
ureg.default_format = "~C" # compact, abbreviated (symbol).
13+
ureg.formatter.default_format = "~C" # compact, abbreviated (symbol).
1414

1515

1616
class LengthUnitEnum(UnitEnum):

src/mdio/builder/xarray_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ def to_xarray_dataset(mdio_ds: Dataset) -> xr_Dataset: # noqa: PLR0912
222222

223223
encoding = {
224224
"chunks": original_chunks,
225-
"compressor": _convert_compressor(v.compressor),
225+
"compressors": _convert_compressor(v.compressor),
226226
fill_value_key: fill_value,
227227
}
228228

src/mdio/core/zarr_io.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""Utilities to open/write Zarr files."""
2+
3+
from __future__ import annotations
4+
5+
import warnings
6+
from contextlib import contextmanager
7+
from typing import TYPE_CHECKING
8+
9+
from zarr.errors import UnstableSpecificationWarning
10+
11+
if TYPE_CHECKING:
12+
from collections.abc import Generator
13+
14+
15+
@contextmanager
def zarr_warnings_suppress_unstable_structs_v3() -> Generator[None, None, None]:
    """Context manager to suppress Zarr V3 unstable structured array warning.

    While the context is active, any ``UnstableSpecificationWarning`` whose message matches
    "The data type (...) does not have a Zarr V3 specification." is ignored. The warning
    filters that were in place before entering are restored on exit, so the suppression
    does not leak into the rest of the process.

    Yields:
        None. The body of the ``with`` statement runs with the filter installed.
    """
    warn = r"The data type \((.*?)\) does not have a Zarr V3 specification\."
    # `catch_warnings` snapshots the global filter list and restores it on exit (including
    # when the body raises); a bare `filterwarnings` call would stay installed forever.
    # NOTE(review): `catch_warnings` mutates module-global state, so it is not thread-safe;
    # confirm no caller relies on this from concurrent threads.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message=warn, category=UnstableSpecificationWarning)
        yield

src/mdio/segy/utilities.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ def find_trailing_ones_index(dim_blocks: tuple[int, ...]) -> int:
112112
return total_dims - trailing_ones_count
113113

114114

115+
# TODO (Dmitriy Repin): Investigate the following warning generated at test_3d_export
116+
# https://github.com/TGSAI/mdio-python/issues/657
117+
# "The specified chunks separate the stored chunks along dimension "inline" starting at index 256.
118+
# This could degrade performance. Instead, consider rechunking after loading."
115119
def segy_export_rechunker(
116120
chunks: dict[str, int],
117121
sizes: dict[str, int],

tests/unit/test_indexing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,9 @@ def mock_trace_worker(
9191
# We used a 2D selection with 2D index_slices
9292
assert grid_map.shape == (3, 4, 20)
9393
# We used a 3D selection with isel()
94-
assert tuple(dataset.dims[d] for d in region) == (3, 4, 5)
94+
assert tuple(dataset.sizes[d] for d in region) == (3, 4, 5)
9595

96-
dimension_names = list(dataset.dims)
96+
dimension_names = list(dataset.sizes)
9797

9898
slice0 = region[dimension_names[0]]
9999
slice1 = region[dimension_names[1]]

tests/unit/v1/test_dataset_serializer.py

Lines changed: 1 addition & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
import numpy as np
66
import pytest
7-
from xarray import DataArray as xr_DataArray
8-
from zarr import zeros as zarr_zeros
97
from zarr.codecs import BloscCodec
108

119
from mdio import to_mdio
@@ -295,7 +293,7 @@ def test_to_xarray_dataset(tmp_path: Path) -> None:
295293
xr_ds = to_xarray_dataset(dataset)
296294

297295
file_path = output_path(tmp_path, f"{xr_ds.attrs['name']}", debugging=False)
298-
xr_ds.to_zarr(store=file_path, mode="w", compute=False)
296+
to_mdio(dataset=xr_ds, output_path=file_path, mode="w", compute=False)
299297

300298

301299
def test_seismic_poststack_3d_acceptance_to_xarray_dataset(tmp_path: Path) -> None:
@@ -306,62 +304,3 @@ def test_seismic_poststack_3d_acceptance_to_xarray_dataset(tmp_path: Path) -> No
306304

307305
file_path = output_path(tmp_path, f"{xr_ds.attrs['name']}", debugging=False)
308306
to_mdio(xr_ds, output_path=file_path, mode="w-", compute=False)
309-
310-
311-
def test_to_zarr_from_zarr_zeros_1(tmp_path: Path) -> None:
312-
"""Test writing XArray dataset with data as Zarr zero array to Zarr.
313-
314-
Set encoding in as DataArray attributes
315-
"""
316-
# Create a data type and the fill value
317-
dtype = np.dtype([("inline", "int32"), ("cdp_x", "float64")])
318-
319-
my_attr_encoding = {"fill_value": np.void((0, 0), dtype=dtype)}
320-
321-
# Create a zarr array using the data type,
322-
# Specify encoding as the array attribute
323-
data = zarr_zeros((36, 36), dtype=dtype)
324-
aa = xr_DataArray(name="myattr", data=data)
325-
aa.encoding = my_attr_encoding
326-
327-
file_path = output_path(tmp_path, "to_zarr/zarr_zarr_zerros_1", debugging=False)
328-
aa.to_zarr(file_path, mode="w", compute=False)
329-
330-
331-
def test_to_zarr_from_zarr_zeros_2(tmp_path: Path) -> None:
332-
"""Test writing XArray dataset with data as Zarr zero array to Zarr.
333-
334-
Set encoding in the to_zar method
335-
"""
336-
# Create a data type and the fill value
337-
dtype = np.dtype([("inline", "int32"), ("cdp_x", "float64")])
338-
339-
my_attr_encoding = {"fill_value": np.void((0, 0), dtype=dtype)}
340-
341-
# Create a zarr array using the data type,
342-
# Do not specify encoding as the array attribute
343-
data = zarr_zeros((36, 36), dtype=dtype)
344-
aa = xr_DataArray(name="myattr", data=data)
345-
346-
file_path = output_path(tmp_path, "to_zarr/zarr_zarr_zerros_2", debugging=False)
347-
# Specify encoding per array
348-
encoding = {"myattr": my_attr_encoding}
349-
aa.to_zarr(file_path, mode="w", encoding=encoding, compute=False)
350-
351-
352-
def test_to_zarr_from_np(tmp_path: Path) -> None:
353-
"""Test writing XArray dataset with data as NumPy array to Zarr."""
354-
# Create a data type and the fill value
355-
dtype = np.dtype([("inline", "int32"), ("cdp_x", "float64")])
356-
357-
my_attr_encoding = {"fill_value": np.void((0, 0), dtype=dtype)}
358-
359-
# Create a zarr array using the data type
360-
# Do not specify encoding as the array attribute
361-
data = np.zeros((36, 36), dtype=dtype)
362-
aa = xr_DataArray(name="myattr", data=data)
363-
364-
file_path = output_path(tmp_path, "to_zarr/zarr_np", debugging=False)
365-
# Specify encoding per array
366-
encoding = {"myattr": my_attr_encoding}
367-
aa.to_zarr(file_path, mode="w", encoding=encoding, compute=False)

0 commit comments

Comments
 (0)