Skip to content

Commit ffb4590

Browse files
committed
Fix codec-related issues and warning spam
1 parent 0916f7d commit ffb4590

16 files changed

+55
-83
lines changed

src/mdio/api/convenience.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
from typing import TYPE_CHECKING
66

77
import zarr
8-
from numcodecs.zarr3 import Blosc
98
from tqdm.auto import tqdm
9+
from zarr.codecs import BloscCodec
1010

1111
from mdio import MDIOReader
1212
from mdio import MDIOWriter
@@ -134,8 +134,8 @@ def create_rechunk_plan(
134134
metadata_arrs = []
135135
data_arrs = []
136136

137-
header_compressor = Blosc(cname="zstd")
138-
trace_compressor = Blosc(cname="zstd") if compressors is None else compressors
137+
header_compressor = BloscCodec(cname="zstd")
138+
trace_compressor = BloscCodec(cname="zstd") if compressors is None else compressors
139139

140140
for chunks, suffix in zip(chunks_list, suffix_list, strict=True):
141141
norm_chunks = tuple(min(chunk, size) for chunk, size in zip(chunks, source.shape, strict=True))

src/mdio/core/factory.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@
2525
from typing import Any
2626

2727
import zarr
28-
from numcodecs.zarr3 import Blosc
2928
from numpy.typing import DTypeLike
3029
from zarr import Group
3130
from zarr import open_group
31+
from zarr.codecs import BloscCodec
3232
from zarr.core.array import CompressorsLike
3333

3434
from mdio.api.accessor import MDIOWriter
@@ -160,7 +160,7 @@ def create_empty(
160160
name=f"{variable.name}_trace_headers",
161161
shape=config.grid.shape[:-1], # Same spatial shape as data
162162
chunks=variable.chunks[:-1], # Same spatial chunks as data
163-
compressors=Blosc(cname="zstd"),
163+
compressors=BloscCodec(cname="zstd"),
164164
dtype=header_dtype,
165165
)
166166

src/mdio/schemas/compressors.py

Lines changed: 7 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -7,51 +7,25 @@
77

88
from __future__ import annotations
99

10-
from enum import IntEnum
1110
from enum import StrEnum
1211

1312
from pydantic import Field
1413
from pydantic import model_validator
14+
from zarr.codecs import BloscCname
15+
from zarr.codecs import BloscShuffle
1516

1617
from mdio.schemas.core import CamelCaseStrictModel
1718

1819

19-
class BloscAlgorithm(StrEnum):
20-
"""Enum for Blosc algorithm options."""
21-
22-
BLOSCLZ = "blosclz"
23-
LZ4 = "lz4"
24-
LZ4HC = "lz4hc"
25-
ZLIB = "zlib"
26-
ZSTD = "zstd"
27-
28-
29-
class BloscShuffle(IntEnum):
30-
"""Enum for Blosc shuffle options."""
31-
32-
NOSHUFFLE = 0
33-
SHUFFLE = 1
34-
BITSHUFFLE = 2
35-
AUTOSHUFFLE = -1
36-
37-
3820
class Blosc(CamelCaseStrictModel):
3921
"""Data Model for Blosc options."""
4022

4123
name: str = Field(default="blosc", description="Name of the compressor.")
42-
algorithm: BloscAlgorithm = Field(
43-
default=BloscAlgorithm.LZ4,
44-
description="The Blosc compression algorithm to be used.",
45-
)
46-
level: int = Field(default=5, ge=0, le=9, description="The compression level.")
47-
shuffle: BloscShuffle = Field(
48-
default=BloscShuffle.SHUFFLE,
49-
description="The shuffle strategy to be applied before compression.",
50-
)
51-
blocksize: int = Field(
52-
default=0,
53-
description="The size of the block to be used for compression.",
54-
)
24+
cname: BloscCname = Field(default=BloscCname.zstd, description="Compression algorithm name.")
25+
clevel: int = Field(default=5, ge=0, le=9, description="Compression level (integer 0–9)")
26+
shuffle: BloscShuffle | None = Field(default=None, description="Shuffling mode before compression.")
27+
typesize: int | None = Field(default=None, description="The size in bytes that the shuffle is performed over.")
28+
blocksize: int = Field(default=0, description="The size (in bytes) of blocks to divide data before compression.")
5529

5630

5731
zfp_mode_map = {

src/mdio/schemas/v1/dataset_serializer.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
import numpy as np
44
from dask import array as dask_array
5-
from numcodecs.zarr3 import Blosc as nc_Blosc
65
from xarray import DataArray as xr_DataArray
76
from xarray import Dataset as xr_Dataset
7+
from zarr.codecs import BloscCodec
88

99
from mdio.converters.type_converter import to_numpy_dtype
1010

@@ -120,18 +120,13 @@ def _get_zarr_chunks(var: Variable, all_named_dims: dict[str, NamedDimension]) -
120120

121121
def _convert_compressor(
122122
compressor: mdio_Blosc | mdio_ZFP | None,
123-
) -> nc_Blosc | zfpy_ZFPY | None:
123+
) -> BloscCodec | zfpy_ZFPY | None:
124124
"""Convert a compressor to a numcodecs compatible format."""
125125
if compressor is None:
126126
return None
127127

128128
if isinstance(compressor, mdio_Blosc):
129-
return nc_Blosc(
130-
cname=compressor.algorithm,
131-
clevel=compressor.level,
132-
shuffle=compressor.shuffle.value,
133-
blocksize=compressor.blocksize if compressor.blocksize > 0 else 0,
134-
)
129+
return BloscCodec(**compressor.model_dump(exclude={"name"}))
135130

136131
if isinstance(compressor, mdio_ZFP):
137132
if zfpy_ZFPY is None:

src/mdio/schemas/v1/templates/abstract_dataset_template.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def _add_trace_mask(self) -> None:
199199
name="trace_mask",
200200
dimensions=self._dim_names[:-1], # All dimensions except vertical (the last one)
201201
data_type=ScalarType.BOOL,
202-
compressor=compressors.Blosc(algorithm=compressors.BloscAlgorithm.ZSTD),
202+
compressor=compressors.Blosc(cname=compressors.BloscCname.zstd), # also default in zarr3
203203
coordinates=self._coord_names,
204204
metadata_info=None,
205205
)
@@ -212,7 +212,7 @@ def _add_trace_headers(self, headers: StructuredType) -> None:
212212
name="headers",
213213
dimensions=self._dim_names[:-1], # All dimensions except vertical (the last one)
214214
data_type=headers,
215-
compressor=compressors.Blosc(algorithm=compressors.BloscAlgorithm.ZSTD),
215+
compressor=compressors.Blosc(cname=compressors.BloscCname.zstd), # also default in zarr3
216216
coordinates=self._coord_names,
217217
metadata_info=[
218218
ChunkGridMetadata(
@@ -231,7 +231,7 @@ def _add_variables(self) -> None:
231231
name=self.default_variable_name,
232232
dimensions=self._dim_names,
233233
data_type=ScalarType.FLOAT32,
234-
compressor=compressors.Blosc(algorithm=compressors.BloscAlgorithm.ZSTD),
234+
compressor=compressors.Blosc(cname=compressors.BloscCname.zstd), # also default in zarr3
235235
coordinates=self._coord_names,
236236
metadata_info=[
237237
ChunkGridMetadata(

src/mdio/segy/blocked_io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def to_zarr( # noqa: PLR0913, PLR0915
5757
grid_map: zarr_Array,
5858
dataset: xr_Dataset,
5959
data_variable_name: str,
60-
) -> None:
60+
) -> SummaryStatistics:
6161
"""Blocked I/O from SEG-Y to chunked `xarray.Dataset`.
6262
6363
Args:

tests/unit/test_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
"name": "actual_variable",
1414
"data_type": "float32",
1515
"dimensions": ["dim0", "dim1"],
16-
"compressor": {"name": "blosc", "level": 3},
16+
"compressor": {"name": "blosc", "clevel": 3},
1717
"coordinates": ["coord"],
1818
"metadata": {
1919
"chunk_grid": {

tests/unit/v1/helpers.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from mdio.schemas.chunk_grid import RegularChunkGrid
44
from mdio.schemas.chunk_grid import RegularChunkShape
55
from mdio.schemas.compressors import Blosc
6+
from mdio.schemas.compressors import BloscCname
67
from mdio.schemas.dtype import ScalarType
78
from mdio.schemas.dtype import StructuredField
89
from mdio.schemas.dtype import StructuredType
@@ -200,7 +201,7 @@ def make_seismic_poststack_3d_acceptance_dataset(dataset_name: str) -> Dataset:
200201
name="image",
201202
dimensions=["inline", "crossline", "depth"],
202203
data_type=ScalarType.FLOAT32,
203-
compressor=Blosc(algorithm="zstd"),
204+
compressor=Blosc(cname=BloscCname.zstd), # also default in zarr3
204205
coordinates=["cdp_x", "cdp_y"],
205206
metadata_info=[
206207
ChunkGridMetadata(
@@ -238,7 +239,7 @@ def make_seismic_poststack_3d_acceptance_dataset(dataset_name: str) -> Dataset:
238239
long_name="inline optimized version of 3d_stack",
239240
dimensions=["inline", "crossline", "depth"],
240241
data_type=ScalarType.FLOAT32,
241-
compressor=Blosc(algorithm="zstd"),
242+
compressor=Blosc(cname=BloscCname.zstd), # also default in zarr3
242243
coordinates=["cdp_x", "cdp_y"],
243244
metadata_info=[
244245
ChunkGridMetadata(chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=[4, 512, 512])))

tests/unit/v1/templates/test_seismic_3d_poststack.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from mdio.schemas.chunk_grid import RegularChunkGrid
66
from mdio.schemas.compressors import Blosc
7+
from mdio.schemas.compressors import BloscCname
78
from mdio.schemas.dtype import ScalarType
89
from mdio.schemas.dtype import StructuredType
910
from mdio.schemas.v1.dataset import Dataset
@@ -181,7 +182,7 @@ def test_build_dataset_depth(self, structured_headers: StructuredType) -> None:
181182
dtype=ScalarType.FLOAT32,
182183
)
183184
assert isinstance(seismic.compressor, Blosc)
184-
assert seismic.compressor.algorithm == "zstd"
185+
assert seismic.compressor.cname == BloscCname.zstd
185186
assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid)
186187
assert seismic.metadata.chunk_grid.configuration.chunk_shape == [128, 128, 128]
187188
assert seismic.metadata.stats_v1 is None
@@ -214,7 +215,7 @@ def test_build_dataset_time(self, structured_headers: StructuredType) -> None:
214215
dtype=ScalarType.FLOAT32,
215216
)
216217
assert isinstance(seismic.compressor, Blosc)
217-
assert seismic.compressor.algorithm == "zstd"
218+
assert seismic.compressor.cname == BloscCname.zstd
218219
assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid)
219220
assert seismic.metadata.chunk_grid.configuration.chunk_shape == [128, 128, 128]
220221
assert seismic.metadata.stats_v1 is None

tests/unit/v1/templates/test_seismic_3d_prestack_cdp.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from mdio.schemas.chunk_grid import RegularChunkGrid
66
from mdio.schemas.compressors import Blosc
7+
from mdio.schemas.compressors import BloscCname
78
from mdio.schemas.dtype import ScalarType
89
from mdio.schemas.dtype import StructuredType
910
from mdio.schemas.v1.dataset import Dataset
@@ -192,7 +193,7 @@ def test_build_dataset_depth(self, structured_headers: StructuredType) -> None:
192193
dtype=ScalarType.FLOAT32,
193194
)
194195
assert isinstance(seismic.compressor, Blosc)
195-
assert seismic.compressor.algorithm == "zstd"
196+
assert seismic.compressor.cname == BloscCname.zstd
196197
assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid)
197198
assert seismic.metadata.chunk_grid.configuration.chunk_shape == [1, 1, 512, 4096]
198199
assert seismic.metadata.stats_v1 is None
@@ -225,7 +226,7 @@ def test_build_dataset_time(self, structured_headers: StructuredType) -> None:
225226
dtype=ScalarType.FLOAT32,
226227
)
227228
assert isinstance(seismic.compressor, Blosc)
228-
assert seismic.compressor.algorithm == "zstd"
229+
assert seismic.compressor.cname == BloscCname.zstd
229230
assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid)
230231
assert seismic.metadata.chunk_grid.configuration.chunk_shape == [1, 1, 512, 4096]
231232
assert seismic.metadata.stats_v1 is None

0 commit comments

Comments
 (0)