diff --git a/.github/workflows/python-package-binaries.yml b/.github/workflows/python-package-binaries.yml index 4ad3a5c..e573d45 100644 --- a/.github/workflows/python-package-binaries.yml +++ b/.github/workflows/python-package-binaries.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-22.04", "ubuntu-24.04"] - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.13"] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/.github/workflows/python-package-cython.yml b/.github/workflows/python-package-cython.yml index 6af0a6c..91d906a 100644 --- a/.github/workflows/python-package-cython.yml +++ b/.github/workflows/python-package-cython.yml @@ -8,13 +8,14 @@ on: jobs: build-and-test: - name: Test on ${{ matrix.os }} - Python ${{ matrix.python-version }} + name: Test on ${{ matrix.os }} - Python ${{ matrix.python-version }} - Zarr ${{ matrix.zarr-version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest"] - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.11", "3.13"] + zarr-version: ["2.18.4", "3.1.5"] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -45,6 +46,9 @@ jobs: - name: Install dependencies run: | pip install .[test] + - name: Install Zarr + run: | + pip install zarr==${{ matrix.zarr-version }} - name: Test imports and version run: | pytest -s tests/test_imports.py diff --git a/.github/workflows/python-package-multi-threading.yml b/.github/workflows/python-package-multi-threading.yml index 535bf59..90b002d 100644 --- a/.github/workflows/python-package-multi-threading.yml +++ b/.github/workflows/python-package-multi-threading.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest"] - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.13"] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git 
a/.github/workflows/python-package-no-cython.yml b/.github/workflows/python-package-no-cython.yml index b8db324..337ea67 100644 --- a/.github/workflows/python-package-no-cython.yml +++ b/.github/workflows/python-package-no-cython.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.13"] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/README.md b/README.md index 5b5fea0..021c564 100644 --- a/README.md +++ b/README.md @@ -68,16 +68,24 @@ data = ... # any numpy array # instantiate WavPack compressor wv_compressor = WavPack(level=2, bps=None) -z = zarr.array(data, compressor=wv_compressor) +# v2 +group = zarr.group() +z = group.create_dataset(name="wv_dset", data=data, compressor=wv_compressor) + +# v3 +zarr.config.set({"default_zarr_version": 3}) +group = zarr.group() +z = group.create(name="wv_dset3", data=data, codecs=[wv_compressor]) data_read = z[:] ``` Available `**kwargs` can be browsed with: `WavPack?` -**NOTE:** -In order to reload in zarr an array saved with the `WavPack`, you just need to have the `wavpack_numcodecs` package +> **_NOTE 1:_** In order to reload in zarr an array saved with the `WavPack` codec, you just need to have the `wavpack_numcodecs` package installed. +> **_NOTE 2:_** The Zarr v3 implementation is an `ArrayBytesCodec`. The `zarr.create_array` function only supports `ArrayArrayCodec` objects for `filters` and `BytesBytesCodec` objects for `compressors`. Hence, we need to use the `zarr.create` function instead, which supports any list of codecs (including `ArrayBytesCodec` objects). 
+ # Developmers guide ## How to upgrade WavPack installation and make a new release diff --git a/src/wavpack_numcodecs/__init__.py b/src/wavpack_numcodecs/__init__.py index f665ab2..a2fd792 100644 --- a/src/wavpack_numcodecs/__init__.py +++ b/src/wavpack_numcodecs/__init__.py @@ -1,4 +1,28 @@ -from wavpack_numcodecs.wavpack import WavPack, wavpack_version +import importlib.metadata +import importlib.util +import numcodecs +from packaging.version import parse + +from wavpack_numcodecs.wavpack import wavpack_version + + +HAVE_ZARR = importlib.util.find_spec("zarr") is not None + +USE_ZARR_V3 = False +if HAVE_ZARR: + import zarr + + if parse(zarr.__version__) >= parse("3.0.0"): + USE_ZARR_V3 = True + +if USE_ZARR_V3: + from zarr.registry import register_codec + from wavpack_numcodecs.wavpackv3 import WavPack + register_codec("wavpack", WavPack) # zarr v3 registry signature: (key, codec_cls) +else: + from numcodecs import register_codec + from wavpack_numcodecs.wavpack import WavPack + register_codec(WavPack) # numcodecs takes the class first: (cls, codec_id=None) from .globals import ( get_num_decoding_threads, @@ -8,6 +32,5 @@ set_num_decoding_threads, set_num_encoding_threads, ) -import importlib.metadata -__version__ = importlib.metadata.version("wavpack_numcodecs") \ No newline at end of file +__version__ = importlib.metadata.version("wavpack_numcodecs") diff --git a/src/wavpack_numcodecs/wavpack.pyx b/src/wavpack_numcodecs/wavpack.pyx index e92e0fd..e8c8274 100644 --- a/src/wavpack_numcodecs/wavpack.pyx +++ b/src/wavpack_numcodecs/wavpack.pyx @@ -329,6 +329,3 @@ class WavPack(Codec): def decode(self, buf, out=None): buf = ensure_contiguous_ndarray(buf, self.max_buffer_size) return decompress(buf, out, self.num_decoding_threads) - - -numcodecs.register_codec(WavPack) diff --git a/src/wavpack_numcodecs/wavpackv3.py b/src/wavpack_numcodecs/wavpackv3.py new file mode 100644 index 0000000..3649561 --- /dev/null +++ b/src/wavpack_numcodecs/wavpackv3.py @@ -0,0 +1,97 @@ +from zarr.abc.codec import ArrayBytesCodec +from zarr.core.buffer import Buffer, BufferPrototype +from zarr.core.common import 
BytesLike +from wavpack_numcodecs.wavpack import WavPack as WavPackV2 +import numpy as np +import asyncio +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from zarr.core.array_spec import ArraySpec + from zarr.core.buffer import NDBuffer + + +class WavPack(ArrayBytesCodec): + def __init__( + self, + level: int = 1, + bps: int | None = None, + dynamic_noise_shaping: bool = True, + shaping_weight: float = 0.0, + num_encoding_threads: int = 1, + num_decoding_threads: int = 8, + ): + self._codec = WavPackV2( + level=level, + bps=bps, + dynamic_noise_shaping=dynamic_noise_shaping, + shaping_weight=shaping_weight, + num_encoding_threads=num_encoding_threads, + num_decoding_threads=num_decoding_threads, + ) + + async def _encode_single( + self, + chunk_array: np.ndarray, + chunk_spec: "ArraySpec", + ) -> Buffer | None: + """Encode a single chunk.""" + # Convert to numpy array if it's an NDBuffer + if hasattr(chunk_array, "as_numpy_array"): + chunk_array = chunk_array.as_numpy_array() + elif not isinstance(chunk_array, np.ndarray): + chunk_array = np.asarray(chunk_array) + + encoded = await asyncio.to_thread(self._codec.encode, chunk_array) + return chunk_spec.prototype.buffer.from_bytes(encoded) + + async def _decode_single( + self, + chunk_bytes: Buffer, + chunk_spec: "ArraySpec", + ) -> "NDBuffer": + """Decode a single chunk.""" + decoded = await asyncio.to_thread(self._codec.decode, chunk_bytes.to_bytes()) + + # Convert to numpy array if it's bytes + if isinstance(decoded, bytes): + np_dtype = chunk_spec.dtype.to_native_dtype() + decoded = np.frombuffer(decoded, dtype=np_dtype) + + if not isinstance(decoded, np.ndarray): + raise TypeError(f"Expected numpy array from decode, got {type(decoded)}") + # zarr's codec pipeline expects an NDBuffer from array-to-bytes decoders, not a bare ndarray + return chunk_spec.prototype.nd_buffer.from_numpy_array(decoded.reshape(chunk_spec.shape)) + + def compute_encoded_size(self, input_byte_length: int, chunk_spec: "ArraySpec") -> int: + # WavPack compression ratio is variable, so we can't predict the exact size + # Return a conservative estimate + return 
input_byte_length + + @classmethod + def from_dict(cls, data: dict) -> "WavPack": + """Create codec from configuration dictionary.""" + config = data.get("configuration", {}) + return cls( + level=config.get("level", 1), + bps=config.get("bps"), + dynamic_noise_shaping=config.get("dynamic_noise_shaping", True), + shaping_weight=config.get("shaping_weight", 0.0), + num_encoding_threads=config.get("num_encoding_threads", 1), + num_decoding_threads=config.get("num_decoding_threads", 8), + ) + + def to_dict(self) -> dict: + """Convert codec to configuration dictionary.""" + config = self._codec.get_config() + return { + "name": "wavpack", + "configuration": { + "level": config.get("level", 1), + "bps": config.get("bps"), + "dynamic_noise_shaping": config.get("dynamic_noise_shaping", True), + "shaping_weight": config.get("shaping_weight", 0.0), + "num_encoding_threads": config.get("num_encoding_threads", 1), + "num_decoding_threads": config.get("num_decoding_threads", 8), + }, + } diff --git a/tests/test_imports.py b/tests/test_imports.py index 8a778b8..5de1641 100644 --- a/tests/test_imports.py +++ b/tests/test_imports.py @@ -2,7 +2,7 @@ def test_imports(): from wavpack_numcodecs import wavpack_version print(f"\nWavpack library verison: {wavpack_version}") - from wavpack_numcodecs import WavPack + from wavpack_numcodecs.wavpack import WavPack wv0 = WavPack(level=2) print(wv0) @@ -11,8 +11,8 @@ def test_imports(): def test_global_settings(): + from wavpack_numcodecs.wavpack import WavPack from wavpack_numcodecs import ( - WavPack, get_num_decoding_threads, get_num_encoding_threads, reset_num_decoding_threads, diff --git a/tests/test_wavpack_codec.py b/tests/test_wavpack_codec.py index 092fdc2..52720bb 100644 --- a/tests/test_wavpack_codec.py +++ b/tests/test_wavpack_codec.py @@ -5,10 +5,17 @@ import zarr from packaging.version import parse -from wavpack_numcodecs import WavPack, wavpack_version +from wavpack_numcodecs import wavpack_version +# this is the numcodecs 
implementation +from wavpack_numcodecs.wavpack import WavPack as WavPackNumcodecs +from wavpack_numcodecs import WavPack DEBUG = False +ZARR_V3 = False +if parse(zarr.__version__) >= parse("3.0.0"): + zarr.config.set({"default_zarr_version": 3}) + ZARR_V3 = True if parse(wavpack_version) >= parse("5.6.4"): print("Multi-threading available") @@ -20,6 +27,8 @@ decode_threads = [1] dtypes = ["int8", "int16", "int32", "float32"] +# dtypes = ["int16", "int32", "float32"] + @pytest.fixture(scope="module") @@ -36,7 +45,7 @@ def run_option(data, level, bps, dns, shaping_weight, e_thr, d_thr): f"Dtype {dtype} - level {level} - bps {bps} - dns {dns} - shaping_weight {shaping_weight} - " f"e. threads {e_thr} - d. threads {d_thr}" ) - cod = WavPack( + cod = WavPackNumcodecs( level=level, bps=bps, dynamic_noise_shaping=dns, @@ -94,20 +103,20 @@ def test_wavpack_multi_threading_enabled(): # Should NOT warn! with warnings.catch_warnings(): warnings.simplefilter("error") - wv = WavPack(num_encoding_threads=4, num_decoding_threads=1) - wv = WavPack(num_encoding_threads=1, num_decoding_threads=4) - wv = WavPack(num_encoding_threads=4, num_decoding_threads=4) + wv = WavPackNumcodecs(num_encoding_threads=4, num_decoding_threads=1) + wv = WavPackNumcodecs(num_encoding_threads=1, num_decoding_threads=4) + wv = WavPackNumcodecs(num_encoding_threads=4, num_decoding_threads=4) @pytest.mark.numcodecs @pytest.mark.skipif(parse(wavpack_version) >= parse("5.6.4"), reason="Multi-threading available") def test_wavpack_multi_threading_disabled(): # Should warn! 
with pytest.warns(UserWarning) as w: - wv = WavPack(num_encoding_threads=4, num_decoding_threads=1) + wv = WavPackNumcodecs(num_encoding_threads=4, num_decoding_threads=1) with pytest.warns(UserWarning) as w: - wv = WavPack(num_encoding_threads=1, num_decoding_threads=4) + wv = WavPackNumcodecs(num_encoding_threads=1, num_decoding_threads=4) with pytest.warns(UserWarning) as w: - wv = WavPack(num_encoding_threads=4, num_decoding_threads=4) + wv = WavPackNumcodecs(num_encoding_threads=4, num_decoding_threads=4) @pytest.mark.parametrize("dtype", dtypes) @@ -140,8 +149,9 @@ def test_wavpack_noise_shaping(generate_test_data, dtype, dns, shaping_weight): @pytest.mark.parametrize("dtype", dtypes) @pytest.mark.parametrize("bps", [None, 3]) +@pytest.mark.skipif(ZARR_V3, reason="These are tests for Zarr V2") @pytest.mark.zarr -def test_wavpack_zarr(generate_test_data, bps, dtype): +def test_wavpack_zarr_v2(generate_test_data, bps, dtype): print(f"\n\nZARR: testing dtype {dtype}\n\n") test_signals = generate_test_data[dtype] @@ -204,8 +214,123 @@ def test_wavpack_zarr(generate_test_data, bps, dtype): np.testing.assert_array_equal(z[:], test_sig) +@pytest.mark.parametrize("dtype", dtypes) +@pytest.mark.parametrize("bps", [None, 3]) +@pytest.mark.skipif(not ZARR_V3, reason="These are tests for Zarr V3") +@pytest.mark.zarr +def test_wavpack_zarr_v3(generate_test_data, bps, dtype): + print(f"\n\nZARR: testing dtype {dtype}\n\n") + test_signals = generate_test_data[dtype] + + for test_sig in test_signals: + compressor = WavPack(bps=bps) + + print(f"signal shape: {test_sig.shape} - bps: {bps}") + if test_sig.ndim == 1: + z = zarr.create( + shape=test_sig.shape, + chunks=test_sig.shape, + dtype=test_sig.dtype, + codecs=[compressor], + ) + z[:] = test_sig + assert z[:].shape == test_sig.shape + assert z[:100].shape == test_sig[:100].shape + assert z.nbytes > z.nbytes_stored() + if bps is None: + np.testing.assert_array_equal(z[:], test_sig) + + z = zarr.create( 
shape=test_sig.shape, + chunks=(1000,), + dtype=test_sig.dtype, + codecs=[compressor], + ) + z[:] = test_sig + assert z[:].shape == test_sig.shape + assert z[:100].shape == test_sig[:100].shape + if bps is None: + np.testing.assert_array_equal(z[:], test_sig) + + elif test_sig.ndim == 2: + test_sig_shape = test_sig.shape + z = zarr.create( + shape=test_sig_shape, + chunks=test_sig_shape, + dtype=test_sig.dtype, + codecs=[compressor], + ) + z[:] = test_sig + assert z[:].shape == test_sig.shape + assert z[:100, :10].shape == test_sig[:100, :10].shape + assert z.nbytes > z.nbytes_stored() + if bps is None: + np.testing.assert_array_equal(z[:], test_sig) + + z = zarr.create( + shape=test_sig_shape, + chunks=(1000, test_sig_shape[1]), + dtype=test_sig.dtype, + codecs=[compressor], + ) + z[:] = test_sig + assert z[:].shape == test_sig.shape + assert z[:100, :10].shape == test_sig[:100, :10].shape + if bps is None: + np.testing.assert_array_equal(z[:], test_sig) + + z = zarr.create( + shape=test_sig_shape, + chunks=(test_sig_shape[0], 10), + dtype=test_sig.dtype, + codecs=[compressor], + ) + z[:] = test_sig + assert z[:].shape == test_sig.shape + assert z[:100, :10].shape == test_sig[:100, :10].shape + if bps is None: + np.testing.assert_array_equal(z[:], test_sig) + + else: # 3d + test_sig_shape = test_sig.shape + z = zarr.create( + shape=test_sig_shape, + chunks=test_sig_shape, + dtype=test_sig.dtype, + codecs=[compressor], + ) + z[:] = test_sig + assert z[:].shape == test_sig.shape + assert z[:100, :2, :2].shape == test_sig[:100, :2, :2].shape + assert z.nbytes > z.nbytes_stored() + if bps is None: + np.testing.assert_array_equal(z[:], test_sig) + + z = zarr.create( + shape=test_sig_shape, + chunks=(1000, 2, test_sig_shape[2]), + dtype=test_sig.dtype, + codecs=[compressor], + ) + z[:] = test_sig + assert z[:].shape == test_sig.shape + assert z[:100, :2, :2].shape == test_sig[:100, :2, :2].shape + if bps is None: + np.testing.assert_array_equal(z[:], test_sig) + + z = 
zarr.create( + shape=test_sig_shape, + chunks=(test_sig_shape[0], 2, 3), + dtype=test_sig.dtype, + codecs=[compressor], + ) + z[:] = test_sig + assert z[:].shape == test_sig.shape + assert z[:100, :2, :2].shape == test_sig[:100, :2, :2].shape + if bps is None: + np.testing.assert_array_equal(z[:], test_sig) + if __name__ == "__main__": test_wavpack_numcodecs() test_wavpack_multi_threading_enabled() test_wavpack_multi_threading_disabled() - test_wavpack_zarr()