Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/2962.fix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Internally pass the ``typesize`` constructor parameter to :class:`numcodecs.blosc.Blosc` so that compression ratios are restored to the levels achieved by the v2 package.
4 changes: 2 additions & 2 deletions docs/user-guide/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ prints additional diagnostics, e.g.::
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
No. bytes : 400000000 (381.5M)
No. bytes stored : 9696520
Storage ratio : 41.3
No. bytes stored : 3558573
Storage ratio : 112.4
Chunks Initialized : 100

.. note::
Expand Down
4 changes: 4 additions & 0 deletions src/zarr/codecs/blosc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import numcodecs
from numcodecs.blosc import Blosc
from packaging.version import Version

from zarr.abc.codec import BytesBytesCodec
from zarr.core.buffer.cpu import as_numpy_array_wrapper
Expand Down Expand Up @@ -163,6 +164,9 @@ def _blosc_codec(self) -> Blosc:
"shuffle": map_shuffle_str_to_int[self.shuffle],
"blocksize": self.blocksize,
}
# See https://github.com/zarr-developers/numcodecs/pull/713
if Version(numcodecs.__version__) >= Version("0.16.0"):
config_dict["typesize"] = self.typesize
return Blosc.from_config(config_dict)

async def _decode_single(
Expand Down
19 changes: 19 additions & 0 deletions tests/test_codecs/test_blosc.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import json

import numcodecs
import numpy as np
import pytest
from packaging.version import Version

import zarr
from zarr.abc.store import Store
Expand Down Expand Up @@ -54,3 +56,20 @@ async def test_blosc_evolve(store: Store, dtype: str) -> None:
assert blosc_configuration_json["shuffle"] == "bitshuffle"
else:
assert blosc_configuration_json["shuffle"] == "shuffle"


async def test_typesize() -> None:
    """Pin the compressed size of one Blosc-encoded chunk of uint64 data.

    A 1,000,000-element ``uint64`` range is written in chunks of 10,000
    elements through ``BytesCodec`` + ``BloscCodec``, and the stored bytes
    for key ``"c/0"`` are measured. The expected size depends on the
    installed numcodecs version: 402 bytes for numcodecs >= 0.16.0 and
    10216 bytes for older releases.
    """
    a = np.arange(1000000, dtype=np.uint64)
    codecs = [zarr.codecs.BytesCodec(), zarr.codecs.BloscCodec()]
    # One-element tuple: ``(10000)`` is just a parenthesised int.
    z = zarr.array(a, chunks=(10000,), codecs=codecs)
    data = await z.store.get("c/0", prototype=default_buffer_prototype())
    assert data is not None
    # Named ``raw`` to avoid shadowing the ``bytes`` builtin.
    raw = data.to_bytes()
    size = len(raw)
    # The message reports the same number of bytes that the slices show.
    msg = f"Blosc size mismatch. First 20 bytes: {raw[:20]!r} and last 20 bytes: {raw[-20:]!r}"
    # Only the expected value differs between numcodecs versions, so the
    # comparison itself is made once.
    if Version(numcodecs.__version__) >= Version("0.16.0"):
        expected_size = 402
    else:
        expected_size = 10216
    assert size == expected_size, msg