diff --git a/doc/usage.rst b/doc/usage.rst index 9fabd108..814b5934 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -130,6 +130,34 @@ Zstd :members: :undoc-members: + +Get dataset compression ++++++++++++++++++++++++ + +For compression filters provided by HDF5 and `h5py`_ (i.e., GZIP, LZF, SZIP), +the dataset compression configuration can be retrieved with `h5py.Dataset`_'s +``compression`` and +``compression_opts`` properties. + +For third-party compression filters such as those provided by `hdf5plugin`, +the dataset compression configuration is stored in the HDF5 +filter pipeline. +This filter pipeline configuration can be retrieved through the `h5py.Dataset`_ low-level API. +For a given `h5py.Dataset`_, ``dataset``: + +.. code-block:: python + + create_plist = dataset.id.get_create_plist() + + for index in range(create_plist.get_nfilters()): + filter_id, _, filter_options, _ = create_plist.get_filter(index) + print(filter_id, filter_options) + +For compression filters supported by `hdf5plugin`, +:func:`hdf5plugin.from_filter_options` instantiates the filter configuration from the filter id and options. + +.. autofunction:: from_filter_options + Get information about hdf5plugin ++++++++++++++++++++++++++++++++ @@ -176,3 +204,4 @@ Setting the ``HDF5_PLUGIN_PATH`` environment variable allows already existing pr .. _h5py: https://www.h5py.org .. _h5py.h5z: https://github.com/h5py/h5py/blob/master/h5py/h5z.pyx .. _h5py.Group.create_dataset: https://docs.h5py.org/en/stable/high/group.html#h5py.Group.create_dataset +.. _h5py.Dataset: https://docs.h5py.org/en/stable/high/dataset.html diff --git a/src/hdf5plugin/__init__.py b/src/hdf5plugin/__init__.py index 23e0287c..8043b67e 100644 --- a/src/hdf5plugin/__init__.py +++ b/src/hdf5plugin/__init__.py @@ -52,7 +52,13 @@ Zfp, Zstd, ) -from ._utils import PLUGIN_PATH, get_config, get_filters, register # noqa +from ._utils import ( # noqa + PLUGIN_PATH, + from_filter_options, + get_config, + get_filters, + register, +) from ._version import version # noqa # Backward compatibility diff --git a/src/hdf5plugin/_filters.py b/src/hdf5plugin/_filters.py index babc52dc..245e7515 100644 --- a/src/hdf5plugin/_filters.py +++ b/src/hdf5plugin/_filters.py @@ -73,6 +73,22 @@ class FilterBase(h5py.filters.FilterRefBase): filter_id: int filter_name: str + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> FilterBase: + """Returns compression arguments from HDF5 compression filters "cd_values" options + + :raises ValueError: Unsupported filter_options + :raises NotImplementedError: Support of filter_options version is not implemented + """ + raise NotImplementedError() + + +def _cname_from_id(compression_id: int, compressions: dict[str, int]) -> str: + for cname, cid in compressions.items(): + if compression_id == cid: + return cname + raise ValueError(f"Unsupported compression id: {compression_id}") + class Bitshuffle(FilterBase): """``h5py.Group.create_dataset``'s compression arguments for using bitshuffle filter.
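The ``_cname_from_id`` helper above simply reverses the private name-to-id tables (``__COMPRESSIONS``) kept by filter classes such as ``Bitshuffle`` and ``Blosc``. A minimal sketch of its behaviour, using a mapping that mirrors the bitshuffle ids exercised in the tests (none=0, lz4=2, zstd=3):

.. code-block:: python

    from hdf5plugin._filters import _cname_from_id  # private helper added above

    # Illustrative table in the spirit of a filter class' __COMPRESSIONS mapping
    compressions = {"none": 0, "lz4": 2, "zstd": 3}

    assert _cname_from_id(3, compressions) == "zstd"  # known id -> name
    try:
        _cname_from_id(42, compressions)  # unknown id -> ValueError
    except ValueError:
        pass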
@@ -147,6 +163,32 @@ def __init__( else: self.filter_options = (nelems, self.__COMPRESSIONS[cname]) + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> Bitshuffle: + """Returns compression arguments from HDF5 compression filters "cd_values" options + + :param filter_options: Expected format: + + - Zstd: (_, _, _, nelems, compression_id=3, compression_level) + - LZ4 and no compression: (_, _, _, nelems, compression_id) + + :raises ValueError: Unsupported filter_options + """ + if len(filter_options) <= 3: + return cls(cname="none") + + nelems = filter_options[3] + + if len(filter_options) <= 4: + return cls(nelems, cname="none") + + cname = _cname_from_id(filter_options[4], cls.__COMPRESSIONS) + + if cname == "zstd" and len(filter_options) > 5: + return cls(nelems, cname, clevel=filter_options[5]) + + return cls(nelems, cname) + class Blosc(FilterBase): """``h5py.Group.create_dataset``'s compression arguments for using blosc filter. @@ -203,6 +245,31 @@ def __init__(self, cname: str = "lz4", clevel: int = 5, shuffle: int = SHUFFLE): raise ValueError(f"shuffle={shuffle} is not supported") self.filter_options = (0, 0, 0, 0, clevel, shuffle, compression) + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> Blosc: + """Returns compression arguments from HDF5 compression filters "cd_values" options + + :param filter_options: Expected format: (_, _, _, _, clevel*, shuffle*, compression*) + :raises ValueError: Unsupported filter_options + """ + default_cname = "blosclz" + + if len(filter_options) <= 4: + return cls(default_cname) + + clevel = filter_options[4] + + if len(filter_options) <= 5: + return cls(default_cname, clevel) + + shuffle = filter_options[5] + + if len(filter_options) <= 6: + return cls(default_cname, clevel, shuffle) + + cname = _cname_from_id(filter_options[6], cls.__COMPRESSIONS) + return cls(cname, clevel, shuffle) + class Blosc2(FilterBase): """``h5py.Group.create_dataset``'s compression arguments for using blosc2 filter. @@ -271,6 +338,31 @@ def __init__(self, cname: str = "blosclz", clevel: int = 5, filters: int = SHUFF raise ValueError(f"filters={filters} is not supported") self.filter_options = (0, 0, 0, 0, clevel, filters, compression) + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> Blosc2: + """Returns compression arguments from HDF5 compression filters "cd_values" options + + :param filter_options: Expected format: (_, _, _, _, clevel*, filters*, compression*) + :raises ValueError: Unsupported filter_options + """ + default_cname = "blosclz" + + if len(filter_options) <= 4: + return cls(default_cname) + + clevel = filter_options[4] + + if len(filter_options) <= 5: + return cls(default_cname, clevel) + + filters = filter_options[5] + + if len(filter_options) <= 6: + return cls(default_cname, clevel, filters) + + cname = _cname_from_id(filter_options[6], cls.__COMPRESSIONS) + return cls(cname=cname, clevel=clevel, filters=filters) + class BZip2(FilterBase): """``h5py.Group.create_dataset``'s compression arguments for using BZip2 filter. 
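As a concrete illustration of ``Bitshuffle._from_filter_options`` above, the "cd_values" stored by the bitshuffle filter for Zstd compression at level 5 (taken from the test data further down) map back to an equivalent configuration; this is a usage sketch, not part of the patch:

.. code-block:: python

    import hdf5plugin

    # (_, _, _, nelems=0, compression_id=3 (Zstd), clevel=5) as stored by the filter;
    # the first three values are ignored by the parser
    cd_values = (0, 2, 4, 0, 3, 5)

    config = hdf5plugin.Bitshuffle._from_filter_options(cd_values)
    assert config.filter_options == (0, 3, 5)  # nelems, Zstd id, clevel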
@@ -296,6 +388,18 @@ def __init__(self, blocksize: int = 9): raise ValueError("blocksize must be in the range [1, 9]") self.filter_options = (blocksize,) + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> BZip2: + """Returns compression arguments from HDF5 compression filters "cd_values" options + + :param filter_options: Expected format: (blocksize,) + :raises ValueError: Unsupported filter_options + """ + if len(filter_options) == 0: + return cls() + else: + return cls(blocksize=filter_options[0]) + class FciDecomp(FilterBase): """``h5py.Group.create_dataset``'s compression arguments for using FciDecomp filter. @@ -321,6 +425,14 @@ def __init__(self) -> None: "You may need to reinstall hdf5plugin with a recent version of pip, or rebuild it with a newer compiler." ) + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> FciDecomp: + """Returns compression arguments from HDF5 compression filters options + + :raises ValueError: Unsupported filter_options + """ + return cls() + class LZ4(FilterBase): """``h5py.Group.create_dataset``'s compression arguments for using lz4 filter. @@ -347,6 +459,18 @@ def __init__(self, nbytes: int = 0): raise ValueError("clevel must be in the range [0, 2113929216]") self.filter_options = (nbytes,) + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> LZ4: + """Returns compression arguments from HDF5 compression filters "cd_values" options + + :param filter_options: Expected format: (nbytes,) + :raises ValueError: Unsupported filter_options + """ + if len(filter_options) == 0: + return cls() + else: + return cls(nbytes=filter_options[0]) + class Zfp(FilterBase): """``h5py.Group.create_dataset``'s compression arguments for using ZFP filter. 
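These per-class helpers are meant to be reached through the public :func:`hdf5plugin.from_filter_options` function added in ``_utils.py`` further down; a minimal usage sketch mirroring the test data:

.. code-block:: python

    import hdf5plugin

    # The filter may be given by name or by HDF5 filter ID
    config = hdf5plugin.from_filter_options("bzip2", (5,))
    assert config == hdf5plugin.BZip2(blocksize=5)

    # LZ4 stores a single nbytes value in its cd_values
    config = hdf5plugin.from_filter_options("lz4", (1024,))
    assert config.filter_options == (1024,)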
@@ -479,6 +603,82 @@ def __init__( logger.info(f"filter options = {self.filter_options}") + + # From zfp.h + _ZFP_MIN_BITS = 1 # minimum number of bits per block + _ZFP_MAX_BITS = 16658 # maximum number of bits per block + _ZFP_MAX_PREC = 64 # maximum precision supported + _ZFP_MIN_EXP = -1074 # minimum floating-point base-2 exponent + _ZFP_MODE_SHORT_BITS = 12 + _ZFP_MODE_SHORT_MAX = (1 << _ZFP_MODE_SHORT_BITS) - 2 + + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> Zfp: + """Returns compression arguments from HDF5 compression filters "cd_values" options + + :param filter_options: Expected format: (info, magic, meta, meta&short_mode, long_mode, long_mode) + :raises ValueError: Unsupported filter_options + :raises NotImplementedError: Support of filter_options version is not implemented + """ + # ZFP header parsing reference: + # zfp.c zfp_read_header() and zfp_stream_mode() functions + + if len(filter_options) < 4: + raise ValueError(f"Expected at least 4 values, got {len(filter_options)}") + + magic = filter_options[1] + if struct.pack("<I", magic).startswith(b"zfp"): + endianness = "<" + elif struct.pack(">I", magic).startswith(b"zfp"): + endianness = ">" + else: + raise ValueError("Unsupported options: Wrong Zfp magic number") + + codec_version = int(struct.pack(f"{endianness}I", magic)[-1]) + if codec_version != 5: + raise NotImplementedError( + f"Unsupported version of Zfp codec: {codec_version}" + ) + + # Last 12 bits contains the "short" config value + short_mode = struct.unpack( + "I", struct.pack(f"{endianness}I", filter_options[3] >> 20) + )[0] + if short_mode < cls._ZFP_MODE_SHORT_MAX: + # 12 bits encoding + if short_mode < 2048: # Fixed rate + # Fixed rate is converted to ZFP parameters taking chunk's ndim into account + # this cannot be reverted here, it returns the corresponding "expert" mode config + # See zfp.c zfp_stream_set_rate() + return cls( + minbits=short_mode, + maxbits=short_mode, + maxprec=cls._ZFP_MAX_PREC, + minexp=cls._ZFP_MIN_EXP, + ) + elif short_mode < (2048 + 128): # Fixed precision + return cls(precision=short_mode + 1 - 2048) + elif short_mode == (2048 + 128): # Reversible + return cls(reversible=True) + else: # Fixed accuracy + minexp = short_mode + cls._ZFP_MIN_EXP - (2048 + 128 + 1) + return cls(accuracy=2**minexp) + + # 64 bits encoding + if len(filter_options) < 6: + raise ValueError(f"Expected at least 6 values, got {len(filter_options)}") + + long_mode = struct.unpack( + "Q", struct.pack(f"{endianness}II", filter_options[4], filter_options[5]) + )[0] + minbits = (long_mode & 0x7FFF) + 1 + long_mode >>= 15 + maxbits = (long_mode & 0x7FFF) + 1 + long_mode >>= 15 + maxprec = (long_mode & 0x007F) + 1 + long_mode >>= 7 + minexp = (long_mode & 0x7FFF) - 16495 + return cls(minbits=minbits, maxbits=maxbits, maxprec=maxprec, minexp=minexp) + class Sperr(FilterBase): """``h5py.Group.create_dataset``'s compression arguments for using SPERR filter.
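To make the "short mode" decoding in ``Zfp._from_filter_options`` above concrete, here is the arithmetic for the fixed-precision test vector used further down, assuming the little-endian path where the ``struct`` round-trip is a no-op:

.. code-block:: python

    # cd_values recorded for hdf5plugin.Zfp(precision=20) (see the Zfp test data below)
    cd_values = (269504785, 91252346, 4026532854, 2167406593)

    # The 12 most significant bits of cd_values[3] hold the short mode word
    short_mode = cd_values[3] >> 20
    assert short_mode == 2067

    # 2048 <= short_mode < 2048 + 128 selects fixed-precision mode,
    # and the precision is recovered as short_mode + 1 - 2048
    assert 2048 <= short_mode < 2048 + 128
    assert short_mode + 1 - 2048 == 20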
@@ -588,6 +788,35 @@ def __init__( mode, quality, swap, missing_value_mode ) + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> Sperr: + """Returns compression arguments from HDF5 compression filters "cd_values" options + + :param filter_options: Expected format: (extra info, compression config) + :raises ValueError: Unsupported filter_options + """ + if len(filter_options) < 2: + raise ValueError(f"Expected at least 2 values, got {len(filter_options)}") + + mode, quality, swap, missing_value_mode = cls.__unpack_options( + meta=filter_options[0], ret=filter_options[1] + ) + + if mode == 2: + return cls( + peak_signal_to_noise_ratio=quality, + swap=swap, + missing_value_mode=missing_value_mode, + ) + if mode == 3: + return cls( + absolute=quality, swap=swap, missing_value_mode=missing_value_mode + ) + if mode == 1: + return cls(rate=quality, swap=swap, missing_value_mode=missing_value_mode) + + raise ValueError(f"Mode must be in [1, 3], got {mode}") + @classmethod def __pack_options( cls, mode: int, quality: float, swap: bool, missing_value_mode: int @@ -623,6 +852,57 @@ def __pack_options( return ret, missing_value_mode + @classmethod + def __unpack_options(cls, meta: int, ret: int) -> tuple[int, float, bool, int]: + # Unpack missing value mode from packed_info bits 6-9 + # See h5zsperr_unpack_extra_info + missing_value_mode = (meta >> 6) & 0b1111 + + # Unpack other fields from ret + # See H5Z_SPERR_decode_cd_values + swap = bool(ret >> (cls._INTEGER_BITS + cls._FRACTIONAL_BITS + 3)) + + bit1 = (ret >> (cls._INTEGER_BITS + cls._FRACTIONAL_BITS)) & 1 + bit2 = (ret >> (cls._INTEGER_BITS + cls._FRACTIONAL_BITS + 1)) & 1 + if bit1 and not bit2: + mode = 1 + elif not bit1 and bit2: + mode = 2 + elif bit1 and bit2: + mode = 3 + else: + raise ValueError("Mode must be in [1, 3], got 0") + + negative = bool((ret >> (cls._INTEGER_BITS + cls._FRACTIONAL_BITS - 1)) & 1) + + mask = 1 << (cls._INTEGER_BITS + cls._FRACTIONAL_BITS - 1) + masked_ret = ret & (mask - 1) + + quality = float(masked_ret) / float(1 << cls._FRACTIONAL_BITS) + if negative: + quality *= -1.0 + if mode == 3: + quality = 2**quality + + return mode, quality, swap, missing_value_mode + + +def _sz_pack_float64(value: float) -> tuple[int, int]: + # Pack as big-endian IEEE 754 double + packed = struct.pack(">d", value) + # Unpack most-significant bits as unsigned int + high = struct.unpack(">I", packed[0:4])[0] + # Unpack least-significant bits as unsigned int + low = struct.unpack(">I", packed[4:8])[0] + return high, low + + +def _sz_unpack_float64(high: int, low: int) -> float: + # Pack most-significant & least-significant bits + packed = struct.pack(">II", high, low) + # Unpack as big-endian IEEE 754 double + return float(struct.unpack(">d", packed)[0]) + class SZ(FilterBase): """``h5py.Group.create_dataset``'s compression arguments for using SZ2 filter. @@ -671,7 +951,7 @@ class SZ(FilterBase): For more details about the compressor, see `SZ2 compressor `_. - .. warning:: The SZ2 compressor is deprecated, see `SZ `_ + .. 
warning:: The SZ2 compressor is deprecated, see `SZ repository `_ """ filter_name = "sz" @@ -698,10 +978,10 @@ def __init__( compression_opts = ( sz_mode, - *self.__pack_float64(absolute or 0.0), - *self.__pack_float64(relative or 0.0), - *self.__pack_float64(pointwise_relative or 0.0), - *self.__pack_float64(0.0), # psnr + *_sz_pack_float64(absolute or 0.0), + *_sz_pack_float64(relative or 0.0), + *_sz_pack_float64(pointwise_relative or 0.0), + *_sz_pack_float64(0.0), # psnr ) logger.info(f"SZ mode {sz_mode} used.") @@ -709,15 +989,34 @@ def __init__( self.filter_options = compression_opts - @staticmethod - def __pack_float64(error: float) -> tuple[int, int]: - # Pack as big-endian IEEE 754 double - packed = struct.pack(">d", error) - # Unpack most-significant bits as unsigned int - high = struct.unpack(">I", packed[0:4])[0] - # Unpack least-significant bits as unsigned int - low = struct.unpack(">I", packed[4:8])[0] - return high, low + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> SZ: + """Returns compression arguments from HDF5 compression filters "cd_values" options + + :param filter_options: Expected format: + (_, _, _, _, mode, absolute1, absolute2, relative1, relative2, pointwise1, pointwise2) + :raises ValueError: Unsupported filter_options + """ + if len(filter_options) < 13: + raise ValueError(f"Expected 13 values, got {len(filter_options)}") + + sz_mode = filter_options[4] + if sz_mode == 0: + return cls( + absolute=_sz_unpack_float64(filter_options[5], filter_options[6]) + ) + if sz_mode == 1: + return cls( + relative=_sz_unpack_float64(filter_options[7], filter_options[8]) + ) + if sz_mode == 10: + return cls( + pointwise_relative=_sz_unpack_float64( + filter_options[9], filter_options[10] + ) + ) + + raise ValueError(f"Unsupported sz_mode: {sz_mode}") class SZ3(FilterBase): @@ -774,10 +1073,10 @@ def __init__( compression_opts = ( sz_mode, - *self.__pack_float64(absolute or 0.0), - *self.__pack_float64(relative or 0.0), - *self.__pack_float64(norm2 or 0.0), - *self.__pack_float64(peak_signal_to_noise_ratio or 0.0), + *_sz_pack_float64(absolute or 0.0), + *_sz_pack_float64(relative or 0.0), + *_sz_pack_float64(norm2 or 0.0), + *_sz_pack_float64(peak_signal_to_noise_ratio or 0.0), ) logger.info(f"SZ3 mode {sz_mode} used.") logger.info(f"filter options {compression_opts}") @@ -787,15 +1086,33 @@ def __init__( self.filter_options = compression_opts - @staticmethod - def __pack_float64(error: float) -> tuple[int, int]: - # Pack as big-endian IEEE 754 double - packed = struct.pack(">d", error) - # Unpack most-significant bits as unsigned int - high = struct.unpack(">I", packed[0:4])[0] - # Unpack least-significant bits as unsigned int - low = struct.unpack(">I", packed[4:8])[0] - return high, low + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> SZ3: + """Returns compression arguments from HDF5 compression filters "cd_values" options + + :param filter_options: Expected format: + (_, _, _, _, mode, absolute1, absolute2, relative1, relative2, norm2_1, norm2_2, pointwise1, pointwise2) + :raises ValueError: Unsupported filter_options + """ + if len(filter_options) < 13: + raise ValueError(f"Expected 13 values, got {len(filter_options)}") + + sz_mode = filter_options[4] + if sz_mode == 0: + return cls( + absolute=_sz_unpack_float64(filter_options[5], filter_options[6]) + ) + if sz_mode == 1: + return cls( + relative=_sz_unpack_float64(filter_options[7], filter_options[8]) + ) + if sz_mode == 2: + return 
cls(norm2=_sz_unpack_float64(filter_options[9], filter_options[10])) + if sz_mode == 3: + psnr = _sz_unpack_float64(filter_options[11], filter_options[12]) + return cls(peak_signal_to_noise_ratio=psnr) + + raise ValueError(f"Unsupported sz_mode: {sz_mode}") class Zstd(FilterBase): @@ -822,6 +1139,18 @@ def __init__(self, clevel: int = 3): raise ValueError("clevel must be in the range [1, 22]") self.filter_options = (clevel,) + @classmethod + def _from_filter_options(cls, filter_options: tuple[int, ...]) -> Zstd: + """Returns compression arguments from HDF5 compression filters "cd_values" options + + :param filter_options: Expected format: (clevel,) + :raises ValueError: Unsupported filter_options + """ + if len(filter_options) == 0: + return cls() + else: + return cls(clevel=filter_options[0]) + FILTER_CLASSES: tuple[type[FilterBase], ...] = ( Bitshuffle, diff --git a/src/hdf5plugin/_utils.py b/src/hdf5plugin/_utils.py index cedafa3f..6d415e1d 100644 --- a/src/hdf5plugin/_utils.py +++ b/src/hdf5plugin/_utils.py @@ -230,6 +230,40 @@ def get_filters( return tuple(filter_classes) +def from_filter_options( + filter_id: int | str, filter_options: tuple[int, ...] +) -> FilterBase: + """Returns corresponding compression filter configuration instance. + + .. code-block:: python + + create_plist = dataset.id.get_create_plist() + + compression_filters = [] + + for index in range(create_plist.get_nfilters()): + filter_id, _, filter_options, _ = create_plist.get_filter(index) + if filter_id in hdf5plugin.FILTERS.values(): + compression_filters.append(hdf5plugin.from_filter_options(filter_id, filter_options)) + + :param filter_id: HDF5 compression filter ID + :param filter_options: Compression filter configuration as stored in HDF5 datasets + :raises ValueError: Unsupported or invalid filter_id, filter_options combination + :raises NotImplementedError: Given filter or version of the filter is not supported + """ + if isinstance(filter_id, str): + try: + filter_id = FILTERS[filter_id] + except KeyError: + raise ValueError(f"Unsupported filter id: {filter_id}") + + for filter_cls in FILTER_CLASSES: + if filter_id == filter_cls.filter_id: + return filter_cls._from_filter_options(filter_options) + + raise ValueError(f"Unsupported filter id: {filter_id}") + + def register( filters: int | str | tuple[int | str, ...] 
= tuple(FILTERS.keys()), force: bool = True, diff --git a/src/hdf5plugin/test.py b/src/hdf5plugin/test.py index e285db53..59676444 100644 --- a/src/hdf5plugin/test.py +++ b/src/hdf5plugin/test.py @@ -499,6 +499,299 @@ def testStringsZstd(self): self._test_strings("zstd") +class TestFromFilterOptionsMethods(unittest.TestCase): + """Test _from_filter_options methods""" + + def testBitshuffle(self): + for filter_options, expected_options in ( + # (_, _, _, nelems, compression_id, clevel) + ((), (0, 0)), # Default: no compression + ((0, 2, 4, 256), (256, 0)), # custom nelems + ((0, 2, 4, 0, 2), (0, 2)), # LZ4 + ((0, 2, 4, 0, 3), (0, 3, 3)), # Zstd with default clevel + ((0, 2, 4, 0, 3, 5), (0, 3, 5)), # Zstd with custom clevel + ): + with self.subTest(filter_options=filter_options): + compression_filter = hdf5plugin.Bitshuffle._from_filter_options( + filter_options + ) + self.assertEqual(compression_filter.filter_options, expected_options) + + def testBlosc(self): + for filter_options, expected_options in ( + # (_, _, _, _, clevel, shuffle, compression_id) + ((), (0, 0, 0, 0, 5, 1, 0)), # Default: no compression + ((2, 2, 4, 40000, 3), (0, 0, 0, 0, 3, 1, 0)), # custom clevel + ( + (2, 2, 4, 40000, 3, 2), + (0, 0, 0, 0, 3, 2, 0), + ), # custom clevel and shuffle + ((2, 2, 4, 40000, 8, 2, 1), (0, 0, 0, 0, 8, 2, 1)), # all custom + ): + with self.subTest(filter_options=filter_options): + compression_filter = hdf5plugin.Blosc._from_filter_options( + filter_options + ) + self.assertEqual(compression_filter.filter_options, expected_options) + + def testBlosc2(self): + for filter_options, expected_options in ( + # (_, _, _, _, clevel, filters, compression_id) + ((), (0, 0, 0, 0, 5, 1, 0)), # Default: no compression + ((2, 2, 4, 40000, 3), (0, 0, 0, 0, 3, 1, 0)), # custom clevel + ( + (2, 2, 4, 40000, 3, 2), + (0, 0, 0, 0, 3, 2, 0), + ), # custom clevel and filters + ((2, 2, 4, 40000, 8, 2, 1), (0, 0, 0, 0, 8, 2, 1)), # all custom + ): + with self.subTest(filter_options=filter_options): + compression_filter = hdf5plugin.Blosc2._from_filter_options( + filter_options + ) + self.assertEqual(compression_filter.filter_options, expected_options) + + def testBZip2(self): + for filter_options, expected_options in ( + # (blocksize,) + ((), (9,)), + ((5,), (5,)), + ): + with self.subTest(filter_options=filter_options): + compression_filter = hdf5plugin.BZip2._from_filter_options( + filter_options + ) + self.assertEqual(compression_filter.filter_options, expected_options) + + def testFciDecomp(self): + compression_filter = hdf5plugin.FciDecomp._from_filter_options((1, 2, 3)) + self.assertEqual(compression_filter.filter_options, ()) + + def testLZ4(self): + for filter_options, expected_options in ( + # (nbytes,) + ((), (0,)), + ((1024,), (1024,)), + ): + with self.subTest(filter_options=filter_options): + compression_filter = hdf5plugin.LZ4._from_filter_options(filter_options) + self.assertEqual(compression_filter.filter_options, expected_options) + + def testSperr(self): + for filter_options, expected_filter in ( + ((1043, 269484032, 128, 0, 0), hdf5plugin.Sperr()), + ( + (1107, 2418016256, 256, 0, 0), + hdf5plugin.Sperr(rate=32, swap=True, missing_value_mode=1), + ), + ((1043, 940177214, 256, 0, 0), hdf5plugin.Sperr(absolute=1e-3)), + ( + (1171, 537001984, 256, 0, 0), + hdf5plugin.Sperr(peak_signal_to_noise_ratio=2.0, missing_value_mode=2), + ), + ): + with self.subTest(filter_options=filter_options): + compression_filter = hdf5plugin.Sperr._from_filter_options( + filter_options + ) + self.assertEqual( + 
compression_filter.filter_options, expected_filter.filter_options + ) + + def testSZ(self): + for filter_options, expected_filter in ( + ( + (1, 0, 0, 256, 10, 0, 0, 0, 0, 1055193269, 2296604913, 0, 0), + hdf5plugin.SZ(), + ), + ( + (1, 0, 0, 256, 0, 1062232653, 3539053052, 0, 0, 0, 0, 0, 0), + hdf5plugin.SZ(absolute=1e-3), + ), + ( + (1, 0, 0, 256, 1, 0, 0, 1062232653, 3539053052, 0, 0, 0, 0), + hdf5plugin.SZ(relative=1e-3), + ), + ( + (1, 0, 0, 256, 10, 0, 0, 0, 0, 1062232653, 3539053052, 0, 0), + hdf5plugin.SZ(pointwise_relative=1e-3), + ), + ): + with self.subTest(filter_options=filter_options): + compression_filter = hdf5plugin.SZ._from_filter_options(filter_options) + self.assertEqual( + compression_filter.filter_options, expected_filter.filter_options + ) + + def testSZ3(self): + for filter_options, expected_filter in ( + ( + (1, 0, 0, 256, 0, 1058682594, 3944497965, 0, 0, 0, 0, 0, 0), + hdf5plugin.SZ3(), + ), + ( + (1, 0, 0, 256, 0, 1051772663, 2696277389, 0, 0, 0, 0, 0, 0), + hdf5plugin.SZ3(absolute=1e-6), + ), + ( + (1, 0, 0, 256, 1, 0, 0, 1062232653, 3539053052, 0, 0, 0, 0), + hdf5plugin.SZ3(relative=1e-3), + ), + ( + (1, 0, 0, 256, 2, 0, 0, 0, 0, 1062232653, 3539053052, 0, 0), + hdf5plugin.SZ3(norm2=1e-3), + ), + ( + (1, 0, 0, 256, 3, 0, 0, 0, 0, 0, 0, 1062232653, 3539053052), + hdf5plugin.SZ3(peak_signal_to_noise_ratio=1e-3), + ), + ): + with self.subTest(filter_options=filter_options): + compression_filter = hdf5plugin.SZ3._from_filter_options(filter_options) + self.assertEqual( + compression_filter.filter_options, expected_filter.filter_options + ) + + def testZfp(self): + for filter_options, expected_filter in ( + ( + (269504785, 91252346, 4026532854, 2167406593), + hdf5plugin.Zfp(precision=20), + ), + ( + (269504785, 91252346, 4026532854, 3404726273), + hdf5plugin.Zfp(accuracy=2**-4), + ), + ( + (269504785, 91252346, 4026532854, 2281701377), + hdf5plugin.Zfp(reversible=True), + ), + ( + (269504785, 91252346, 4026532854, 4293918721, 3767009280, 494351), + hdf5plugin.Zfp(minbits=1, maxbits=16657, maxprec=64, minexp=-1047), + ), + ): + with self.subTest(filter_options=filter_options): + compression_filter = hdf5plugin.Zfp._from_filter_options(filter_options) + self.assertEqual( + compression_filter.filter_options, expected_filter.filter_options + ) + + def testZstd(self): + for filter_options, expected_options in ( + # (clevel,) + ((), (3,)), + ((10,), (10,)), + ): + with self.subTest(filter_options=filter_options): + compression_filter = hdf5plugin.Zstd._from_filter_options( + filter_options + ) + self.assertEqual(compression_filter.filter_options, expected_options) + + +class TestFromFilterOptions(unittest.TestCase): + """Test from_filter_options function""" + + def test_filter_name(self): + compression_filter = hdf5plugin.from_filter_options("bzip2", (5,)) + self.assertEqual(compression_filter, hdf5plugin.BZip2(blocksize=5)) + + +class TestFromFilterOptionsRoundtrip(unittest.TestCase): + """Test from_filter_options function roundtrip""" + + def _test( + self, compression_filter: _filters.FilterBase, data: numpy.ndarray[Any, Any] + ): + with h5py.File("in_memory", "w", driver="core", backing_store=False) as h5f: + h5f.create_dataset( + "data", + data=data, + chunks=data.shape, + compression=compression_filter, + ) + h5f.flush() + + plist = h5f["data"].id.get_create_plist() + filters = [plist.get_filter(i) for i in range(plist.get_nfilters())] + + self.assertEqual(len(filters), 1) + filter_id, _, filter_options, _ = filters[0] + + retrieved_filter = 
hdf5plugin.from_filter_options(filter_id, filter_options) + + self.assertEqual( + compression_filter, + retrieved_filter, + msg=f"{(compression_filter.filter_id, compression_filter.filter_options)} != {(retrieved_filter.filter_id, retrieved_filter.filter_options)}", + ) + + @unittest.skipUnless(should_test("bshuf"), "Bitshuffle filter not available") + def testBitshuffle(self): + data = numpy.arange(256**2, dtype=numpy.float32).reshape(256, 256) + self._test(hdf5plugin.Bitshuffle(), data) + + @unittest.skipUnless(should_test("blosc"), "Blosc filter not available") + def testBlosc(self): + data = numpy.arange(256**2, dtype=numpy.float32).reshape(256, 256) + self._test(hdf5plugin.Blosc(), data) + + @unittest.skipUnless(should_test("blosc2"), "Blosc2 filter not available") + def testBlosc2(self): + data = numpy.arange(256**2, dtype=numpy.float32).reshape(256, 256) + self._test(hdf5plugin.Blosc2(), data) + + @unittest.skipUnless(should_test("bzip2"), "BZip2 filter not available") + def testBZip2(self): + data = numpy.arange(256**2, dtype=numpy.float32).reshape(256, 256) + self._test(hdf5plugin.BZip2(), data) + + @unittest.skipUnless(should_test("fcidecomp"), "FCIDECOMP filter not available") + def testFciDecomp(self): + data = numpy.arange(256**2, dtype=numpy.uint16).reshape(256, 256) + self._test(hdf5plugin.FciDecomp(), data) + + @unittest.skipUnless(should_test("lz4"), "LZ4 filter not available") + def testLZ4(self): + data = numpy.arange(256**2, dtype=numpy.float32).reshape(256, 256) + self._test(hdf5plugin.LZ4(), data) + + @unittest.skipUnless(should_test("sperr"), "Sperr filter not available") + def testSperr(self): + data = numpy.arange(256**2, dtype=numpy.float32).reshape(256, 256) + self._test(hdf5plugin.Sperr(), data) + + @unittest.skipUnless(should_test("sz"), "SZ filter not available") + def testSZ(self): + data = numpy.arange(256**2, dtype=numpy.float32).reshape(256, 256) + self._test(hdf5plugin.SZ(), data) + + @unittest.skipUnless(should_test("sz3"), "SZ3 filter not available") + def testSZ3(self): + data = numpy.arange(256**2, dtype=numpy.float32).reshape(256, 256) + self._test(hdf5plugin.SZ3(), data) + + @unittest.skipUnless(should_test("zfp"), "Zfp filter not available") + def testZfp(self): + data = numpy.arange(256**2, dtype=numpy.float32).reshape(256, 256) + # Roundtrip does not work for all parameters including the default + for mode_name, compression_filter in { + # rate does not roundtrip + "precision": hdf5plugin.Zfp(precision=10), + "accuracy": hdf5plugin.Zfp(accuracy=2**-3), # roundtrip only for 2^n + "reversible": hdf5plugin.Zfp(reversible=True), + "expert": hdf5plugin.Zfp(minbits=2, maxbits=100, maxprec=32, minexp=-10), + }.items(): + with self.subTest(mode_name): + self._test(compression_filter, data) + + @unittest.skipUnless(should_test("zstd"), "Zstd filter not available") + def testZstd(self): + data = numpy.arange(256**2, dtype=numpy.float32).reshape(256, 256) + self._test(hdf5plugin.Zstd(), data) + + class TestPackage(unittest.TestCase): """Test general features of the hdf5plugin package""" @@ -742,6 +1035,9 @@ def suite() -> unittest.TestSuite: for cls in ( TestHDF5PluginRW, TestStrings, + TestFromFilterOptionsMethods, + TestFromFilterOptions, + TestFromFilterOptionsRoundtrip, TestPackage, TestRegisterFilter, TestGetFilters,