diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0b5ecb5c..bb8e8af9 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -61,7 +61,7 @@ jobs: conda activate env export DISABLE_NUMCODECS_AVX2="" # TODO: put back zfpy import when it supports numpy 2.0 - python -m pip install -v -e .[test,test_extras,msgpack] + python -m pip install -v -e .[test,test_extras,msgpack,crc32c] - name: Install pcodec if: matrix.python-version != '3.13.0' diff --git a/docs/checksum32.rst b/docs/checksum32.rst index 6f255141..fafceca2 100644 --- a/docs/checksum32.rst +++ b/docs/checksum32.rst @@ -13,6 +13,17 @@ CRC32 .. automethod:: from_config +CRC32C +------ +.. autoclass:: CRC32C + + .. autoattribute:: codec_id + .. automethod:: encode + .. automethod:: decode + .. automethod:: get_config + .. automethod:: from_config + + Adler32 ------- .. autoclass:: Adler32 diff --git a/docs/release.rst b/docs/release.rst index 8ee0ff9d..bb9225e5 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -16,6 +16,10 @@ Fix * Fix in-place mutation of input array in `BitRound`. By :user:`Sam Levang `, :issue:`608` +Enhancements +~~~~~~~~~~~~ +* Add Crc32c checksum codec + By :user:`Norman Rzepka `, :issue:`613`. .. _release_0.13.1: diff --git a/fixture/adler32/array.05.npy b/fixture/adler32/array.05.npy new file mode 100644 index 00000000..3d1e86b0 Binary files /dev/null and b/fixture/adler32/array.05.npy differ diff --git a/fixture/adler32/array.06.npy b/fixture/adler32/array.06.npy new file mode 100644 index 00000000..dc539322 Binary files /dev/null and b/fixture/adler32/array.06.npy differ diff --git a/fixture/adler32/array.07.npy b/fixture/adler32/array.07.npy new file mode 100644 index 00000000..c0a9f53b Binary files /dev/null and b/fixture/adler32/array.07.npy differ diff --git a/fixture/adler32/array.08.npy b/fixture/adler32/array.08.npy new file mode 100644 index 00000000..bbb20a4b Binary files /dev/null and b/fixture/adler32/array.08.npy differ diff --git a/fixture/adler32/array.09.npy b/fixture/adler32/array.09.npy new file mode 100644 index 00000000..1ff12503 Binary files /dev/null and b/fixture/adler32/array.09.npy differ diff --git a/fixture/adler32/array.10.npy b/fixture/adler32/array.10.npy new file mode 100644 index 00000000..20a67f20 Binary files /dev/null and b/fixture/adler32/array.10.npy differ diff --git a/fixture/adler32/array.11.npy b/fixture/adler32/array.11.npy new file mode 100644 index 00000000..1a80b0e2 Binary files /dev/null and b/fixture/adler32/array.11.npy differ diff --git a/fixture/adler32/array.12.npy b/fixture/adler32/array.12.npy new file mode 100644 index 00000000..99fd9e21 Binary files /dev/null and b/fixture/adler32/array.12.npy differ diff --git a/fixture/adler32/codec.00/encoded.05.dat b/fixture/adler32/codec.00/encoded.05.dat new file mode 100644 index 00000000..35a9491f Binary files /dev/null and b/fixture/adler32/codec.00/encoded.05.dat differ diff --git a/fixture/adler32/codec.00/encoded.06.dat b/fixture/adler32/codec.00/encoded.06.dat new file mode 100644 index 00000000..d3e99aee Binary files /dev/null and b/fixture/adler32/codec.00/encoded.06.dat differ diff --git a/fixture/adler32/codec.00/encoded.07.dat b/fixture/adler32/codec.00/encoded.07.dat new file mode 100644 index 00000000..2afd0700 Binary files /dev/null and b/fixture/adler32/codec.00/encoded.07.dat differ diff --git a/fixture/adler32/codec.00/encoded.08.dat b/fixture/adler32/codec.00/encoded.08.dat new file mode 100644 index 00000000..bd6c3b6a Binary files /dev/null and b/fixture/adler32/codec.00/encoded.08.dat differ diff --git a/fixture/adler32/codec.00/encoded.09.dat b/fixture/adler32/codec.00/encoded.09.dat new file mode 100644 index 00000000..a91fc82b Binary files /dev/null and b/fixture/adler32/codec.00/encoded.09.dat differ diff --git a/fixture/adler32/codec.00/encoded.10.dat b/fixture/adler32/codec.00/encoded.10.dat new file mode 100644 index 00000000..2c2a0cd3 Binary files /dev/null and b/fixture/adler32/codec.00/encoded.10.dat differ diff --git a/fixture/adler32/codec.00/encoded.11.dat b/fixture/adler32/codec.00/encoded.11.dat new file mode 100644 index 00000000..e7653fc0 Binary files /dev/null and b/fixture/adler32/codec.00/encoded.11.dat differ diff --git a/fixture/adler32/codec.00/encoded.12.dat b/fixture/adler32/codec.00/encoded.12.dat new file mode 100644 index 00000000..367aae0f Binary files /dev/null and b/fixture/adler32/codec.00/encoded.12.dat differ diff --git a/fixture/crc32/array.05.npy b/fixture/crc32/array.05.npy new file mode 100644 index 00000000..3d1e86b0 Binary files /dev/null and b/fixture/crc32/array.05.npy differ diff --git a/fixture/crc32/array.06.npy b/fixture/crc32/array.06.npy new file mode 100644 index 00000000..dc539322 Binary files /dev/null and b/fixture/crc32/array.06.npy differ diff --git a/fixture/crc32/array.07.npy b/fixture/crc32/array.07.npy new file mode 100644 index 00000000..c0a9f53b Binary files /dev/null and b/fixture/crc32/array.07.npy differ diff --git a/fixture/crc32/array.08.npy b/fixture/crc32/array.08.npy new file mode 100644 index 00000000..bbb20a4b Binary files /dev/null and b/fixture/crc32/array.08.npy differ diff --git a/fixture/crc32/array.09.npy b/fixture/crc32/array.09.npy new file mode 100644 index 00000000..1ff12503 Binary files /dev/null and b/fixture/crc32/array.09.npy differ diff --git a/fixture/crc32/array.10.npy b/fixture/crc32/array.10.npy new file mode 100644 index 00000000..20a67f20 Binary files /dev/null and b/fixture/crc32/array.10.npy differ diff --git a/fixture/crc32/array.11.npy b/fixture/crc32/array.11.npy new file mode 100644 index 00000000..1a80b0e2 Binary files /dev/null and b/fixture/crc32/array.11.npy differ diff --git a/fixture/crc32/array.12.npy b/fixture/crc32/array.12.npy new file mode 100644 index 00000000..99fd9e21 Binary files /dev/null and b/fixture/crc32/array.12.npy differ diff --git a/fixture/crc32/codec.00/encoded.05.dat b/fixture/crc32/codec.00/encoded.05.dat new file mode 100644 index 00000000..0130010a Binary files /dev/null and b/fixture/crc32/codec.00/encoded.05.dat differ diff --git a/fixture/crc32/codec.00/encoded.06.dat b/fixture/crc32/codec.00/encoded.06.dat new file mode 100644 index 00000000..268cd4ce Binary files /dev/null and b/fixture/crc32/codec.00/encoded.06.dat differ diff --git a/fixture/crc32/codec.00/encoded.07.dat b/fixture/crc32/codec.00/encoded.07.dat new file mode 100644 index 00000000..1262a4e7 Binary files /dev/null and b/fixture/crc32/codec.00/encoded.07.dat differ diff --git a/fixture/crc32/codec.00/encoded.08.dat b/fixture/crc32/codec.00/encoded.08.dat new file mode 100644 index 00000000..bcdcce39 Binary files /dev/null and b/fixture/crc32/codec.00/encoded.08.dat differ diff --git a/fixture/crc32/codec.00/encoded.09.dat b/fixture/crc32/codec.00/encoded.09.dat new file mode 100644 index 00000000..07301d24 Binary files /dev/null and b/fixture/crc32/codec.00/encoded.09.dat differ diff --git a/fixture/crc32/codec.00/encoded.10.dat b/fixture/crc32/codec.00/encoded.10.dat new file mode 100644 index 00000000..c3a4fb15 Binary files /dev/null and b/fixture/crc32/codec.00/encoded.10.dat differ diff --git a/fixture/crc32/codec.00/encoded.11.dat b/fixture/crc32/codec.00/encoded.11.dat new file mode 100644 index 00000000..c5ad47b6 Binary files /dev/null and b/fixture/crc32/codec.00/encoded.11.dat differ diff --git a/fixture/crc32/codec.00/encoded.12.dat b/fixture/crc32/codec.00/encoded.12.dat new file mode 100644 index 00000000..ee279f79 Binary files /dev/null and b/fixture/crc32/codec.00/encoded.12.dat differ diff --git a/fixture/crc32c/array.00.npy b/fixture/crc32c/array.00.npy new file mode 100644 index 00000000..a69ba6cb Binary files /dev/null and b/fixture/crc32c/array.00.npy differ diff --git a/fixture/crc32c/array.01.npy b/fixture/crc32c/array.01.npy new file mode 100644 index 00000000..0ca9a6cd Binary files /dev/null and b/fixture/crc32c/array.01.npy differ diff --git a/fixture/crc32c/array.02.npy b/fixture/crc32c/array.02.npy new file mode 100644 index 00000000..13c9850a Binary files /dev/null and b/fixture/crc32c/array.02.npy differ diff --git a/fixture/crc32c/array.03.npy b/fixture/crc32c/array.03.npy new file mode 100644 index 00000000..433e7ecb Binary files /dev/null and b/fixture/crc32c/array.03.npy differ diff --git a/fixture/crc32c/array.04.npy b/fixture/crc32c/array.04.npy new file mode 100644 index 00000000..b8d82303 Binary files /dev/null and b/fixture/crc32c/array.04.npy differ diff --git a/fixture/crc32c/array.05.npy b/fixture/crc32c/array.05.npy new file mode 100644 index 00000000..65092ded Binary files /dev/null and b/fixture/crc32c/array.05.npy differ diff --git a/fixture/crc32c/array.06.npy b/fixture/crc32c/array.06.npy new file mode 100644 index 00000000..88e46ff2 Binary files /dev/null and b/fixture/crc32c/array.06.npy differ diff --git a/fixture/crc32c/array.07.npy b/fixture/crc32c/array.07.npy new file mode 100644 index 00000000..e3036eec Binary files /dev/null and b/fixture/crc32c/array.07.npy differ diff --git a/fixture/crc32c/array.08.npy b/fixture/crc32c/array.08.npy new file mode 100644 index 00000000..97ad9756 Binary files /dev/null and b/fixture/crc32c/array.08.npy differ diff --git a/fixture/crc32c/array.09.npy b/fixture/crc32c/array.09.npy new file mode 100644 index 00000000..b01c6c33 Binary files /dev/null and b/fixture/crc32c/array.09.npy differ diff --git a/fixture/crc32c/array.10.npy b/fixture/crc32c/array.10.npy new file mode 100644 index 00000000..c1c23e8a Binary files /dev/null and b/fixture/crc32c/array.10.npy differ diff --git a/fixture/crc32c/array.11.npy b/fixture/crc32c/array.11.npy new file mode 100644 index 00000000..37848680 Binary files /dev/null and b/fixture/crc32c/array.11.npy differ diff --git a/fixture/crc32c/array.12.npy b/fixture/crc32c/array.12.npy new file mode 100644 index 00000000..a7380dbf Binary files /dev/null and b/fixture/crc32c/array.12.npy differ diff --git a/fixture/crc32c/codec.00/config.json b/fixture/crc32c/codec.00/config.json new file mode 100644 index 00000000..72f18cd5 --- /dev/null +++ b/fixture/crc32c/codec.00/config.json @@ -0,0 +1,3 @@ +{ + "id": "crc32c" +} \ No newline at end of file diff --git a/fixture/crc32c/codec.00/encoded.00.dat b/fixture/crc32c/codec.00/encoded.00.dat new file mode 100644 index 00000000..0d184d5e Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.00.dat differ diff --git a/fixture/crc32c/codec.00/encoded.01.dat b/fixture/crc32c/codec.00/encoded.01.dat new file mode 100644 index 00000000..4396fada Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.01.dat differ diff --git a/fixture/crc32c/codec.00/encoded.02.dat b/fixture/crc32c/codec.00/encoded.02.dat new file mode 100644 index 00000000..df2800c2 Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.02.dat differ diff --git a/fixture/crc32c/codec.00/encoded.03.dat b/fixture/crc32c/codec.00/encoded.03.dat new file mode 100644 index 00000000..c046eec5 Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.03.dat differ diff --git a/fixture/crc32c/codec.00/encoded.04.dat b/fixture/crc32c/codec.00/encoded.04.dat new file mode 100644 index 00000000..913d201d Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.04.dat differ diff --git a/fixture/crc32c/codec.00/encoded.05.dat b/fixture/crc32c/codec.00/encoded.05.dat new file mode 100644 index 00000000..520963d5 Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.05.dat differ diff --git a/fixture/crc32c/codec.00/encoded.06.dat b/fixture/crc32c/codec.00/encoded.06.dat new file mode 100644 index 00000000..d1cb4716 Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.06.dat differ diff --git a/fixture/crc32c/codec.00/encoded.07.dat b/fixture/crc32c/codec.00/encoded.07.dat new file mode 100644 index 00000000..c254c636 Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.07.dat differ diff --git a/fixture/crc32c/codec.00/encoded.08.dat b/fixture/crc32c/codec.00/encoded.08.dat new file mode 100644 index 00000000..523e1e0c Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.08.dat differ diff --git a/fixture/crc32c/codec.00/encoded.09.dat b/fixture/crc32c/codec.00/encoded.09.dat new file mode 100644 index 00000000..3766d42e Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.09.dat differ diff --git a/fixture/crc32c/codec.00/encoded.10.dat b/fixture/crc32c/codec.00/encoded.10.dat new file mode 100644 index 00000000..1e92f247 Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.10.dat differ diff --git a/fixture/crc32c/codec.00/encoded.11.dat b/fixture/crc32c/codec.00/encoded.11.dat new file mode 100644 index 00000000..94176987 Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.11.dat differ diff --git a/fixture/crc32c/codec.00/encoded.12.dat b/fixture/crc32c/codec.00/encoded.12.dat new file mode 100644 index 00000000..1e4485c9 Binary files /dev/null and b/fixture/crc32c/codec.00/encoded.12.dat differ diff --git a/fixture/delta/bool/array.00.npy b/fixture/delta/bool/array.00.npy new file mode 100644 index 00000000..edf40adb Binary files /dev/null and b/fixture/delta/bool/array.00.npy differ diff --git a/fixture/delta/bool/codec.00/config.json b/fixture/delta/bool/codec.00/config.json new file mode 100644 index 00000000..a123a8a2 --- /dev/null +++ b/fixture/delta/bool/codec.00/config.json @@ -0,0 +1,5 @@ +{ + "astype": "|b1", + "dtype": "|b1", + "id": "delta" +} \ No newline at end of file diff --git a/fixture/delta/bool/codec.00/encoded.00.dat b/fixture/delta/bool/codec.00/encoded.00.dat new file mode 100644 index 00000000..57e198b0 Binary files /dev/null and b/fixture/delta/bool/codec.00/encoded.00.dat differ diff --git a/numcodecs/__init__.py b/numcodecs/__init__.py index aae5671b..c7401665 100644 --- a/numcodecs/__init__.py +++ b/numcodecs/__init__.py @@ -117,9 +117,10 @@ register_codec(MsgPack) -from numcodecs.checksum32 import CRC32, Adler32, JenkinsLookup3 +from numcodecs.checksum32 import CRC32, CRC32C, Adler32, JenkinsLookup3 register_codec(CRC32) +register_codec(CRC32C) register_codec(Adler32) register_codec(JenkinsLookup3) diff --git a/numcodecs/checksum32.py b/numcodecs/checksum32.py index ab36dd93..6246ab1c 100644 --- a/numcodecs/checksum32.py +++ b/numcodecs/checksum32.py @@ -1,5 +1,6 @@ import struct import zlib +from typing import Literal import numpy as np @@ -7,36 +8,104 @@ from .compat import ensure_contiguous_ndarray, ndarray_copy from .jenkins import jenkins_lookup3 +CHECKSUM_LOCATION = Literal['start', 'end'] + class Checksum32(Codec): # override in sub-class checksum = None + location: CHECKSUM_LOCATION = 'start' + + def __init__(self, location: CHECKSUM_LOCATION | None = None): + if location is not None: + self.location = location + if self.location not in ['start', 'end']: + raise ValueError(f"Invalid checksum location: {self.location}") def encode(self, buf): arr = ensure_contiguous_ndarray(buf).view('u1') checksum = self.checksum(arr) & 0xFFFFFFFF enc = np.empty(arr.nbytes + 4, dtype='u1') - enc[:4].view('=0.2.0", ] +crc32c = [ + "crc32c>=2.7", +] [tool.setuptools] license-files = ["LICENSE.txt"]