Skip to content

Commit 72600e6

Browse files
committed
adds crc32c codec
1 parent fed46e5 commit 72600e6

File tree

7 files changed

+158
-0
lines changed

7 files changed

+158
-0
lines changed

docs/other/crc32c.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Crc32c
2+
======
3+
.. automodule:: numcodecs.crc32c_
4+
5+
.. autoclass:: Crc32c
6+
7+
.. autoattribute:: codec_id
8+
.. automethod:: encode
9+
.. automethod:: decode
10+
.. automethod:: get_config
11+
.. automethod:: from_config

docs/other/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Other codecs
55
.. toctree::
66
:maxdepth: 2
77

8+
crc32c
89
json
910
pickles
1011
msgpacks

docs/release.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ Fix
1616
* Fix in-place mutation of input array in `BitRound`.
1717
By :user:`Sam Levang <slevang>`, :issue:`608`
1818

19+
Enhancements
20+
~~~~~~~~~~~~
21+
* Add Crc32c checksum codec.
22+
By :user:`Norman Rzepka <normanrz>`, :issue:`1`.
1923

2024
.. _release_0.13.1:
2125

numcodecs/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,3 +141,7 @@
141141
from numcodecs.pcodec import PCodec
142142

143143
register_codec(PCodec)
144+
145+
from numcodecs.crc32c_ import Crc32c
146+
147+
register_codec(Crc32c)

numcodecs/crc32c_.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import numpy as np
2+
from crc32c import crc32c
3+
4+
from .abc import Codec
5+
from .compat import ensure_bytes, ensure_contiguous_ndarray
6+
7+
8+
class Crc32c(Codec):
    """Codec that adds a CRC32C checksum to the encoded data.

    The encoded form is the input bytes followed by a 4-byte little-endian
    unsigned integer holding the CRC32C checksum of those bytes. Decoding
    verifies the trailing checksum and returns the data without it.
    """

    codec_id = 'crc32c'

    def encode(self, buf):
        """Append the CRC32C checksum of `buf` to its bytes.

        Parameters
        ----------
        buf : buffer-like
            Data to be encoded. Normalised through
            ``ensure_contiguous_ndarray`` and ``ensure_bytes``.

        Returns
        -------
        enc : bytes
            The bytes of `buf` followed by the 4-byte little-endian checksum.
        """
        # normalise inputs
        buf = ensure_contiguous_ndarray(buf)
        buf = ensure_bytes(buf)

        # serialise the checksum as a little-endian unsigned 32-bit integer
        checksum_arr = np.zeros(1, dtype="<u4")
        checksum_arr[0] = crc32c(buf)
        return buf + checksum_arr.tobytes()

    def decode(self, buf, out=None):
        """Verify the trailing CRC32C checksum and return the payload.

        Parameters
        ----------
        buf : buffer-like
            Encoded data: payload bytes followed by a 4-byte little-endian
            CRC32C checksum.
        out : buffer-like, optional
            Writeable buffer to store the decoded payload. Its size in bytes
            must equal the payload size exactly.

        Returns
        -------
        dec : bytes or buffer-like
            `out` if it was provided, otherwise the payload as ``bytes``.

        Raises
        ------
        ValueError
            If `buf` is shorter than 4 bytes, if the stored and computed
            checksums differ, or if `out` is smaller or larger than the
            payload.
        """
        # normalise inputs
        buf = ensure_bytes(buf)

        # reject truncated input before touching the checksum trailer
        if len(buf) < 4:
            raise ValueError("Input buffer is too short to contain a CRC32C checksum.")

        payload_size = len(buf) - 4

        # verify integrity first so corrupt data is never copied into `out`
        stored_checksum = np.frombuffer(buf, "<u4", offset=payload_size)[0]
        computed_checksum = crc32c(memoryview(buf)[:payload_size])
        if computed_checksum != stored_checksum:
            raise ValueError(
                f"Stored and computed checksum do not match. Stored: {stored_checksum}. Computed: {computed_checksum}."
            )

        if out is None:
            return buf[:-4]

        out_view = ensure_contiguous_ndarray(out).view("b")
        if len(out_view) < payload_size:
            raise ValueError("Output buffer is too small to contain decoded data.")
        if len(out_view) > payload_size:
            raise ValueError("Output buffer is too large to contain decoded data.")
        out_view[:] = np.frombuffer(buf, "b", count=payload_size, offset=0)
        return out

numcodecs/tests/test_crc32c.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import numpy as np
2+
import pytest
3+
4+
from numcodecs.crc32c_ import Crc32c
5+
from numcodecs.tests.common import (
6+
check_encode_decode,
7+
check_err_decode_object_buffer,
8+
check_err_encode_object_buffer,
9+
check_repr,
10+
)
11+
12+
# mix of dtypes: integer, float, bool, string
13+
# mix of shapes: 1D, 2D, 3D
14+
# mix of orders: C, F
15+
arrays = [
    # plain numeric and boolean data in a mix of shapes and memory orders
    np.arange(1000, dtype='i4'),
    np.linspace(1000, 1001, 1000, dtype='f8'),
    np.random.normal(loc=1000, scale=1, size=(100, 10)),
    np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'),
    # fixed-width byte strings
    np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10),
    # datetime64 / timedelta64 views over non-negative integers
    *(
        np.random.randint(0, 2**60, size=1000, dtype='u8').view(kind)
        for kind in ('M8[ns]', 'm8[ns]')
    ),
    *(
        np.random.randint(0, 2**25, size=1000, dtype='u8').view(kind)
        for kind in ('M8[m]', 'm8[m]')
    ),
    # datetime64 / timedelta64 views over values at the bottom of the int64 range
    *(
        np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view(kind)
        for kind in ('M8[ns]', 'm8[ns]', 'M8[m]', 'm8[m]')
    ),
]
30+
31+
32+
def test_encode_decode():
    """Round-trip every fixture array through a freshly built Crc32c codec."""
    for sample in arrays:
        codec = Crc32c()
        check_encode_decode(sample, codec)
35+
36+
37+
def test_checksum():
    """Check the trailing little-endian checksum against a known value."""
    encoded = Crc32c().encode(np.arange(0, 64, dtype="uint8"))
    # the last four bytes of the encoded buffer hold the checksum
    trailer = np.frombuffer(encoded, dtype="<u4", offset=(len(encoded) - 4))
    assert trailer[0] == np.uint32(4218238699)
41+
42+
43+
def test_repr():
    """Check the codec's repr via the shared ``check_repr`` helper."""
    expected_repr = "Crc32c()"
    check_repr(expected_repr)
45+
46+
47+
def test_eq():
    """Exercise ``==`` and ``!=`` between codecs and against a non-codec."""
    # two independent instances compare equal
    assert Crc32c() == Crc32c()
    assert not (Crc32c() != Crc32c())

    # a codec never equals an unrelated object, whichever side it is on
    other = 'foo'
    assert Crc32c() != other
    assert other != Crc32c()
    assert not (Crc32c() == other)
53+
54+
55+
def test_err_decode_object_buffer():
    """Run the shared object-buffer decode error check against Crc32c."""
    codec = Crc32c()
    check_err_decode_object_buffer(codec)
57+
58+
59+
def test_err_encode_object_buffer():
    """Run the shared object-buffer encode error check against Crc32c."""
    codec = Crc32c()
    check_err_encode_object_buffer(codec)
61+
62+
63+
def test_err_encode_list():
    """Encoding a plain list of strings must raise ``TypeError``."""
    words = ['foo', 'bar', 'baz']
    codec = Crc32c()
    with pytest.raises(TypeError):
        codec.encode(words)
67+
68+
69+
def test_err_encode_non_contiguous():
    """Encoding a strided (non-contiguous) view must raise ``ValueError``."""
    # taking every second element yields non-contiguous memory
    strided = np.arange(1000, dtype='i4')[::2]
    codec = Crc32c()
    with pytest.raises(ValueError):
        codec.encode(strided)
74+
75+
76+
def test_err_out_too_small():
    """Decoding into an ``out`` one element too short must raise ``ValueError``."""
    source = np.arange(10, dtype='i4')
    # drop the last element so the destination cannot hold the payload
    short_out = np.empty_like(source)[:-1]
    with pytest.raises(ValueError):
        Crc32c().decode(Crc32c().encode(source), short_out)
81+
82+
83+
def test_err_out_too_large():
    """Decoding into an ``out`` one element too long must raise ``ValueError``."""
    big_out = np.empty((10,), dtype='i4')
    # the source is a view covering all but the last element of the destination
    source = big_out[:-1]
    source[:] = 5
    with pytest.raises(ValueError):
        Crc32c().decode(Crc32c().encode(source), big_out)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ requires = [
55
"Cython",
66
"py-cpuinfo",
77
"numpy",
8+
"crc32c",
89
]
910
build-backend = "setuptools.build_meta"
1011

0 commit comments

Comments
 (0)