Skip to content

Commit 25fac86

Browse files
committed
add 32-bit checksums
1 parent 07f715e commit 25fac86

File tree

6 files changed

+114
-0
lines changed

6 files changed

+114
-0
lines changed

docs/checksum32.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
32-bit checksums
2+
================
3+
.. module:: numcodecs.checksum32
4+
5+
.. autoclass:: CRC32
6+
.. autoclass:: Adler32

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ Contents
6161
fixedscaleoffset
6262
packbits
6363
categorize
64+
checksum32
6465
release
6566

6667
Acknowledgments

numcodecs/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,8 @@
4242

4343
from numcodecs.categorize import Categorize
4444
register_codec(Categorize)
45+
46+
47+
from numcodecs.checksum32 import CRC32, Adler32
48+
register_codec(CRC32)
49+
register_codec(Adler32)

numcodecs/checksum32.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import, print_function, division
3+
import zlib
4+
5+
6+
import numpy as np
7+
8+
9+
from numcodecs.abc import Codec
10+
from numcodecs.compat import ndarray_from_buffer, buffer_copy
11+
12+
13+
class Checksum32(Codec):
    """Base class for codecs that guard data with a 32-bit checksum.

    The encoded form is a 4-byte little-endian unsigned checksum followed by
    the original bytes, unchanged. Subclasses supply the checksum function via
    the ``checksum`` class attribute and set their own ``codec_id``.
    """

    # Callable taking a buffer and returning an integer checksum; set by
    # subclasses. Left as None here, so the base class is not usable directly.
    checksum = None

    def encode(self, buf):
        """Return a new uint8 array holding the checksum header plus `buf`.

        Parameters
        ----------
        buf : buffer-like
            Data to protect; it is viewed as raw bytes.

        Returns
        -------
        enc : ndarray, dtype uint8
            Array four bytes longer than the input.
        """
        arr = ndarray_from_buffer(buf, dtype='u1')
        # Mask to 32 bits so a signed result from the checksum function is
        # stored as the equivalent unsigned value.
        checksum = self.checksum(arr) & 0xffffffff
        enc = np.empty(arr.nbytes + 4, dtype='u1')
        # Header: checksum as little-endian uint32.
        enc[:4].view('<u4')[0] = checksum
        enc[4:] = arr
        return enc

    def decode(self, buf, out=None):
        """Verify the checksum header and return the payload.

        Parameters
        ----------
        buf : buffer-like
            Data previously produced by `encode`.
        out : buffer-like, optional
            Passed through to ``buffer_copy`` together with the payload.

        Returns
        -------
        The result of ``buffer_copy`` applied to the payload (the bytes after
        the 4-byte header).

        Raises
        ------
        RuntimeError
            If the buffer is too short to contain the header, or if the stored
            checksum does not match the checksum of the payload.
        """
        arr = ndarray_from_buffer(buf, dtype='u1')
        name = type(self).__name__
        if arr.nbytes < 4:
            # Without this guard a truncated header surfaces as an incidental
            # ValueError/IndexError from the view/index below.
            raise RuntimeError(
                '%s checksum error: buffer too short to contain a checksum'
                % name
            )
        expect = arr[:4].view('<u4')[0]
        checksum = self.checksum(arr[4:]) & 0xffffffff
        if expect != checksum:
            # Name the concrete codec rather than always saying "CRC32".
            raise RuntimeError('%s checksum error' % name)
        return buffer_copy(arr[4:], out)
32+
33+
34+
class CRC32(Checksum32):
    """Checksum codec that uses ``zlib.crc32`` as its checksum function."""

    checksum = zlib.crc32
    codec_id = 'crc32'
38+
39+
40+
class Adler32(Checksum32):
    """Checksum codec that uses ``zlib.adler32`` as its checksum function."""

    checksum = zlib.adler32
    codec_id = 'adler32'

numcodecs/compat.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,17 @@ def buffer_copy(buf, out=None):
8383
return out
8484

8585

86+
# def buffer_size(v):
87+
# from array import array as _stdlib_array
88+
# if PY2 and isinstance(v, _stdlib_array): # pragma: no cover
89+
# # special case array.array because does not support buffer
90+
# # interface in PY2
91+
# return v.buffer_info()[1] * v.itemsize
92+
# else:
93+
# v = memoryview(v)
94+
# return reduce(operator.mul, v.shape) * v.itemsize
95+
96+
8697
def ndarray_from_buffer(buf, dtype):
8798
if isinstance(buf, np.ndarray):
8899
arr = buf.reshape(-1, order='A').view(dtype)

numcodecs/tests/test_checksum32.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import, print_function, division
3+
import itertools
4+
5+
6+
import numpy as np
7+
from nose.tools import assert_raises
8+
9+
10+
from numcodecs.checksum32 import CRC32, Adler32
11+
from numcodecs.tests.common import check_encode_decode, check_config, \
12+
check_repr
13+
14+
15+
# mix of dtypes: integer, float, bool, string
16+
# mix of shapes: 1D, 2D, 3D
17+
# mix of orders: C, F
18+
arrays = [
    # 1D, 4-byte signed integers
    np.arange(1000, dtype='i4'),
    # 1D, 8-byte floats spanning 1000..1001
    np.linspace(1000, 1001, 1000, dtype='f8'),
    # 2D, normally distributed floats
    np.random.normal(loc=1000, scale=1, size=(100, 10)),
    # 2D, booleans laid out in Fortran order
    np.random.randint(0, 2, size=1000, dtype=bool).reshape(
        100, 10, order='F'
    ),
    # 3D, byte strings of varying length
    np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10),
]
25+
26+
# one instance of each checksum codec under test
codecs = [CRC32(), Adler32()]
27+
28+
29+
def test_encode_decode():
    # run the shared encode/decode check for every codec against every
    # sample array (codecs in the outer loop, arrays in the inner loop)
    for codec in codecs:
        for arr in arrays:
            check_encode_decode(arr, codec)
32+
33+
34+
def test_errors():
    # decoding encoded data with its final byte dropped must be rejected
    for codec in codecs:
        for arr in arrays:
            encoded = codec.encode(arr)
            with assert_raises(RuntimeError):
                codec.decode(encoded[:-1])
39+
40+
41+
def test_config():
    # run the shared configuration check on each codec instance
    for checksum_codec in codecs:
        check_config(checksum_codec)
44+
45+
46+
def test_repr():
    # run the shared repr check on the repr string of each codec, in order
    for codec_repr in ("CRC32()", "Adler32()"):
        check_repr(codec_repr)

0 commit comments

Comments
 (0)