Skip to content

Commit 63e820e

Browse files
authored
Add jenkins lookup3 32-bit checksum (#446)
* Add initial version of Cython jenkins lookup3 32-bit checksum * Add release notes and docs
1 parent 948da6d commit 63e820e

File tree

7 files changed

+575
-2
lines changed

7 files changed

+575
-2
lines changed

docs/checksum32.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,13 @@ Fletcher32
3333
.. automethod:: encode
3434
.. automethod:: decode
3535

36+
JenkinsLookup3
37+
--------------
38+
39+
.. autoclass:: JenkinsLookup3
40+
41+
.. autoattribute:: codec_id
42+
.. autoattribute:: initval
43+
.. autoattribute:: prefix
44+
.. automethod:: encode
45+
.. automethod:: decode

docs/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ Enhancements
1717

1818
* Add ``fletcher32`` checksum codec
1919
By :user:`Martin Durant <martindurant>`, :issue:`410`.
20+
* Add ``jenkins_lookup3`` checksum codec
21+
By :user:`Mark Kittisopkul <mkitti>`, :issue:`445`.
2022

2123
Fix
2224
~~~

numcodecs/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,10 @@
9898
from numcodecs.msgpacks import MsgPack
9999
register_codec(MsgPack)
100100

101-
from numcodecs.checksum32 import CRC32, Adler32
101+
from numcodecs.checksum32 import CRC32, Adler32, JenkinsLookup3
102102
register_codec(CRC32)
103103
register_codec(Adler32)
104+
register_codec(JenkinsLookup3)
104105

105106
from numcodecs.json import JSON
106107
register_codec(JSON)

numcodecs/checksum32.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22

33

44
import numpy as np
5+
import struct
56

67

78
from .abc import Codec
89
from .compat import ensure_contiguous_ndarray, ndarray_copy
10+
from .jenkins import jenkins_lookup3
911

1012

1113
class Checksum32(Codec):
@@ -40,3 +42,58 @@ class Adler32(Checksum32):
4042

4143
codec_id = 'adler32'
4244
checksum = zlib.adler32
45+
46+
47+
class JenkinsLookup3(Checksum32):
    """Bob Jenkins' lookup3 checksum with 32-bit output

    This is the HDF5 implementation.
    https://github.com/HDFGroup/hdf5/blob/577c192518598c7e2945683655feffcdbdf5a91b/src/H5checksum.c#L378-L472

    With this codec, the checksum is concatenated on the end of the data
    bytes when encoded. At decode time, the checksum is performed on
    the data portion and compared with the four-byte checksum, raising
    RuntimeError if inconsistent.

    Attributes:
        initval: initial seed passed to the hash algorithm, default: 0
        prefix: bytes prepended to the buffer before evaluating the hash, default: None
    """

    checksum = jenkins_lookup3
    codec_id = "jenkins_lookup3"

    def __init__(self, initval: int = 0, prefix=None):
        self.initval = initval
        if prefix is None:
            self.prefix = None
        else:
            # Stored as a uint8 array so it can be stacked in front of the
            # payload bytes before hashing.
            self.prefix = np.frombuffer(prefix, dtype='uint8')

    def _digest(self, data):
        """Return the lookup3 checksum of ``prefix + data`` seeded with ``initval``.

        ``data`` must be a one-dimensional uint8 array. The prefix is only
        hashed, it is never part of the encoded output.
        """
        if self.prefix is None:
            return jenkins_lookup3(data, self.initval)
        return jenkins_lookup3(np.hstack((self.prefix, data)), self.initval)

    def encode(self, buf):
        """Return buffer plus 4-byte Bob Jenkins' lookup3 checksum

        The checksum is appended as a little-endian unsigned 32-bit integer.
        """
        buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
        val = self._digest(buf)
        return buf.tobytes() + struct.pack("<I", val)

    def decode(self, buf, out=None):
        """Check Bob Jenkins' lookup3 checksum, and return buffer without it

        The last four bytes of ``buf`` are read as a little-endian unsigned
        32-bit checksum and compared with the checksum of the preceding bytes.

        Returns ``out`` (filled with the payload) when ``out`` is given,
        otherwise a memoryview over the payload bytes.

        Raises:
            ValueError: if ``buf`` holds fewer than the 4 checksum bytes.
            RuntimeError: if the stored and computed checksums differ.
        """
        # Flatten exactly like encode() does, so the slices below always
        # address the trailing four bytes rather than the first axis of a
        # multi-dimensional input.
        b = ensure_contiguous_ndarray(buf).ravel().view('uint8')
        if b.size < 4:
            # Without this guard the "<u4" view below fails with an
            # unrelated-looking dtype/index error.
            raise ValueError(
                f"buffer of {b.size} bytes is too short to contain a 4-byte checksum"
            )
        payload = b[:-4]
        val = self._digest(payload)
        found = b[-4:].view("<u4")[0]
        if val != found:
            raise RuntimeError(
                f"The Bob Jenkin's lookup3 checksum of the data ({val}) did not"
                f" match the expected checksum ({found}).\n"
                "This could be a sign that the data has been corrupted."
            )
        if out is not None:
            out.view("uint8")[:] = payload
            return out
        return memoryview(payload)

0 commit comments

Comments
 (0)