|
2 | 2 |
|
3 | 3 |
|
4 | 4 | import numpy as np
|
| 5 | +import struct |
5 | 6 |
|
6 | 7 |
|
7 | 8 | from .abc import Codec
|
8 | 9 | from .compat import ensure_contiguous_ndarray, ndarray_copy
|
| 10 | +from .jenkins import jenkins_lookup3 |
9 | 11 |
|
10 | 12 |
|
11 | 13 | class Checksum32(Codec):
|
@@ -40,3 +42,58 @@ class Adler32(Checksum32):
|
40 | 42 |
|
41 | 43 | codec_id = 'adler32'
|
42 | 44 | checksum = zlib.adler32
|
| 45 | + |
| 46 | + |
class JenkinsLookup3(Checksum32):
    """Bob Jenkins' lookup3 checksum with 32-bit output

    This is the HDF5 implementation.
    https://github.com/HDFGroup/hdf5/blob/577c192518598c7e2945683655feffcdbdf5a91b/src/H5checksum.c#L378-L472

    With this codec, the checksum is concatenated on the end of the data
    bytes when encoded. At decode time, the checksum is performed on
    the data portion and compared with the four-byte checksum, raising
    RuntimeError if inconsistent.

    Attributes:
        initval: initial seed passed to the hash algorithm, default: 0
        prefix: bytes prepended to the buffer before evaluating the hash, default: None
    """

    checksum = jenkins_lookup3
    codec_id = "jenkins_lookup3"

    def __init__(self, initval: int = 0, prefix=None):
        """Store the hash seed and the optional prefix.

        Parameters
        ----------
        initval : int
            Seed forwarded to ``jenkins_lookup3`` on every call.
        prefix : bytes-like, optional
            Bytes hashed ahead of the data. They are kept as a ``uint8``
            array and are never written to the encoded output.
        """
        self.initval = initval
        if prefix is None:
            self.prefix = None
        else:
            self.prefix = np.frombuffer(prefix, dtype='uint8')

    def _hash(self, data):
        """Return the lookup3 hash of ``prefix + data`` (or just ``data``)."""
        if self.prefix is not None:
            data = np.hstack((self.prefix, data))
        return jenkins_lookup3(data, self.initval)

    def encode(self, buf):
        """Return buffer plus 4-byte Bob Jenkins' lookup3 checksum

        The checksum is appended as a little-endian unsigned 32-bit integer.
        """
        data = ensure_contiguous_ndarray(buf).ravel().view('uint8')
        return data.tobytes() + struct.pack("<I", self._hash(data))

    def decode(self, buf, out=None):
        """Check Bob Jenkins' lookup3 checksum, and return buffer without it

        Parameters
        ----------
        buf : buffer-like
            Encoded bytes: the payload followed by the 4-byte little-endian
            checksum.
        out : buffer-like, optional
            Destination for the payload. When given, the payload bytes are
            copied into it and ``out`` itself is returned.

        Returns
        -------
        ``out`` when it was supplied, otherwise a ``memoryview`` over the
        payload bytes.

        Raises
        ------
        RuntimeError
            If the buffer is too short to hold a checksum, or the stored
            checksum does not match the one computed from the payload.
        """
        # Flatten first, as encode() does, so the slices below always address
        # bytes rather than the leading axis of an N-D input.
        b = ensure_contiguous_ndarray(buf).ravel().view('uint8')
        if b.size < 4:
            # Without this guard the trailing-checksum view below fails with
            # an unrelated numpy error on truncated input.
            raise RuntimeError(
                f"Buffer of {b.size} byte(s) is too short to contain a"
                " 4-byte lookup3 checksum.\n"
                "This could be a sign that the data has been corrupted."
            )
        payload = b[:-4]
        val = self._hash(payload)
        found = b[-4:].view("<u4")[0]
        if val != found:
            raise RuntimeError(
                f"The Bob Jenkin's lookup3 checksum of the data ({val}) did not"
                f" match the expected checksum ({found}).\n"
                "This could be a sign that the data has been corrupted."
            )
        if out is not None:
            # Array-likes are written through their own .view(), exactly as
            # before; other buffers (bytearray, memoryview) are wrapped first.
            # NOTE(review): assumes ensure_contiguous_ndarray returns a
            # no-copy view over a writable buffer - confirm against .compat.
            dest = out if hasattr(out, "view") else ensure_contiguous_ndarray(out)
            dest.view("uint8")[:] = payload
            return out
        return memoryview(payload)
0 commit comments