Skip to content

Commit 7bc8022

Browse files
committed
Avoid excess copies in fletcher32
During encoding, preallocate a `bytes` object for the final result and write everything directly into it. This avoids unnecessary staging and copying of intermediate results. Use Cython typed `memoryview`s throughout `encode` and `decode` for efficient access to the underlying data. Additionally, use the `store_le32` and `load_le32` functions to quickly pack little-endian 32-bit unsigned integers into buffers when encoding and unpack them from buffers when decoding.
1 parent d0a7721 commit 7bc8022

File tree

1 file changed

+20
-7
lines changed

1 file changed

+20
-7
lines changed

numcodecs/fletcher32.pyx

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
# cython: language_level=3
22
# cython: overflowcheck=False
33
# cython: cdivision=True
4-
import struct
54

65
from numcodecs.abc import Codec
76
from numcodecs.compat import ensure_contiguous_ndarray
87

98
from libc.stdint cimport uint8_t, uint16_t, uint32_t
109

10+
from ._utils cimport store_le32, load_le32
11+
1112

1213
cdef uint32_t _fletcher32(const uint8_t[::1] _data):
1314
# converted from
@@ -64,22 +65,34 @@ class Fletcher32(Codec):
6465
"""Return buffer plus 4-byte fletcher checksum"""
6566
buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
6667
cdef const uint8_t[::1] b_mv = buf
67-
val = _fletcher32(b_mv)
68-
return buf.tobytes() + struct.pack("<I", val)
68+
cdef Py_ssize_t b_len = len(b_mv)
69+
70+
cdef Py_ssize_t out_len = b_len + 4
71+
cdef bytes out = PyBytes_FromStringAndSize(NULL, out_len)
72+
cdef uint8_t* out_ptr = <uint8_t*>out
73+
cdef uint8_t[::1] out_mv = (<uint8_t[:(out_len + 1):1]>out_ptr)[:out_len]
74+
75+
out_mv[:-4] = b_mv
76+
store_le32(&out_mv[-4], _fletcher32(b_mv))
77+
78+
return out
6979

7080
def decode(self, buf, out=None):
7181
"""Check fletcher checksum, and return buffer without it"""
7282
b = ensure_contiguous_ndarray(buf).view('uint8')
73-
cdef const uint8_t[::1] b_mv = b[:-4]
74-
val = _fletcher32(b_mv)
75-
found = b[-4:].view("<u4")[0]
83+
cdef const uint8_t[::1] b_mv = b
84+
val = _fletcher32(b_mv[:-4])
85+
found = load_le32(&b_mv[-4])
7686
if val != found:
7787
raise RuntimeError(
7888
f"The fletcher32 checksum of the data ({val}) did not"
7989
f" match the expected checksum ({found}).\n"
8090
"This could be a sign that the data has been corrupted."
8191
)
92+
93+
cdef uint8_t[::1] out_mv
8294
if out is not None:
83-
out.view("uint8")[:] = b[:-4]
95+
out_mv = ensure_contiguous_ndarray(out).view("uint8")
96+
out_mv[:] = b_mv[:-4]
8497
return out
8598
return memoryview(b[:-4])

0 commit comments

Comments
 (0)