Skip to content

Commit 8ebb7a2

Browse files
committed
Add support for multiple zstd frames in decompression
1 parent 506c89b commit 8ebb7a2

File tree

2 files changed

+49
-4
lines changed

2 files changed

+49
-4
lines changed

numcodecs/tests/test_zstd.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,6 @@ def test_native_functions():
8787
assert Zstd.min_level() == -131072
8888
assert Zstd.max_level() == 22
8989

90-
9190
def test_streaming_decompression():
9291
# Test input frames with unknown frame content size
9392
codec = Zstd()
@@ -156,3 +155,16 @@ def zstd_cli_available() -> bool:
156155
return not subprocess.run(
157156
["zstd", "-V"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
158157
).returncode
158+
159+
def test_multi_frame():
    """Check that back-to-back zstd frames decode to their joined payloads."""
    codec = Zstd()

    # One frame on its own, then the very same frame repeated twice.
    greeting_frame = codec.encode(b"Hello world!")
    assert codec.decode(greeting_frame) == b"Hello world!"
    doubled_frames = greeting_frame * 2
    assert codec.decode(doubled_frames) == b"Hello world!Hello world!"

    # Two different frames: each alone first, then concatenated.
    first_frame = codec.encode(b"Hola ")
    second_frame = codec.encode(b"Mundo!")
    assert codec.decode(first_frame) == b"Hola "
    assert codec.decode(second_frame) == b"Mundo!"
    joined_frames = first_frame + second_frame
    assert codec.decode(joined_frames) == b"Hola Mundo!"

numcodecs/zstd.pyx

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,12 @@ cdef extern from "zstd.h":
6868
size_t ZSTD_freeDStream(ZSTD_DStream* zds) nogil
6969
size_t ZSTD_initDStream(ZSTD_DStream* zds) nogil
7070

71-
cdef long ZSTD_CONTENTSIZE_UNKNOWN
72-
cdef long ZSTD_CONTENTSIZE_ERROR
71+
cdef unsigned long long ZSTD_CONTENTSIZE_UNKNOWN
72+
cdef unsigned long long ZSTD_CONTENTSIZE_ERROR
73+
7374
unsigned long long ZSTD_getFrameContentSize(const void* src,
7475
size_t srcSize) nogil
76+
size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) nogil
7577

7678
int ZSTD_minCLevel() nogil
7779
int ZSTD_maxCLevel() nogil
@@ -216,7 +218,11 @@ def decompress(source, dest=None):
216218
try:
217219

218220
# determine uncompressed size
219-
dest_size = ZSTD_getFrameContentSize(source_ptr, source_size)
221+
try:
222+
dest_size = findTotalContentSize(source_ptr, source_size)
223+
except RuntimeError:
224+
raise RuntimeError('Zstd decompression error: invalid input data')
225+
220226
if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_ERROR:
221227
raise RuntimeError('Zstd decompression error: invalid input data')
222228

@@ -353,6 +359,33 @@ cdef stream_decompress(const Py_buffer* source_pb):
353359

354360
return dest
355361

362+
cdef findTotalContentSize(const void* source_ptr, size_t source_size):
    """Sum the decompressed content size of every zstd frame in a buffer.

    The buffer is walked frame by frame: the compressed length of the frame
    at the current offset is looked up, its recorded content size is added
    to the running total, and the offset advances past that frame.

    Parameters
    ----------
    source_ptr : const void*
        Start of the compressed input.
    source_size : size_t
        Number of bytes available at `source_ptr`.

    Returns
    -------
    total_content_size : int
        Sum of the content sizes of all frames. The walk stops early and
        returns ``ZSTD_CONTENTSIZE_UNKNOWN`` or ``ZSTD_CONTENTSIZE_ERROR``
        as soon as ``ZSTD_getFrameContentSize`` reports either value for a
        frame. ``ZSTD_CONTENTSIZE_ERROR`` is also returned if the running
        total would wrap around.

    Raises
    ------
    RuntimeError
        If the compressed size of a frame cannot be determined.
    """
    cdef:
        # Byte-typed alias of the input: arithmetic on `void*` is a compiler
        # extension rather than standard C, so offsets are applied to this.
        const char* frame_base = <const char*>source_ptr
        unsigned long long frame_content_size = 0
        unsigned long long total_content_size = 0
        size_t frame_compressed_size = 0
        size_t offset = 0

    while offset < source_size:
        frame_compressed_size = ZSTD_findFrameCompressedSize(
            frame_base + offset, source_size - offset)

        if ZSTD_isError(frame_compressed_size):
            error = ZSTD_getErrorName(frame_compressed_size)
            raise RuntimeError('Could not determine zstd frame size: %s' % error)

        frame_content_size = ZSTD_getFrameContentSize(
            frame_base + offset, frame_compressed_size)

        if frame_content_size == ZSTD_CONTENTSIZE_ERROR:
            return ZSTD_CONTENTSIZE_ERROR

        if frame_content_size == ZSTD_CONTENTSIZE_UNKNOWN:
            return ZSTD_CONTENTSIZE_UNKNOWN

        # Unsigned addition wraps silently; a wrapped total would describe a
        # destination far smaller than the data, so report it as an error.
        if total_content_size + frame_content_size < total_content_size:
            return ZSTD_CONTENTSIZE_ERROR

        total_content_size += frame_content_size
        offset += frame_compressed_size

    return total_content_size
388+
356389
class Zstd(Codec):
357390
"""Codec providing compression using Zstandard.
358391

0 commit comments

Comments
 (0)