Skip to content

Commit d32ec2d

Browse files
committed
Add support for multiple zstd frames in decompression
1 parent 4fdb625 commit d32ec2d

File tree

2 files changed

+48
-3
lines changed

2 files changed

+48
-3
lines changed

numcodecs/tests/test_zstd.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,16 @@ def test_native_functions():
9090
assert Zstd.default_level() == 3
9191
assert Zstd.min_level() == -131072
9292
assert Zstd.max_level() == 22
93+
94+
def test_multi_frame():
    """Check that back-to-back zstd frames decode to the joined payloads."""
    zstd = Zstd()

    # One frame on its own, then the very same frame repeated twice.
    greeting_frame = zstd.encode(b"Hello world!")
    single = zstd.decode(greeting_frame)
    doubled = zstd.decode(greeting_frame * 2)
    assert single == b"Hello world!"
    assert doubled == b"Hello world!Hello world!"

    # Two different frames: each decodes alone, and their concatenation
    # decodes to the concatenation of the two payloads.
    first_frame = zstd.encode(b"Hola ")
    second_frame = zstd.encode(b"Mundo!")
    assert zstd.decode(first_frame) == b"Hola "
    assert zstd.decode(second_frame) == b"Mundo!"
    assert zstd.decode(first_frame + second_frame) == b"Hola Mundo!"

numcodecs/zstd.pyx

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,11 @@ cdef extern from "zstd.h":
4242
const void* src,
4343
size_t compressedSize) nogil
4444

45-
cdef long ZSTD_CONTENTSIZE_UNKNOWN
46-
cdef long ZSTD_CONTENTSIZE_ERROR
45+
cdef unsigned long long ZSTD_CONTENTSIZE_UNKNOWN
46+
cdef unsigned long long ZSTD_CONTENTSIZE_ERROR
4747
unsigned long long ZSTD_getFrameContentSize(const void* src,
4848
size_t srcSize) nogil
49+
size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize) nogil
4950

5051
int ZSTD_minCLevel() nogil
5152
int ZSTD_maxCLevel() nogil
@@ -186,7 +187,11 @@ def decompress(source, dest=None):
186187
try:
187188

188189
# determine uncompressed size
189-
dest_size = ZSTD_getFrameContentSize(source_ptr, source_size)
190+
try:
191+
dest_size = findTotalContentSize(source_ptr, source_size)
192+
except RuntimeError:
193+
raise RuntimeError('Zstd decompression error: invalid input data')
194+
190195
if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_UNKNOWN or dest_size == ZSTD_CONTENTSIZE_ERROR:
191196
raise RuntimeError('Zstd decompression error: invalid input data')
192197

@@ -226,6 +231,33 @@ def decompress(source, dest=None):
226231
return dest
227232

228233

234+
cdef findTotalContentSize(const void* source_ptr, size_t source_size):
    """Sum the declared content sizes of every zstd frame in a buffer.

    The buffer is walked frame by frame: ``ZSTD_findFrameCompressedSize``
    gives the length of the frame starting at the current offset, and
    ``ZSTD_getFrameContentSize`` gives that frame's declared uncompressed
    size.

    Parameters
    ----------
    source_ptr : const void*
        Start of a buffer holding one or more concatenated zstd frames.
    source_size : size_t
        Number of bytes available at ``source_ptr``.

    Returns
    -------
    The sum of the per-frame content sizes. If any frame reports
    ``ZSTD_CONTENTSIZE_ERROR`` or ``ZSTD_CONTENTSIZE_UNKNOWN``, that value
    is returned immediately instead. A ``source_size`` of 0 yields 0.

    Raises
    ------
    RuntimeError
        If the compressed size of a frame cannot be determined.
    """
    cdef:
        # Byte-typed view of the input: arithmetic on ``void*`` is not
        # standard C and is rejected by some compilers, so offsets are
        # applied to a ``char*`` instead.
        const char* frames = <const char*>source_ptr
        unsigned long long frame_content_size = 0
        unsigned long long total_content_size = 0
        size_t frame_compressed_size = 0
        size_t offset = 0

    while offset < source_size:
        # Length in bytes of the frame that starts at ``offset``.
        frame_compressed_size = ZSTD_findFrameCompressedSize(frames + offset, source_size - offset)

        if ZSTD_isError(frame_compressed_size):
            error = ZSTD_getErrorName(frame_compressed_size)
            raise RuntimeError('Could not determine zstd frame size: %s' % error)

        frame_content_size = ZSTD_getFrameContentSize(frames + offset, frame_compressed_size)

        # Propagate the sentinels unchanged so the caller can test for them.
        if frame_content_size == ZSTD_CONTENTSIZE_ERROR:
            return ZSTD_CONTENTSIZE_ERROR

        if frame_content_size == ZSTD_CONTENTSIZE_UNKNOWN:
            return ZSTD_CONTENTSIZE_UNKNOWN

        total_content_size += frame_content_size
        offset += frame_compressed_size

    return total_content_size
260+
229261
class Zstd(Codec):
230262
"""Codec providing compression using Zstandard.
231263

0 commit comments

Comments
 (0)