@@ -13,6 +13,7 @@ from .compat_ext cimport PyBytes_RESIZE, ensure_continguous_memoryview
13
13
from .compat import ensure_contiguous_ndarray
14
14
from .abc import Codec
15
15
16
+ from libc.stdlib cimport malloc, realloc, free
16
17
17
18
cdef extern from " zstd.h" :
18
19
@@ -21,6 +22,23 @@ cdef extern from "zstd.h":
21
22
struct ZSTD_CCtx_s:
22
23
pass
23
24
ctypedef ZSTD_CCtx_s ZSTD_CCtx
25
+
26
+ struct ZSTD_DStream_s:
27
+ pass
28
+ ctypedef ZSTD_DStream_s ZSTD_DStream
29
+
30
+ struct ZSTD_inBuffer_s:
31
+ const void * src
32
+ size_t size
33
+ size_t pos
34
+ ctypedef ZSTD_inBuffer_s ZSTD_inBuffer
35
+
36
+ struct ZSTD_outBuffer_s:
37
+ void * dst
38
+ size_t size
39
+ size_t pos
40
+ ctypedef ZSTD_outBuffer_s ZSTD_outBuffer
41
+
24
42
cdef enum ZSTD_cParameter:
25
43
ZSTD_c_compressionLevel= 100
26
44
ZSTD_c_checksumFlag= 201
@@ -36,12 +54,20 @@ cdef extern from "zstd.h":
36
54
size_t dstCapacity,
37
55
const void * src,
38
56
size_t srcSize) nogil
39
-
40
57
size_t ZSTD_decompress(void * dst,
41
58
size_t dstCapacity,
42
59
const void * src,
43
60
size_t compressedSize) nogil
44
61
62
+ size_t ZSTD_decompressStream(ZSTD_DStream* zds,
63
+ ZSTD_outBuffer* output,
64
+ ZSTD_inBuffer* input ) nogil
65
+
66
+ size_t ZSTD_DStreamOutSize() nogil
67
+ ZSTD_DStream* ZSTD_createDStream() nogil
68
+ size_t ZSTD_freeDStream(ZSTD_DStream* zds) nogil
69
+ size_t ZSTD_initDStream(ZSTD_DStream* zds) nogil
70
+
45
71
cdef long ZSTD_CONTENTSIZE_UNKNOWN
46
72
cdef long ZSTD_CONTENTSIZE_ERROR
47
73
unsigned long long ZSTD_getFrameContentSize(const void * src,
@@ -55,7 +81,7 @@ cdef extern from "zstd.h":
55
81
56
82
unsigned ZSTD_isError(size_t code) nogil
57
83
58
- const char * ZSTD_getErrorName(size_t code)
84
+ const char * ZSTD_getErrorName(size_t code) nogil
59
85
60
86
61
87
VERSION_NUMBER = ZSTD_versionNumber()
@@ -157,7 +183,10 @@ def decompress(source, dest=None):
157
183
source : bytes-like
158
184
Compressed data. Can be any object supporting the buffer protocol.
159
185
dest : array-like, optional
160
- Object to decompress into.
186
+ Object to decompress into. If the content size is unknown, the
187
+ length of dest must match the decompressed size. If the content size
188
+ is unknown and dest is not provided, streaming decompression will be
189
+ used.
161
190
162
191
Returns
163
192
-------
@@ -174,6 +203,7 @@ def decompress(source, dest=None):
174
203
char * dest_ptr
175
204
size_t source_size, dest_size, decompressed_size
176
205
size_t nbytes, cbytes, blocksize
206
+ size_t dest_nbytes
177
207
178
208
# obtain source memoryview
179
209
source_mv = ensure_continguous_memoryview(source)
@@ -187,9 +217,12 @@ def decompress(source, dest=None):
187
217
188
218
# determine uncompressed size
189
219
dest_size = ZSTD_getFrameContentSize(source_ptr, source_size)
190
- if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_UNKNOWN or dest_size == ZSTD_CONTENTSIZE_ERROR:
220
+ if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_ERROR:
191
221
raise RuntimeError (' Zstd decompression error: invalid input data' )
192
222
223
+ if dest_size == ZSTD_CONTENTSIZE_UNKNOWN and dest is None :
224
+ return stream_decompress(source_pb)
225
+
193
226
# setup destination buffer
194
227
if dest is None :
195
228
# allocate memory
@@ -203,6 +236,9 @@ def decompress(source, dest=None):
203
236
dest_ptr = < char * > dest_pb.buf
204
237
dest_nbytes = dest_pb.len
205
238
239
+ if dest_size == ZSTD_CONTENTSIZE_UNKNOWN:
240
+ dest_size = dest_nbytes
241
+
206
242
# validate output buffer
207
243
if dest_nbytes < dest_size:
208
244
raise ValueError (' destination buffer too small; expected at least %s , '
@@ -225,6 +261,97 @@ def decompress(source, dest=None):
225
261
226
262
return dest
227
263
264
cdef stream_decompress(const Py_buffer* source_pb):
    """Decompress data of unknown size using the Zstd streaming API.

    The output is accumulated in a heap buffer that starts at twice the
    source size (but at least ``ZSTD_DStreamOutSize()``) and grows by
    ``ZSTD_DStreamOutSize()`` bytes whenever it fills up. The result is
    copied into a new ``bytes`` object before the heap buffer is released.

    Parameters
    ----------
    source_pb : const Py_buffer*
        Compressed data buffer.

    Returns
    -------
    dest : bytes
        Object containing decompressed data.

    Raises
    ------
    RuntimeError
        If the output buffer or the decompression stream cannot be
        allocated, if Zstd reports an error, if the input ends before the
        frame is complete, or if the output buffer cannot be grown.
    """

    cdef:
        const char *source_ptr
        const char *error
        void *dest_ptr
        void *new_dst
        size_t source_size, dest_size
        size_t DEST_GROWTH_SIZE, status, new_size
        ZSTD_inBuffer input
        ZSTD_outBuffer output
        ZSTD_DStream *zds

    # Recommended size for output buffer, guaranteed to flush at least
    # one complete block in all circumstances
    DEST_GROWTH_SIZE = ZSTD_DStreamOutSize()

    source_ptr = <const char *> source_pb.buf
    source_size = source_pb.len

    # unknown content size, guess it is twice the size as the source
    dest_size = source_size * 2

    if dest_size < DEST_GROWTH_SIZE:
        # minimum dest_size is DEST_GROWTH_SIZE
        dest_size = DEST_GROWTH_SIZE

    dest_ptr = malloc(dest_size)
    if dest_ptr == NULL:
        raise RuntimeError('Zstd stream decompression error on malloc: could not allocate output buffer')

    zds = ZSTD_createDStream()
    if zds == NULL:
        # nothing else owns dest_ptr yet, release it before bailing out
        free(dest_ptr)
        raise RuntimeError('Zstd stream decompression error on ZSTD_createDStream: could not create stream')

    # Set up both buffers *before* entering the try block so that the
    # finally clause always sees a valid output.dst, whichever path fails.
    input.src = source_ptr
    input.size = source_size
    input.pos = 0
    output.dst = dest_ptr
    output.size = dest_size
    output.pos = 0

    try:

        with nogil:

            status = ZSTD_initDStream(zds)
            if ZSTD_isError(status):
                error = ZSTD_getErrorName(status)
                # zds is released exactly once, in the finally block
                raise RuntimeError('Zstd stream decompression error on ZSTD_initDStream: %s' % error)

            # Initialize to 1 to force a loop iteration
            status = 1
            while status > 0 or input.pos < input.size:
                # Possible returned values of ZSTD_decompressStream:
                # 0: frame is completely decoded and fully flushed
                # error (<0)
                # >0: suggested next input size
                status = ZSTD_decompressStream(zds, &output, &input)

                if ZSTD_isError(status):
                    error = ZSTD_getErrorName(status)
                    raise RuntimeError('Zstd stream decompression error on ZSTD_decompressStream: %s' % error)

                if status > 0 and output.pos == output.size:
                    # There is more to decompress, grow the buffer
                    new_size = output.size + DEST_GROWTH_SIZE

                    # unsigned wrap-around means the buffer cannot grow further
                    if new_size < output.size:
                        raise RuntimeError('Zstd stream decompression error: output buffer overflow')

                    new_dst = realloc(output.dst, new_size)

                    if new_dst == NULL:
                        # realloc left the old block untouched;
                        # output.dst is freed in the finally block
                        raise RuntimeError('Zstd stream decompression error on realloc: could not expand output buffer')

                    output.dst = new_dst
                    output.size = new_size

                elif status > 0 and input.pos == input.size:
                    # The decoder still expects data, all input has been
                    # consumed and the output buffer has room left, so no
                    # further progress is possible: the frame is truncated.
                    raise RuntimeError('Zstd stream decompression error: input data is truncated')

        # Copy the output to a bytes object
        dest = PyBytes_FromStringAndSize(<char *> output.dst, output.pos)

    finally:
        ZSTD_freeDStream(zds)
        free(output.dst)

    return dest
228
355
229
356
class Zstd (Codec ):
230
357
""" Codec providing compression using Zstandard.
0 commit comments