@@ -43,20 +43,11 @@ typedef struct {
4343 PyObject * unused_data ;
4444
4545 /* 0 if decompressor has (or may have) unconsumed input data, 0 or 1. */
46- char needs_input ;
47-
48- /* For decompress(), 0 or 1.
49- 1 when both input and output streams are at a frame edge, means a
50- frame is completely decoded and fully flushed, or the decompressor
51- just be initialized. */
52- char at_frame_edge ;
46+ bool needs_input ;
5347
5448 /* For ZstdDecompressor, 0 or 1.
5549 1 means the end of the first frame has been reached. */
56- char eof ;
57-
58- /* Used for fast reset above three variables */
59- char _unused_char_for_align ;
50+ bool eof ;
6051
6152 /* __init__ has been called, 0 or 1. */
6253 bool initialized ;
@@ -258,19 +249,13 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
258249 return 0 ;
259250}
260251
261- typedef enum {
262- TYPE_DECOMPRESSOR , // <D>, ZstdDecompressor class
263- TYPE_ENDLESS_DECOMPRESSOR , // <E>, decompress() function
264- } decompress_type ;
265-
266252/*
267- Given the two types of decompressors (defined above),
268- decompress implementation for <D>, <E>, pseudo code:
253+ Decompress implementation in pseudo code:
269254
270255 initialize_output_buffer
271256 while True:
272257 decompress_data
273- set_object_flag # .eof for <D>, .at_frame_edge for <E>.
258+ set_object_flag # .eof
274259
275260 if output_buffer_exhausted:
276261 if output_buffer_reached_max_length:
@@ -287,63 +272,19 @@ typedef enum {
287272 flushing to do to complete current frame.
288273
289274 Note, decompressing "an empty input" in any case will make it > 0.
290-
291- <E> supports multiple frames, has an .at_frame_edge flag, it means both the
292- input and output streams are at a frame edge. The flag can be set by this
293- statement:
294-
295- .at_frame_edge = (zstd_ret == 0) ? 1 : 0
296-
297- But if decompressing "an empty input" at "a frame edge", zstd_ret will be
298- non-zero, then .at_frame_edge will be wrongly set to false. To solve this
299- problem, two AFE checks are needed to ensure that: when at "a frame edge",
300- empty input will not be decompressed.
301-
302- // AFE check
303- if (self->at_frame_edge && in->pos == in->size) {
304- finish
305- }
306-
307- In <E>, if .at_frame_edge is eventually set to true, but input stream has
308- unconsumed data (in->pos < in->size), then the outer function
309- stream_decompress() will set .at_frame_edge to false. In this case,
310- although the output stream is at a frame edge, for the caller, the input
311- stream is not at a frame edge, see below diagram. This behavior does not
312- affect the next AFE check, since (in->pos < in->size).
313-
314- input stream: --------------|---
315- ^
316- output stream: ====================|
317- ^
318275*/
319276static PyObject *
320277decompress_impl (ZstdDecompressor * self , ZSTD_inBuffer * in ,
321- Py_ssize_t max_length ,
322- Py_ssize_t initial_size ,
323- decompress_type type )
278+ Py_ssize_t max_length )
324279{
325280 size_t zstd_ret ;
326281 ZSTD_outBuffer out ;
327282 _BlocksOutputBuffer buffer = {.list = NULL };
328283 PyObject * ret ;
329284
330- /* The first AFE check for setting .at_frame_edge flag */
331- if (type == TYPE_ENDLESS_DECOMPRESSOR ) {
332- if (self -> at_frame_edge && in -> pos == in -> size ) {
333- return Py_GetConstant (Py_CONSTANT_EMPTY_BYTES );
334- }
335- }
336-
337285 /* Initialize the output buffer */
338- if (initial_size >= 0 ) {
339- if (_OutputBuffer_InitWithSize (& buffer , & out , max_length , initial_size ) < 0 ) {
340- goto error ;
341- }
342- }
343- else {
344- if (_OutputBuffer_InitAndGrow (& buffer , & out , max_length ) < 0 ) {
345- goto error ;
346- }
286+ if (_OutputBuffer_InitAndGrow (& buffer , & out , max_length ) < 0 ) {
287+ goto error ;
347288 }
348289 assert (out .pos == 0 );
349290
@@ -362,22 +303,11 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
362303 goto error ;
363304 }
364305
365- /* Set .eof/.af_frame_edge flag */
366- if (type == TYPE_DECOMPRESSOR ) {
367- /* ZstdDecompressor class stops when a frame is decompressed */
368- if (zstd_ret == 0 ) {
369- self -> eof = 1 ;
370- break ;
371- }
372- }
373- else if (type == TYPE_ENDLESS_DECOMPRESSOR ) {
374- /* decompress() function supports multiple frames */
375- self -> at_frame_edge = (zstd_ret == 0 ) ? 1 : 0 ;
376-
377- /* The second AFE check for setting .at_frame_edge flag */
378- if (self -> at_frame_edge && in -> pos == in -> size ) {
379- break ;
380- }
306+ /* Set .eof flag */
307+ if (zstd_ret == 0 ) {
308+ /* Stop when a frame is decompressed */
309+ self -> eof = 1 ;
310+ break ;
381311 }
382312
383313 /* Need to check out before in. Maybe zstd's internal buffer still has
@@ -415,8 +345,7 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
415345}
416346
417347static void
418- decompressor_reset_session (ZstdDecompressor * self ,
419- decompress_type type )
348+ decompressor_reset_session (ZstdDecompressor * self )
420349{
421350 // TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here
422351 // and ensure lock is always held
@@ -425,56 +354,28 @@ decompressor_reset_session(ZstdDecompressor *self,
425354 self -> in_begin = 0 ;
426355 self -> in_end = 0 ;
427356
428- if (type == TYPE_DECOMPRESSOR ) {
429- Py_CLEAR (self -> unused_data );
430- }
357+ Py_CLEAR (self -> unused_data );
431358
432359 /* Reset the state flags */
433360 self -> needs_input = 1 ;
434- self -> at_frame_edge = 1 ;
435361 self -> eof = 0 ;
436- self -> _unused_char_for_align = 0 ;
437362
438- /* Resetting session never fail */
363+ /* Resetting session is guaranteed to never fail */
439364 ZSTD_DCtx_reset (self -> dctx , ZSTD_reset_session_only );
440365}
441366
442367static PyObject *
443- stream_decompress (ZstdDecompressor * self , Py_buffer * data , Py_ssize_t max_length ,
444- decompress_type type )
368+ stream_decompress (ZstdDecompressor * self , Py_buffer * data , Py_ssize_t max_length )
445369{
446- Py_ssize_t initial_buffer_size = -1 ;
447370 ZSTD_inBuffer in ;
448371 PyObject * ret = NULL ;
449372 int use_input_buffer ;
450373
451- if (type == TYPE_DECOMPRESSOR ) {
452- /* Check .eof flag */
453- if (self -> eof ) {
454- PyErr_SetString (PyExc_EOFError , "Already at the end of a zstd frame." );
455- assert (ret == NULL );
456- goto success ;
457- }
458- }
459- else if (type == TYPE_ENDLESS_DECOMPRESSOR ) {
460- /* Fast path for the first frame */
461- if (self -> at_frame_edge && self -> in_begin == self -> in_end ) {
462- /* Read decompressed size */
463- uint64_t decompressed_size = ZSTD_getFrameContentSize (data -> buf , data -> len );
464-
465- /* These two zstd constants always > PY_SSIZE_T_MAX:
466- ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1)
467- ZSTD_CONTENTSIZE_ERROR is (0ULL - 2)
468-
469- Use ZSTD_findFrameCompressedSize() to check complete frame,
470- prevent allocating too much memory for small input chunk. */
471-
472- if (decompressed_size <= (uint64_t ) PY_SSIZE_T_MAX &&
473- !ZSTD_isError (ZSTD_findFrameCompressedSize (data -> buf , data -> len )) )
474- {
475- initial_buffer_size = (Py_ssize_t ) decompressed_size ;
476- }
477- }
374+ /* Check .eof flag */
375+ if (self -> eof ) {
376+ PyErr_SetString (PyExc_EOFError , "Already at the end of a zstd frame." );
377+ assert (ret == NULL );
378+ return NULL ;
478379 }
479380
480381 /* Prepare input buffer with or without unconsumed data */
@@ -561,30 +462,18 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
561462 assert (in .pos == 0 );
562463
563464 /* Decompress */
564- ret = decompress_impl (self , & in ,
565- max_length , initial_buffer_size ,
566- type );
465+ ret = decompress_impl (self , & in , max_length );
567466 if (ret == NULL ) {
568467 goto error ;
569468 }
570469
571470 /* Unconsumed input data */
572471 if (in .pos == in .size ) {
573- if (type == TYPE_DECOMPRESSOR ) {
574- if (Py_SIZE (ret ) == max_length || self -> eof ) {
575- self -> needs_input = 0 ;
576- }
577- else {
578- self -> needs_input = 1 ;
579- }
472+ if (Py_SIZE (ret ) == max_length || self -> eof ) {
473+ self -> needs_input = 0 ;
580474 }
581- else if (type == TYPE_ENDLESS_DECOMPRESSOR ) {
582- if (Py_SIZE (ret ) == max_length && !self -> at_frame_edge ) {
583- self -> needs_input = 0 ;
584- }
585- else {
586- self -> needs_input = 1 ;
587- }
475+ else {
476+ self -> needs_input = 1 ;
588477 }
589478
590479 if (use_input_buffer ) {
@@ -598,10 +487,6 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
598487
599488 self -> needs_input = 0 ;
600489
601- if (type == TYPE_ENDLESS_DECOMPRESSOR ) {
602- self -> at_frame_edge = 0 ;
603- }
604-
605490 if (!use_input_buffer ) {
606491 /* Discard buffer if it's too small
607492 (resizing it may needlessly copy the current contents) */
@@ -634,16 +519,14 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
634519 }
635520 }
636521
637- goto success ;
522+ return ret ;
638523
639524error :
640525 /* Reset decompressor's states/session */
641- decompressor_reset_session (self , type );
526+ decompressor_reset_session (self );
642527
643528 Py_CLEAR (ret );
644- success :
645-
646- return ret ;
529+ return NULL ;
647530}
648531
649532
@@ -668,9 +551,6 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
668551 /* needs_input flag */
669552 self -> needs_input = 1 ;
670553
671- /* at_frame_edge flag */
672- self -> at_frame_edge = 1 ;
673-
674554 /* Decompression context */
675555 self -> dctx = ZSTD_createDCtx ();
676556 if (self -> dctx == NULL ) {
@@ -837,7 +717,7 @@ _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
837717 /* Thread-safe code */
838718 Py_BEGIN_CRITICAL_SECTION (self );
839719
840- ret = stream_decompress (self , data , max_length , TYPE_DECOMPRESSOR );
720+ ret = stream_decompress (self , data , max_length );
841721 Py_END_CRITICAL_SECTION ();
842722 return ret ;
843723}
0 commit comments