24
24
"""
25
25
Implementation of the zlib module using the ISA-L libraries.
26
26
"""
27
+ # ##############################################################################
28
+ # ## README FOR DEVELOPERS: IMPLEMENTATION OF THIS MODULE ###
29
+ # ##############################################################################
30
+ #
31
+ # This module is implemented with zlibmodule.c as an example. Since there is a
32
+ # lot of duplication in zlibmodule.c there is a lot of duplication in this
33
+ # module. It is not always possible to route repetitive calls to a subroutine.
34
+ # Therefore the main methods are explained here.
35
+ #
36
+ # All compress, decompress and flush implementations are basically the same.
37
+ # 1. Get a buffer from the input data
38
+ # 2. Initialise an output buffer
39
+ # 3. Initialise an isal_zstream (for compression) or inflate_state (for
40
+ # decompression). Hereafter referred to as the stream.
41
+ # 4. The amount of available input bytes is set on the stream. This is either
42
+ # the maximum amount possible (in the case the input data is equal or larger
43
+ # than the maximum amount), or the length of the (remaining) input data.
44
+ # 5. The amount of available output bytes is set on the stream. Buffer is
45
+ # enlarged as needed.
46
+ # 6. inflate/deflate/flush action
47
+ # 7. Check for errors in the action.
48
+ # 8. Was the output buffer completely filled? (stream.avail_out == 0). If so go
49
+ # to 5. Special case: decompressobj. If the output buffer is at max_length
50
+ # continue.
51
+ # 9. Was all the input read? If not, go to 4. Alternatively, in the case of
52
+ # decompression: was the end of the stream reached? If not, go to 4.
53
+ # 10. Decompression may leave input data unconsumed. For a decompressobj
54
+ # this is stored in unconsumed_tail / unused_data.
55
+ # 11. Convert output buffer to bytes object and return.
56
+ #
57
+ # Errors are raised in the main functions as much as possible to prevent cdef
58
+ # functions returning PyObjects that need to be transformed into C variables.
59
+ # In cases where this is not possible, C variables are set using pointers,
60
+ # allowing repeated use of functions while limiting the number of Python
61
+ # interactions.
62
+ #
63
+ # ##############################################################################
64
+
65
+
27
66
import warnings
28
67
import zlib
29
68
@@ -139,6 +178,9 @@ cdef Py_ssize_t arrange_output_buffer_with_maximum(stream_or_state *stream,
139
178
unsigned char ** buffer ,
140
179
Py_ssize_t length,
141
180
Py_ssize_t max_length):
181
+ # The zlibmodule.c function builds a PyBytes object. An unsigned char *
182
+ # is built here because building raw PyObject * stuff in Cython is somewhat
183
+ # harder due to Cython's interference. FIXME
142
184
cdef Py_ssize_t occupied
143
185
cdef Py_ssize_t new_length
144
186
cdef unsigned char * new_buffer
@@ -225,31 +267,21 @@ def compress(data,
225
267
# initialise helper variables
226
268
cdef int err
227
269
228
- # Implementation imitated from CPython's zlibmodule.c
229
270
try :
230
271
while True :
231
- # This loop runs n times (at least twice). n-1 times to fill the input
232
- # buffer with data. The nth time the input is empty. In that case
233
- # stream.flush is set to FULL_FLUSH and the end_of_stream is activated.
234
272
arrange_input_buffer(& stream, & ibuflen)
235
273
if ibuflen == 0 :
236
274
stream.flush = FULL_FLUSH
237
275
stream.end_of_stream = 1
238
276
else :
239
277
stream.flush = NO_FLUSH
240
278
241
- # This loop reads all the input bytes. The check is at the end,
242
- # because when flush = FULL_FLUSH the input buffer is empty. But
243
- # this loop still needs to run one time.
244
279
while True :
245
280
bufsize = arrange_output_buffer(& stream, & obuf, bufsize)
246
281
if bufsize == - 1 :
247
282
raise MemoryError (" Unsufficient memory for buffer allocation" )
248
283
err = isal_deflate(& stream)
249
284
if err != COMP_OK:
250
- # There is some python interacting when possible exceptions
251
- # Are raised. So we remain in pure C code if we check for
252
- # COMP_OK first.
253
285
check_isal_deflate_rc(err)
254
286
if stream.avail_out != 0 :
255
287
break
@@ -302,23 +334,16 @@ def decompress(data,
302
334
cdef unsigned char * obuf = NULL
303
335
cdef int err
304
336
305
- # Implementation imitated from CPython's zlibmodule.c
306
337
try :
307
338
while True :
308
339
arrange_input_buffer(& stream, & ibuflen)
309
340
310
- # This loop reads all the input bytes. The check is at the end,
311
- # because when the block state is not at FINISH, the function needs
312
- # to be called again.
313
341
while True :
314
342
bufsize = arrange_output_buffer(& stream, & obuf, bufsize)
315
343
if bufsize == - 1 :
316
344
raise MemoryError (" Unsufficient memory for buffer allocation" )
317
345
err = isal_inflate(& stream)
318
346
if err != ISAL_DECOMP_OK:
319
- # There is some python interacting when possible exceptions
320
- # Are raised. So we remain in pure C code if we check for
321
- # COMP_OK first.
322
347
check_isal_inflate_rc(err)
323
348
if stream.avail_out != 0 :
324
349
break
@@ -445,19 +470,13 @@ cdef class Compress:
445
470
cdef int err
446
471
try :
447
472
while True :
448
- # This loop runs n times (at least twice). n-1 times to fill the input
449
- # buffer with data. The nth time the input is empty. In that case
450
- # stream.flush is set to FULL_FLUSH and the end_of_stream is activated.
451
473
arrange_input_buffer(& self .stream, & ibuflen)
452
474
while True :
453
475
obuflen = arrange_output_buffer(& self .stream, & obuf, obuflen)
454
476
if obuflen== - 1 :
455
477
raise MemoryError (" Unsufficient memory for buffer allocation" )
456
478
err = isal_deflate(& self .stream)
457
479
if err != COMP_OK:
458
- # There is some python interacting when possible exceptions
459
- # Are raised. So we remain in pure C code if we check for
460
- # COMP_OK first.
461
480
check_isal_deflate_rc(err)
462
481
if self .stream.avail_out != 0 :
463
482
break
@@ -504,11 +523,8 @@ cdef class Compress:
504
523
raise MemoryError (" Unsufficient memory for buffer allocation" )
505
524
err = isal_deflate(& self .stream)
506
525
if err != COMP_OK:
507
- # There is some python interacting when possible exceptions
508
- # Are raised. So we remain in pure C code if we check for
509
- # COMP_OK first.
510
526
check_isal_deflate_rc(err)
511
- if self .stream.avail_out != 0 : # All input is processed and therefore all output flushed.
527
+ if self .stream.avail_out != 0 :
512
528
break
513
529
if self .stream.avail_in != 0 :
514
530
raise AssertionError (" There should be no available input after flushing." )
@@ -629,8 +645,6 @@ cdef class Decompress:
629
645
obuflen = hard_limit
630
646
631
647
try :
632
- # This loop reads all the input bytes. If there are no input bytes
633
- # anymore the output is written.
634
648
while True :
635
649
arrange_input_buffer(& self .stream, & ibuflen)
636
650
while True :
@@ -643,9 +657,6 @@ cdef class Decompress:
643
657
break
644
658
err = isal_inflate(& self .stream)
645
659
if err != ISAL_DECOMP_OK:
646
- # There is some python interacting when possible exceptions
647
- # Are raised. So we remain in pure C code if we check for
648
- # COMP_OK first.
649
660
check_isal_inflate_rc(err)
650
661
if self .stream.block_state == ISAL_BLOCK_FINISH or self .stream.avail_out != 0 :
651
662
break
@@ -688,13 +699,7 @@ cdef class Decompress:
688
699
raise MemoryError (" Unsufficient memory for buffer allocation" )
689
700
err = isal_inflate(& self .stream)
690
701
if err != ISAL_DECOMP_OK:
691
- # There is some python interacting when possible exceptions
692
- # Are raised. So we remain in pure C code if we check for
693
- # COMP_OK first.
694
702
check_isal_inflate_rc(err)
695
- # Instead of output buffer resizing as the zlibmodule.c example
696
- # the data is appended to a list.
697
- # TODO: Improve this with the buffer protocol.
698
703
if self .stream.avail_out != 0 or self .stream.block_state == ISAL_BLOCK_FINISH:
699
704
break
700
705
if self .stream.block_state == ISAL_BLOCK_FINISH or ibuflen == 0 :
0 commit comments