@@ -29,7 +29,7 @@ import zlib
29
29
30
30
from .crc cimport crc32_gzip_refl
31
31
from .igzip_lib cimport *
32
- from libc.stdint cimport UINT64_MAX, UINT32_MAX, uint32_t
32
+ from libc.stdint cimport UINT64_MAX, UINT32_MAX
33
33
from cpython.mem cimport PyMem_Malloc, PyMem_Free
34
34
from cpython.buffer cimport PyBUF_READ, PyBUF_C_CONTIGUOUS, PyObject_GetBuffer, \
35
35
PyBuffer_Release
@@ -45,8 +45,6 @@ ISAL_DEFAULT_COMPRESSION = 2
45
45
Z_BEST_SPEED = ISAL_BEST_SPEED
46
46
Z_BEST_COMPRESSION = ISAL_BEST_COMPRESSION
47
47
Z_DEFAULT_COMPRESSION = ISAL_DEFAULT_COMPRESSION
48
- cdef int ISAL_DEFAULT_COMPRESSION_I = ISAL_DEFAULT_COMPRESSION
49
- cdef int ZLIB_DEFAULT_COMPRESSION_I = zlib.Z_DEFAULT_COMPRESSION
50
48
51
49
DEF_BUF_SIZE = zlib.DEF_BUF_SIZE
52
50
DEF_MEM_LEVEL = zlib.DEF_MEM_LEVEL
@@ -65,13 +63,10 @@ Z_FILTERED=zlib.Z_FILTERED
65
63
Z_FIXED= zlib.Z_FIXED
66
64
67
65
# Flush methods
68
- ISAL_NO_FLUSH= NO_FLUSH
69
- ISAL_SYNC_FLUSH= SYNC_FLUSH
70
- ISAL_FULL_FLUSH= FULL_FLUSH
71
-
72
- Z_NO_FLUSH= ISAL_NO_FLUSH
73
- Z_SYNC_FLUSH= ISAL_SYNC_FLUSH
74
- Z_FINISH= ISAL_FULL_FLUSH
66
+ Z_NO_FLUSH= zlib.Z_NO_FLUSH
67
+ Z_SYNC_FLUSH= zlib.Z_SYNC_FLUSH
68
+ Z_FULL_FLUSH= zlib.Z_FULL_FLUSH
69
+ Z_FINISH= zlib.Z_FINISH
75
70
76
71
class IsalError (OSError ):
77
72
""" Exception raised on compression and decompression errors."""
@@ -126,12 +121,6 @@ def crc32(data, value = 0):
126
121
finally :
127
122
PyBuffer_Release(buffer )
128
123
129
- cdef Py_ssize_t Py_ssize_t_min(Py_ssize_t a, Py_ssize_t b):
130
- if a <= b:
131
- return a
132
- else :
133
- return b
134
-
135
124
ctypedef fused stream_or_state:
136
125
isal_zstream
137
126
inflate_state
@@ -147,7 +136,7 @@ cdef void arrange_input_buffer(stream_or_state *stream, Py_ssize_t *remains):
147
136
remains[0 ] -= stream.avail_in
148
137
149
138
def compress (data ,
150
- int level = ISAL_DEFAULT_COMPRESSION_I ,
139
+ int level = ISAL_DEFAULT_COMPRESSION ,
151
140
int wbits = ISAL_DEF_MAX_HIST_BITS):
152
141
"""
153
142
Compresses the bytes in *data*. Returns a bytes object with the
@@ -165,9 +154,6 @@ def compress(data,
165
154
-9 to -15 will generate a raw compressed string with
166
155
no headers and trailers.
167
156
"""
168
- if level == ZLIB_DEFAULT_COMPRESSION_I:
169
- level = ISAL_DEFAULT_COMPRESSION_I
170
-
171
157
# Initialise stream
172
158
cdef isal_zstream stream
173
159
cdef unsigned int level_buf_size = zlib_mem_level_to_isal(level, DEF_MEM_LEVEL)
@@ -352,7 +338,7 @@ def compressobj(int level=ISAL_DEFAULT_COMPRESSION,
352
338
no headers and trailers.
353
339
:param memLevel: The amount of memory used for the internal compression
354
340
state. Higher values use more memory for better speed and
355
- smaller output.
341
+ smaller output. Values between 1 and 9 are supported.
356
342
:param zdict: A predefined compression dictionary. A sequence of bytes
357
343
that are expected to occur frequently in the data that is to be
358
344
compressed. The most common subsequences should come
@@ -392,8 +378,6 @@ cdef class Compress:
392
378
err = isal_deflate_set_dict(& self .stream, zdict, zdict_length)
393
379
if err != COMP_OK:
394
380
check_isal_deflate_rc(err)
395
- if level == ZLIB_DEFAULT_COMPRESSION_I:
396
- level = ISAL_DEFAULT_COMPRESSION_I
397
381
self .stream.level = level
398
382
self .stream.level_buf_size = zlib_mem_level_to_isal(level, memLevel)
399
383
self .level_buf = < unsigned char * > PyMem_Malloc(self .stream.level_buf_size * sizeof(char ))
@@ -452,27 +436,34 @@ cdef class Compress:
452
436
finally :
453
437
PyBuffer_Release(buffer )
454
438
455
- def flush (self , int mode = FULL_FLUSH ):
439
+ def flush (self , mode = zlib.Z_FINISH ):
456
440
"""
457
441
All pending input is processed, and a bytes object containing the
458
442
remaining compressed output is returned.
459
443
460
- :param mode: Defaults to ISAL_FULL_FLUSH ( Z_FINISH equivalent) which
444
+ :param mode: Defaults to Z_FINISH which
461
445
finishes the compressed stream and prevents compressing
462
- any more data. The only other supported method is
463
- ISAL_SYNC_FLUSH ( Z_SYNC_FLUSH) equivalent .
446
+ any more data. The other supported methods are
447
+ Z_NO_FLUSH, Z_SYNC_FLUSH and Z_FULL_FLUSH .
464
448
"""
465
- if mode == NO_FLUSH:
449
+
450
+ if mode == zlib.Z_NO_FLUSH:
466
451
# Flushing with no_flush does nothing.
467
452
return b" "
468
-
469
- self .stream.end_of_stream = 1
470
- self .stream.flush = mode
453
+ elif mode == zlib.Z_FINISH:
454
+ self .stream.flush = FULL_FLUSH
455
+ self .stream.end_of_stream = 1
456
+ elif mode == zlib.Z_FULL_FLUSH:
457
+ self .stream.flush = FULL_FLUSH
458
+ elif mode == zlib.Z_SYNC_FLUSH:
459
+ self .stream.flush= SYNC_FLUSH
460
+ else :
461
+ raise IsalError(" Unsupported flush mode" )
471
462
472
463
# Initialise output buffer
473
464
out = []
474
465
475
- while self .stream.internal_state.state ! = ZSTATE_END :
466
+ while True :
476
467
self .stream.next_out = self .obuf # Reset output buffer.
477
468
self .stream.avail_out = self .obuflen
478
469
err = isal_deflate(& self .stream)
@@ -485,6 +476,10 @@ cdef class Compress:
485
476
# the data is appended to a list.
486
477
# TODO: Improve this with the buffer protocol.
487
478
out.append(self .obuf[:self .obuflen - self .stream.avail_out])
479
+ if self .stream.avail_out != 0 : # All input is processed and therefore all output flushed.
480
+ break
481
+ if self .stream.avail_in != 0 :
482
+ raise AssertionError (" There should be no available input after flushing." )
488
483
return b" " .join(out)
489
484
490
485
cdef class Decompress:
@@ -528,19 +523,41 @@ cdef class Decompress:
528
523
if self .obuf is not NULL :
529
524
PyMem_Free(self .obuf)
530
525
526
+ def _view_bitbuffer (self ):
527
+ """ Return the whole bytes currently held in the 64-bit bit buffer of
528
+ the internal inflate_state (at most 8 bytes). This data has already
529
+ been read in, so it is not part of the unconsumed tail."""
530
+ bits_in_buffer = self .stream.read_in_length
531
+ read_in_length = bits_in_buffer // 8
532
+ if read_in_length == 0 :
533
+ return b" "
534
+ remainder = bits_in_buffer % 8
535
+ read_in = self .stream.read_in
536
+ # Shift out the low `remainder` bits that do not form a whole byte. Bytes
537
+ # are added by bitshifting, so they sit in reverse order; serialising the
538
+ # 64-bit integer as 8 little-endian bytes yields them in the correct order.
539
+ return (read_in >> remainder).to_bytes(8 , " little" )[:read_in_length]
540
+
531
541
cdef save_unconsumed_input(self , Py_buffer * data):
532
542
cdef Py_ssize_t old_size, new_size, left_size
533
543
cdef bytes new_data
534
544
if self .stream.block_state == ISAL_BLOCK_FINISH:
535
545
self .eof = 1
536
546
if self .stream.avail_in > 0 :
537
- old_size = len (self .unused_data)
538
547
left_size = < unsigned char * > data.buf + data.len - self .stream.next_in
539
- if left_size > (PY_SSIZE_T_MAX - old_size):
540
- raise MemoryError ()
541
548
new_data = PyBytes_FromStringAndSize(< char * > self .stream.next_in, left_size)
542
- self .unused_data += new_data
543
- if self .stream.avail_in > 0 or self .unconsumed_tail:
549
+ else :
550
+ new_data = b" "
551
+ if not self .unused_data:
552
+ # The block is finished and this decompress object cannot be
553
+ # used anymore. Unused data left in the bitbuffer has to be
554
+ # recovered, but only when self.unused_data is empty; otherwise
555
+ # we assume the bitbuffer data has already been added.
556
+ self .unused_data = self ._view_bitbuffer()
557
+ self .unused_data += new_data
558
+ if self .unconsumed_tail:
559
+ self .unconsumed_tail = b" " # When there is unused_data unconsumed tail should be b""
560
+ elif self .stream.avail_in > 0 or self .unconsumed_tail:
544
561
left_size = < unsigned char * > data.buf + data.len - self .stream.next_in
545
562
new_data = PyBytes_FromStringAndSize(< char * > self .stream.next_in, left_size)
546
563
self .unconsumed_tail = new_data
@@ -646,9 +663,9 @@ cdef class Decompress:
646
663
cdef Py_ssize_t unused_bytes
647
664
648
665
try :
649
- while self .stream.block_state ! = ISAL_BLOCK_FINISH and ibuflen ! = 0 :
666
+ while True :
650
667
arrange_input_buffer(& self .stream, & ibuflen)
651
- while ( self .stream.block_state ! = ISAL_BLOCK_FINISH) :
668
+ while True :
652
669
self .stream.next_out = obuf # Reset output buffer.
653
670
self .stream.avail_out = obuflen
654
671
err = isal_inflate(& self .stream)
@@ -660,17 +677,20 @@ cdef class Decompress:
660
677
# Instead of output buffer resizing as the zlibmodule.c example
661
678
# the data is appended to a list.
662
679
# TODO: Improve this with the buffer protocol.
680
+ if self .stream.avail_out == obuflen:
681
+ break
663
682
bytes_written = obuflen - self .stream.avail_out
664
683
total_bytes += bytes_written
665
684
out.append(obuf[:bytes_written])
685
+ if self .stream.avail_out != 0 :
686
+ break
687
+ if self .stream.block_state == ISAL_BLOCK_FINISH or ibuflen == 0 :
688
+ break
666
689
self .save_unconsumed_input(buffer )
667
690
return b" " .join(out)
668
691
finally :
669
692
PyMem_Free(obuf)
670
693
671
- @property
672
- def crc (self ):
673
- return self .stream.crc
674
694
675
695
cdef wbits_to_flag_and_hist_bits_deflate(int wbits,
676
696
unsigned short * hist_bits,
@@ -707,9 +727,6 @@ cdef wbits_to_flag_and_hist_bits_inflate(int wbits,
707
727
elif 40 <= wbits <= 47 : # Accept gzip or zlib
708
728
hist_bits[0 ] = wbits - 32
709
729
crc_flag[0 ] = ISAL_GZIP if gzip else ISAL_ZLIB
710
- elif 72 <= wbits <= 79 :
711
- hist_bits[0 ] = wbits - 64
712
- crc_flag[0 ] = ISAL_GZIP_NO_HDR_VER
713
730
else :
714
731
raise ValueError (" Invalid wbits value" )
715
732
0 commit comments