Commit 1552e24

Merge pull request #2 from rhpvorderman/igzipapp
Create a working igzip app.
2 parents 4116c93 + 5f08f67 commit 1552e24

4 files changed: +95 -38 lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 
 ### Checklist
-- [ ] Pull request details were added to CHANHELOG.rst
+- [ ] Pull request details were added to CHANGELOG.rst

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
@@ -9,6 +9,7 @@ Changelog
 
 version 0.1.0-dev
 -----------------
++ Added a working gzip app using ``python -m isal.igzip``
 + Add test suite that tests all possible settings for functions on the
   isal_zlib module.
 + Create igzip module which implements all gzip functions and methods.
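For context, the new app is invoked as ``python -m isal.igzip somefile`` (add ``-d`` to decompress, or ``-0``/``--fast`` through ``-3``/``--best`` to pick a level), and the igzip module mirrors the stdlib ``gzip`` API. A minimal usage sketch, assuming the package is installed and importable as ``isal`` ("example.gz" is a made-up file name):

# Minimal sketch of the igzip API introduced in this commit; assumes the
# `isal` package is installed, and "example.gz" is a hypothetical file name.
from isal import igzip

data = b"hello world" * 1024

# One-shot helpers, mirroring gzip.compress()/gzip.decompress().
blob = igzip.compress(data, compresslevel=3)
assert igzip.decompress(blob) == data

# File interface via igzip.open(), the same function the CLI uses.
with igzip.open("example.gz", "wb", compresslevel=2) as f:
    f.write(data)
with igzip.open("example.gz", "rb") as f:
    assert f.read() == data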

src/isal/igzip.py

Lines changed: 86 additions & 29 deletions
@@ -22,6 +22,7 @@
 Library to speed up its methods."""
 
 import argparse
+import functools
 import gzip
 import io
 import os
@@ -37,6 +38,8 @@
 _COMPRESS_LEVEL_BEST = isal_zlib.ISAL_BEST_COMPRESSION
 _BLOCK_SIZE = 64*1024
 
+BUFFER_SIZE = _compression.BUFFER_SIZE
+
 
 # The open method was copied from the python source with minor adjustments.
 def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_TRADEOFF,
@@ -145,9 +148,57 @@ def write(self, data):
 # to do so in pure python.
 class _IGzipReader(_compression.DecompressReader):
     def __init__(self, fp):
-        super().__init__(fp, isal_zlib.decompressobj,
+        super().__init__(gzip._PaddedFile(fp), isal_zlib.decompressobj,
+                         trailing_error=isal_zlib.IsalError,
                          wbits=16 + isal_zlib.MAX_WBITS)
 
+    # Created by mixing and matching gzip._GzipReader and
+    # _compression.DecompressReader
+    def read(self, size=-1):
+        if size < 0:
+            return self.readall()
+        # size=0 is special because decompress(max_length=0) is not supported
+        if not size:
+            return b""
+
+        # For certain input data, a single
+        # call to decompress() may not return
+        # any data. In this case, retry until we get some data or reach EOF.
+        uncompress = b""
+        while True:
+            if self._decompressor.eof:
+                buf = (self._decompressor.unused_data or
+                       self._fp.read(BUFFER_SIZE))
+                if not buf:
+                    break
+                # Continue to next stream.
+                self._decompressor = self._decomp_factory(
+                    **self._decomp_args)
+                try:
+                    uncompress = self._decompressor.decompress(buf, size)
+                except self._trailing_error:
+                    # Trailing data isn't a valid compressed stream; ignore it.
+                    break
+            else:
+                # Read a chunk of data from the file
+                buf = self._fp.read(BUFFER_SIZE)
+                uncompress = self._decompressor.decompress(buf, size)
+            if self._decompressor.unconsumed_tail != b"":
+                self._fp.prepend(self._decompressor.unconsumed_tail)
+            elif self._decompressor.unused_data != b"":
+                # Prepend the already read bytes to the fileobj so they can
+                # be seen by _read_eof() and _read_gzip_header()
+                self._fp.prepend(self._decompressor.unused_data)
+
+            if uncompress != b"":
+                break
+            if buf == b"":
+                raise EOFError("Compressed file ended before the "
+                               "end-of-stream marker was reached")
+
+        self._pos += len(uncompress)
+        return uncompress
+
 
 # Plagiarized from gzip.py from python's stdlib.
 def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
@@ -161,6 +212,7 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
     return buf.getvalue()
 
 
+# Unlike stdlib, do not use the roundabout way of doing this via a file.
 def decompress(data):
     """Decompress a gzip compressed string in one shot.
     Return the decompressed string.
@@ -174,44 +226,49 @@ def main():
         "A simple command line interface for the igzip module. "
         "Acts like igzip.")
     parser.add_argument("file")
-    parser.add_argument("--fast", action="store_true",
-                        help="use fastest compression")
-    parser.add_argument("--best", action="store_true",
-                        help="use best compression")
-    parser.add_argument("-d", "--decompress", action="store_false",
-                        dest="compress",
-                        help="Decompress the file instead of compressing.")
+    compress_group = parser.add_mutually_exclusive_group()
+    compress_group.add_argument(
+        "-0", "--fast", action="store_const", dest="compresslevel",
+        const=_COMPRESS_LEVEL_FAST,
+        help="use compression level 0 (fastest)")
+    compress_group.add_argument(
+        "-1", action="store_const", dest="compresslevel",
+        const=1,
+        help="use compression level 1")
+    compress_group.add_argument(
+        "-2", action="store_const", dest="compresslevel",
+        const=2,
+        help="use compression level 2 (default)")
+    compress_group.add_argument(
+        "-3", "--best", action="store_const", dest="compresslevel",
+        const=_COMPRESS_LEVEL_BEST,
+        help="use compression level 3 (best)")
+    compress_group.add_argument(
+        "-d", "--decompress", action="store_false",
+        dest="compress",
+        help="Decompress the file instead of compressing.")
     args = parser.parse_args()
 
-    if args.fast:
-        compresslevel = _COMPRESS_LEVEL_FAST
-    elif args.best:
-        compresslevel = _COMPRESS_LEVEL_BEST
-    else:
-        compresslevel = _COMPRESS_LEVEL_TRADEOFF
+    compresslevel = args.compresslevel or _COMPRESS_LEVEL_TRADEOFF
 
     if args.compress:
         out_filename = args.file + ".gz"
-        with io.open(args.file, "rb") as in_file:
-            with open(out_filename, mode="rb", compresslevel=compresslevel
-                      ) as out_file:
-                while True:
-                    block = in_file.read(_BLOCK_SIZE)
-                    if block == b"":
-                        break
-                    out_file.write(block)
+        out_open = functools.partial(open, compresslevel=compresslevel)
+        in_open = io.open
     else:
         base, extension = os.path.splitext(args.file)
         if extension != ".gz":
            raise ValueError("Can only decompress files with a .gz extension")
        out_filename = base
-        with open(args.file, "rb") as in_file:
-            with io.open(out_filename, mode="rb") as out_file:
-                while True:
-                    block = in_file.read(_BLOCK_SIZE)
-                    if block == b"":
-                        break
-                    out_file.write(block)
+        out_open = io.open
+        in_open = open
+    with in_open(args.file, "rb") as in_file:
+        with out_open(out_filename, "wb") as out_file:
+            while True:
+                block = in_file.read(_BLOCK_SIZE)
+                if block == b"":
                    break
+                out_file.write(block)
 
 
 if __name__ == "__main__":
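The reworked read() above restarts the decompressor when a member ends ("Continue to next stream."), so files made of several concatenated gzip members should now be read back in full, as with stdlib gzip. A small sketch of that behaviour, assuming the package is importable as ``isal`` and using a made-up file name:

# Sketch of the multi-member behaviour enabled by _IGzipReader.read();
# assumes `isal` is installed, "members.gz" is a hypothetical file name.
from isal import igzip

member1 = igzip.compress(b"first member\n")
member2 = igzip.compress(b"second member\n")
with open("members.gz", "wb") as raw:    # builtin open(): write raw bytes
    raw.write(member1 + member2)         # two gzip members back to back

with igzip.open("members.gz", "rb") as f:
    assert f.read() == b"first member\nsecond member\n"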

src/isal/isal_zlib.pyx

Lines changed: 7 additions & 8 deletions
@@ -352,9 +352,8 @@ cdef class Compress:
 
 cdef class Decompress:
     cdef public bytes unused_data
-    cdef public unconsumed_tail
+    cdef public bytes unconsumed_tail
     cdef public bint eof
-    cdef public bint needs_input
     cdef bint is_initialised
     cdef inflate_state stream
     cdef unsigned char * obuf
@@ -381,7 +380,6 @@
         self.unconsumed_tail = b""
         self.eof = 0
         self.is_initialised = 1
-        self.needs_input = 1
 
     def __dealloc__(self):
         if self.obuf is not NULL:
@@ -420,8 +418,7 @@
         # This loop reads all the input bytes. If there are no input bytes
         # anymore the output is written.
         while (self.stream.avail_out == 0
-               or self.stream.avail_in != 0
-               or self.stream.block_state != ISAL_BLOCK_FINISH):
+               or self.stream.avail_in != 0):
            self.stream.next_out = self.obuf # Reset output buffer.
            if total_bytes >= max_length:
                break
@@ -458,16 +455,18 @@
         # 1. Output limit was reached. Save leftover input in unconsumed_tail.
         # 2. All input data was consumed. Clear unconsumed_tail.
         unused_bytes = self.stream.avail_in
-        self.unconsumed_tail = data[-unused_bytes:]
-        self.needs_input = 0 if unused_bytes > 0 else 1
+        if unused_bytes == 0:
+            self.unconsumed_tail = b""
+        else:
+            self.unconsumed_tail = data[-unused_bytes:]
         return b"".join(out)
 
     def flush(self, Py_ssize_t length = DEF_BUF_SIZE):
         if length <= 0:
             raise ValueError("Length must be greater than 0")
         if length > UINT32_MAX:
             raise ValueError("Length should not be larger than 4GB.")
-        data = self.unconsumed_tail
+        data = self.unconsumed_tail[:]
         cdef Py_ssize_t ibuflen = len(data)
         if ibuflen > UINT32_MAX:
             # This should never happen, because we check the input size in
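The changes above make ``unconsumed_tail`` always a bytes object, clear it once all input has been consumed, and drop ``needs_input``. A short sketch of how that surfaces to callers, assuming ``isal_zlib`` mirrors the stdlib ``zlib`` API (``compress()`` and ``decompressobj()`` with a ``max_length`` argument), which is what the igzip.py code above relies on:

# Sketch, assuming isal_zlib mirrors zlib's compress()/decompressobj() API
# (igzip.py above uses decompressobj() and decompress(buf, max_length)).
from isal import isal_zlib

blob = isal_zlib.compress(b"x" * 100000)
d = isal_zlib.decompressobj()

chunk = d.decompress(blob, 1024)              # limit output with max_length
assert isinstance(d.unconsumed_tail, bytes)   # now always a bytes object

rest = d.decompress(d.unconsumed_tail)        # feed the leftover back in
assert d.unconsumed_tail == b""               # cleared once input is consumed
assert chunk + rest == b"x" * 100000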
