22
22
Library to speed up its methods."""
23
23
24
24
import argparse
25
+ import functools
25
26
import gzip
26
27
import io
27
28
import os
37
38
_COMPRESS_LEVEL_BEST = isal_zlib .ISAL_BEST_COMPRESSION
38
39
_BLOCK_SIZE = 64 * 1024
39
40
41
+ BUFFER_SIZE = _compression .BUFFER_SIZE
42
+
40
43
41
44
# The open method was copied from the python source with minor adjustments.
42
45
def open (filename , mode = "rb" , compresslevel = _COMPRESS_LEVEL_TRADEOFF ,
@@ -145,9 +148,57 @@ def write(self, data):
145
148
# to do so in pure python.
146
149
class _IGzipReader(_compression.DecompressReader):
    """Raw reader that decompresses gzip data with ``isal_zlib``.

    The file object is wrapped in ``gzip._PaddedFile`` so that compressed
    bytes which were read from the file but not consumed by the
    decompressor can be pushed back and handed out again by the next
    ``self._fp.read()`` call.
    """

    def __init__(self, fp):
        """Wrap *fp* and configure the isal_zlib decompressor factory.

        :param fp: binary file object positioned at the start of the
                   compressed data.
        """
        super().__init__(gzip._PaddedFile(fp), isal_zlib.decompressobj,
                         trailing_error=isal_zlib.IsalError,
                         wbits=16 + isal_zlib.MAX_WBITS)

    # Created by mixing and matching gzip._GzipReader and
    # _compression.DecompressReader
    def read(self, size=-1):
        """Return up to *size* decompressed bytes.

        :param size: maximum number of bytes to return. A negative value
                     delegates to ``readall()``; ``0`` returns ``b""``.
        :return: decompressed bytes, or ``b""`` once no further input is
                 available or the remaining input is not a valid stream.
        :raises EOFError: if the file is exhausted while the current
                          stream has not reached its end-of-stream marker.
        """
        if size < 0:
            return self.readall()
        # size=0 is special because decompress(max_length=0) is not supported
        if not size:
            return b""

        # For certain input data, a single
        # call to decompress() may not return
        # any data. In this case, retry until we get some data or reach EOF.
        uncompress = b""
        while True:
            if self._decompressor.eof:
                # Leftover input of the finished stream was already pushed
                # back onto self._fp, so always refill from there. Taking
                # it from unused_data as well would decode it twice.
                buf = self._fp.read(BUFFER_SIZE)
                if not buf:
                    break
                # Continue to next stream.
                self._decompressor = self._decomp_factory(
                    **self._decomp_args)
                try:
                    uncompress = self._decompressor.decompress(buf, size)
                except self._trailing_error:
                    # Trailing data isn't a valid compressed stream; ignore it.
                    break
            else:
                # Read a chunk of data from the file
                buf = self._fp.read(BUFFER_SIZE)
                uncompress = self._decompressor.decompress(buf, size)

            # Hand input that was read but not consumed back to the file
            # wrapper, whichever branch produced it, so that the next
            # self._fp.read() returns it first.
            if self._decompressor.unconsumed_tail != b"":
                self._fp.prepend(self._decompressor.unconsumed_tail)
            elif self._decompressor.unused_data != b"":
                self._fp.prepend(self._decompressor.unused_data)

            if uncompress != b"":
                break
            if buf == b"":
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")

        self._pos += len(uncompress)
        return uncompress
201
+
151
202
152
203
# Plagiarized from gzip.py from python's stdlib.
153
204
def compress (data , compresslevel = _COMPRESS_LEVEL_BEST , * , mtime = None ):
@@ -161,6 +212,7 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
161
212
return buf .getvalue ()
162
213
163
214
215
+ # Unlike stdlib, do not use the roundabout way of doing this via a file.
164
216
def decompress (data ):
165
217
"""Decompress a gzip compressed string in one shot.
166
218
Return the decompressed string.
@@ -174,44 +226,49 @@ def main():
174
226
"A simple command line interface for the igzip module. "
175
227
"Acts like igzip." )
176
228
parser .add_argument ("file" )
177
- parser .add_argument ("--fast" , action = "store_true" ,
178
- help = "use fastest compression" )
179
- parser .add_argument ("--best" , action = "store_true" ,
180
- help = "use best compression" )
181
- parser .add_argument ("-d" , "--decompress" , action = "store_false" ,
182
- dest = "compress" ,
183
- help = "Decompress the file instead of compressing." )
229
+ compress_group = parser .add_mutually_exclusive_group ()
230
+ compress_group .add_argument (
231
+ "-0" , "--fast" , action = "store_const" , dest = "compresslevel" ,
232
+ const = _COMPRESS_LEVEL_FAST ,
233
+ help = "use compression level 0 (fastest)" )
234
+ compress_group .add_argument (
235
+ "-1" , action = "store_const" , dest = "compresslevel" ,
236
+ const = 1 ,
237
+ help = "use compression level 1" )
238
+ compress_group .add_argument (
239
+ "-2" , action = "store_const" , dest = "compresslevel" ,
240
+ const = 2 ,
241
+ help = "use compression level 2 (default)" )
242
+ compress_group .add_argument (
243
+ "-3" , "--best" , action = "store_const" , dest = "compresslevel" ,
244
+ const = _COMPRESS_LEVEL_BEST ,
245
+ help = "use compression level 3 (best)" )
246
+ compress_group .add_argument (
247
+ "-d" , "--decompress" , action = "store_false" ,
248
+ dest = "compress" ,
249
+ help = "Decompress the file instead of compressing." )
184
250
args = parser .parse_args ()
185
251
186
- if args .fast :
187
- compresslevel = _COMPRESS_LEVEL_FAST
188
- elif args .best :
189
- compresslevel = _COMPRESS_LEVEL_BEST
190
- else :
191
- compresslevel = _COMPRESS_LEVEL_TRADEOFF
252
+ compresslevel = args .compresslevel or _COMPRESS_LEVEL_TRADEOFF
192
253
193
254
if args .compress :
194
255
out_filename = args .file + ".gz"
195
- with io .open (args .file , "rb" ) as in_file :
196
- with open (out_filename , mode = "rb" , compresslevel = compresslevel
197
- ) as out_file :
198
- while True :
199
- block = in_file .read (_BLOCK_SIZE )
200
- if block == b"" :
201
- break
202
- out_file .write (block )
256
+ out_open = functools .partial (open , compresslevel = compresslevel )
257
+ in_open = io .open
203
258
else :
204
259
base , extension = os .path .splitext (args .file )
205
260
if extension != ".gz" :
206
261
raise ValueError ("Can only decompress files with a .gz extension" )
207
262
out_filename = base
208
- with open (args .file , "rb" ) as in_file :
209
- with io .open (out_filename , mode = "rb" ) as out_file :
210
- while True :
211
- block = in_file .read (_BLOCK_SIZE )
212
- if block == b"" :
213
- break
214
- out_file .write (block )
263
+ out_open = io .open
264
+ in_open = open
265
+ with in_open (args .file , "rb" ) as in_file :
266
+ with out_open (out_filename , "wb" ) as out_file :
267
+ while True :
268
+ block = in_file .read (_BLOCK_SIZE )
269
+ if block == b"" :
270
+ break
271
+ out_file .write (block )
215
272
216
273
217
274
if __name__ == "__main__" :
0 commit comments