Skip to content

Commit 5d5a073

Browse files
authored
Merge pull request #207 from pycompression/codesimplification
Simplify code in igzip.py
2 parents c49383f + 7667e6f commit 5d5a073

File tree

3 files changed

+12
-28
lines changed

3 files changed

+12
-28
lines changed

CHANGELOG.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ version 1.7.0-dev
1313
and wheelbuilding on MacOS ARM64 possible.
1414
+ Fix a bug where READ and WRITE in isal.igzip were inconsistent with the
1515
values in gzip on Python 3.13
16+
+ Small simplifications to the ``igzip.compress`` function, which should lead
17+
to less overhead.
1618

1719
version 1.6.1
1820
-----------------

src/isal/igzip.py

Lines changed: 8 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,7 @@
99
# Changes compared to CPython:
1010
# - Subclassed GzipFile to IGzipFile. Methods that included calls to zlib have
1111
# been overwritten with the same methods, but now calling to isal_zlib.
12-
# - _GzipReader uses a igzip_lib.IgzipDecompressor. This Decompressor is
13-
# derived from the BZ2Decompressor as such it does not produce an unconsumed
14-
# tail but keeps the read data internally. This prevents unnecessary copying
15-
# of data. To accommodate this, the read method has been rewritten.
16-
# - _GzipReader._add_read_data uses isal_zlib.crc32 instead of zlib.crc32.
12+
# - _GzipReader is implemented in C in isal_zlib and allows dropping the GIL.
1713
# - Gzip.compress does not use a GzipFile to compress in memory, but creates a
1814
# simple header using _create_simple_gzip_header and compresses the data with
1915
# igzip_lib.compress using the DECOMP_GZIP_NO_HDR flag. This change was
@@ -33,6 +29,7 @@
3329
import gzip
3430
import io
3531
import os
32+
import shutil
3633
import struct
3734
import sys
3835
import time
@@ -225,13 +222,10 @@ def write(self, data):
225222
_IGzipReader = _GzipReader
226223

227224

228-
def _create_simple_gzip_header(compresslevel: int,
229-
mtime: Optional[SupportsInt] = None) -> bytes:
230-
"""
231-
Write a simple gzip header with no extra fields.
232-
:param compresslevel: Compresslevel used to determine the xfl bytes.
233-
:param mtime: The mtime (must support conversion to a 32-bit integer).
234-
:return: A bytes object representing the gzip header.
225+
def compress(data, compresslevel: int = _COMPRESS_LEVEL_BEST, *,
226+
mtime: Optional[SupportsInt] = None) -> bytes:
227+
"""Compress data in one shot and return the compressed string.
228+
Optional argument is the compression level, in range of 0-3.
235229
"""
236230
if mtime is None:
237231
mtime = time.time()
@@ -240,14 +234,7 @@ def _create_simple_gzip_header(compresslevel: int,
240234
xfl = 4 if compresslevel == _COMPRESS_LEVEL_FAST else 0
241235
# Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
242236
# fields added to header), mtime, xfl and os (255 for unknown OS).
243-
return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)
244-
245-
246-
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
247-
"""Compress data in one shot and return the compressed string.
248-
Optional argument is the compression level, in range of 0-3.
249-
"""
250-
header = _create_simple_gzip_header(compresslevel, mtime)
237+
header = struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)
251238
# use igzip_lib to compress the data without a gzip header but with a
252239
# gzip trailer.
253240
compressed = igzip_lib.compress(data, compresslevel,
@@ -368,11 +355,7 @@ def main():
368355
out_file = sys.stdout.buffer
369356

370357
try:
371-
while True:
372-
block = in_file.read(args.buffer_size)
373-
if block == b"":
374-
break
375-
out_file.write(block)
358+
shutil.copyfileobj(in_file, out_file, args.buffer_size)
376359
finally:
377360
if in_file is not sys.stdin.buffer:
378361
in_file.close()

src/isal/isal_zlibmodule.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,13 @@ Changes compared to CPython:
1414
- Zlib to ISA-L conversion functions were included.
1515
- All compression and checksum functions from zlib replaced with ISA-L
1616
compatible functions.
17-
- No locks in Compress and Decompress objects. These were deemed unnecessary
18-
as the ISA-L functions do not allocate memory, unlike the zlib
19-
counterparts.
2017
- zlib.compress also has a 'wbits' argument. This change was included in
2118
Python 3.11. It allows for faster gzip compression by using
2219
isal_zlib.compress(data, wbits=31).
2320
- Argument parsers were written using the CPython API rather than argument
2421
clinic.
22+
- Created a GzipReader class that implements gzip reading in C, reducing a lot
23+
of overhead compared to the gzip.py:_GzipReader class.
2524
*/
2625

2726
#include "isal_shared.h"

0 commit comments

Comments
 (0)