Skip to content

Commit c7cea52

Browse files
authored
GH-47591: [C++] Fix passing zlib compression level (#47594)
### Rationale for this change

When passing the compression level to the zlib initialization function `deflateInit2`, we were actually passing it as the wrong argument (the `memLevel` parameter), while the `level` argument was always `Z_DEFAULT_COMPRESSION`. As a consequence, changing the zlib/gzip "compression level" in the Arrow APIs had little effect on the actual compressed size.

### What changes are included in this PR?

Pass the compression level correctly.

### Are these changes tested?

They are exercised by regular CI tests. In addition, I tested manually that changing the compression level now affects the compressed size accordingly.

### Are there any user-facing changes?

Yes, this fixes the behavior so that it matches the documented semantics.

* GitHub Issue: #47591

Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
1 parent cbd36b8 commit c7cea52

File tree

1 file changed

+14
-4
lines changed

1 file changed

+14
-4
lines changed

cpp/src/arrow/util/compression_zlib.cc

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,16 @@ constexpr int GZIP_CODEC = 16;
5858
// Detect from the stream header whether the data is zlib or gzip.
5959
constexpr int DETECT_CODEC = 32;
6060

61+
// Default "memory level"
62+
//
63+
// Memory consumption when compressing is given by the formula:
64+
// `(1 << (windowBits+2)) + (1 << (memLevel+9))`
65+
//
66+
// With windowBits=15 and memLevel=8 (the zlib defaults), this comes to 262144 bytes (256 KiB).
67+
//
68+
// (see `zconf.h` from zlib)
69+
constexpr int kGzipDefaultMemLevel = 8;
70+
6171
constexpr int kGZipMinCompressionLevel = 1;
6272
constexpr int kGZipMaxCompressionLevel = 9;
6373

@@ -196,8 +206,8 @@ class GZipCompressor : public Compressor {
196206
int ret;
197207
// Initialize to run specified format
198208
int window_bits = CompressionWindowBitsForFormat(format, input_window_bits);
199-
if ((ret = deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, window_bits,
200-
compression_level_, Z_DEFAULT_STRATEGY)) != Z_OK) {
209+
if ((ret = deflateInit2(&stream_, compression_level_, Z_DEFLATED, window_bits,
210+
kGzipDefaultMemLevel, Z_DEFAULT_STRATEGY)) != Z_OK) {
201211
return ZlibError("zlib deflateInit failed: ");
202212
} else {
203213
initialized_ = true;
@@ -343,8 +353,8 @@ class GZipCodec : public Codec {
343353
int ret;
344354
// Initialize to run specified format
345355
int window_bits = CompressionWindowBitsForFormat(format_, window_bits_);
346-
if ((ret = deflateInit2(&stream_, Z_DEFAULT_COMPRESSION, Z_DEFLATED, window_bits,
347-
compression_level_, Z_DEFAULT_STRATEGY)) != Z_OK) {
356+
if ((ret = deflateInit2(&stream_, compression_level_, Z_DEFLATED, window_bits,
357+
kGzipDefaultMemLevel, Z_DEFAULT_STRATEGY)) != Z_OK) {
348358
return ZlibErrorPrefix("zlib deflateInit failed: ", stream_.msg);
349359
}
350360
compressor_initialized_ = true;

0 commit comments

Comments
 (0)