Skip to content

Commit 819de0d

Browse files
committed
fix: update default compression level for ZSTD to 4 in write_parquet method
1 parent b1db46c commit 819de0d

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

python/datafusion/dataframe.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -629,14 +629,14 @@ def write_parquet(
629629
path (str | pathlib.Path): The file path to write the Parquet file.
630630
compression (str): The compression algorithm to use. Default is "ZSTD".
631631
compression_level (int | None): The compression level to use. For ZSTD, the
632-
recommended range is 1 to 22, with the default being 3. Higher levels
632+
recommended range is 1 to 22, with the default being 4. Higher levels
633633
provide better compression but slower speed.
634634
"""
635635
if compression == "ZSTD":
636636
if compression_level is None:
637-
# Default compression level for ZSTD is 3 as per
638-
# https://facebook.github.io/zstd/zstd_manual.html
639-
compression_level = 3
637+
# Default compression level for ZSTD is 4 like in delta-rs
638+
# https://github.com/apache/datafusion-python/pull/981#discussion_r1899871918
639+
compression_level = 4
640640
elif not (1 <= compression_level <= 22):
641641
raise ValueError("Compression level for ZSTD must be between 1 and 22")
642642
self.df.write_parquet(str(path), compression, compression_level)

0 commit comments

Comments
 (0)