Skip to content

Commit 0c3fed9

Browse files
committed
fix: update default compression to ZSTD and improve documentation for write_parquet method
1 parent 79c22d6 commit 0c3fed9

File tree

1 file changed

+13
-5
lines changed

1 file changed

+13
-5
lines changed

python/datafusion/dataframe.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -620,16 +620,24 @@ def write_csv(self, path: str | pathlib.Path, with_header: bool = False) -> None
620620
def write_parquet(
621621
self,
622622
path: str | pathlib.Path,
623-
compression: str = "uncompressed",
623+
compression: str = "ZSTD",
624624
compression_level: int | None = None,
625625
) -> None:
626626
"""Execute the :py:class:`DataFrame` and write the results to a Parquet file.
627627
628628
Args:
629-
path: Path of the Parquet file to write.
630-
compression: Compression type to use.
631-
compression_level: Compression level to use.
632-
"""
629+
path (str | pathlib.Path): The file path to write the Parquet file to.
630+
compression (str): The compression algorithm to use. Default is "ZSTD".
631+
compression_level (int | None): The compression level to use. For ZSTD, the
632+
valid range is 1 to 22, with the default being 3. Higher levels
633+
provide better compression but slower speed.
634+
"""
635+
# default compression level to 3 for ZSTD
636+
if compression == "ZSTD":
637+
if compression_level is None:
638+
compression_level = 3
639+
elif not (1 <= compression_level <= 22):
640+
raise ValueError("Compression level for ZSTD must be between 1 and 22")
633641
self.df.write_parquet(str(path), compression, compression_level)
634642

635643
def write_json(self, path: str | pathlib.Path) -> None:

0 commit comments

Comments
 (0)