Skip to content

Commit a8ccbcd

Browse files
committed
Add dataframe writer options and docstring
1 parent 819451f commit a8ccbcd

File tree

1 file changed

+31
-7
lines changed

1 file changed

+31
-7
lines changed

python/datafusion/dataframe.py

Lines changed: 31 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -928,21 +928,28 @@ def except_all(self, other: DataFrame) -> DataFrame:
928928
"""
929929
return DataFrame(self.df.except_all(other.df))
930930

931-
def write_csv(self, path: str | pathlib.Path, with_header: bool = False) -> None:
931+
def write_csv(
932+
self,
933+
path: str | pathlib.Path,
934+
with_header: bool = False,
935+
write_options: DataFrameWriteOptions | None = None,
936+
) -> None:
932937
"""Execute the :py:class:`DataFrame` and write the results to a CSV file.
933938
934939
Args:
935940
path: Path of the CSV file to write.
936941
with_header: If true, output the CSV header row.
942+
write_options: Options that impact how the DataFrame is written.
937943
"""
938-
self.df.write_csv(str(path), with_header)
944+
self.df.write_csv(str(path), with_header, write_options=write_options)
939945

940946
@overload
941947
def write_parquet(
942948
self,
943949
path: str | pathlib.Path,
944950
compression: str,
945951
compression_level: int | None = None,
952+
write_options: DataFrameWriteOptions | None = None,
946953
) -> None: ...
947954

948955
@overload
@@ -951,6 +958,7 @@ def write_parquet(
951958
path: str | pathlib.Path,
952959
compression: Compression = Compression.ZSTD,
953960
compression_level: int | None = None,
961+
write_options: DataFrameWriteOptions | None = None,
954962
) -> None: ...
955963

956964
@overload
@@ -959,16 +967,20 @@ def write_parquet(
959967
path: str | pathlib.Path,
960968
compression: ParquetWriterOptions,
961969
compression_level: None = None,
970+
write_options: DataFrameWriteOptions | None = None,
962971
) -> None: ...
963972

964973
def write_parquet(
965974
self,
966975
path: str | pathlib.Path,
967976
compression: Union[str, Compression, ParquetWriterOptions] = Compression.ZSTD,
968977
compression_level: int | None = None,
978+
write_options: DataFrameWriteOptions | None = None,
969979
) -> None:
970980
"""Execute the :py:class:`DataFrame` and write the results to a Parquet file.
971981
982+
LZO compression is not yet implemented in arrow-rs and is therefore excluded.
983+
972984
Args:
973985
path: Path of the Parquet file to write.
974986
compression: Compression type to use. Default is "ZSTD".
@@ -980,10 +992,10 @@ def write_parquet(
980992
- "lz4": LZ4 compression.
981993
- "lz4_raw": LZ4_RAW compression.
982994
- "zstd": Zstandard compression.
983-
Note: LZO is not yet implemented in arrow-rs and is therefore excluded.
984995
compression_level: Compression level to use. For ZSTD, the
985996
recommended range is 1 to 22, with the default being 4. Higher levels
986997
provide better compression but slower speed.
998+
write_options: Options that impact how the DataFrame is written.
987999
"""
9881000
if isinstance(compression, ParquetWriterOptions):
9891001
if compression_level is not None:
@@ -1001,10 +1013,15 @@ def write_parquet(
10011013
):
10021014
compression_level = compression.get_default_level()
10031015

1004-
self.df.write_parquet(str(path), compression.value, compression_level)
1016+
self.df.write_parquet(
1017+
str(path), compression.value, compression_level, write_options
1018+
)
10051019

10061020
def write_parquet_with_options(
1007-
self, path: str | pathlib.Path, options: ParquetWriterOptions
1021+
self,
1022+
path: str | pathlib.Path,
1023+
options: ParquetWriterOptions,
1024+
write_options: DataFrameWriteOptions | None = None,
10081025
) -> None:
10091026
"""Execute the :py:class:`DataFrame` and write the results to a Parquet file.
10101027
@@ -1013,6 +1030,7 @@ def write_parquet_with_options(
10131030
Args:
10141031
path: Path of the Parquet file to write.
10151032
options: Sets the writer parquet options (see `ParquetWriterOptions`).
1033+
write_options: Options that impact how the DataFrame is written.
10161034
"""
10171035
options_internal = ParquetWriterOptionsInternal(
10181036
options.data_pagesize_limit,
@@ -1053,15 +1071,21 @@ def write_parquet_with_options(
10531071
str(path),
10541072
options_internal,
10551073
column_specific_options_internal,
1074+
write_options,
10561075
)
10571076

1058-
def write_json(self, path: str | pathlib.Path) -> None:
1077+
def write_json(
1078+
self,
1079+
path: str | pathlib.Path,
1080+
write_options: DataFrameWriteOptions | None = None,
1081+
) -> None:
10591082
"""Execute the :py:class:`DataFrame` and write the results to a JSON file.
10601083
10611084
Args:
10621085
path: Path of the JSON file to write.
1086+
write_options: Options that impact how the DataFrame is written.
10631087
"""
1064-
self.df.write_json(str(path))
1088+
self.df.write_json(str(path), write_options=write_options)
10651089

10661090
def write_table(
10671091
self, table_name: str, write_options: DataFrameWriteOptions | None = None

0 commit comments

Comments (0)