
Commit b738b19

Rename write_parquet_options to write_parquet_with_options

1 parent 5d5b1ba

3 files changed (+40, -40 lines)

python/datafusion/dataframe.py (2 additions, 2 deletions)

@@ -906,7 +906,7 @@ def write_parquet(
 
         self.df.write_parquet(str(path), compression.value, compression_level)
 
-    def write_parquet_options(
+    def write_parquet_with_options(
         self, path: str | pathlib.Path, options: ParquetWriterOptions
     ) -> None:
         """Execute the :py:class:`DataFrame` and write the results to a Parquet file.
@@ -952,7 +952,7 @@ def write_parquet_options(
             bloom_filter_ndv=opts.bloom_filter_ndv,
         )
 
-        self.df.write_parquet_options(
+        self.df.write_parquet_with_options(
            str(path),
            options_internal,
            column_specific_options_internal,
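
For reference, a minimal sketch of calling the renamed method from Python. The data and output path are illustrative, and ParquetWriterOptions is assumed importable from datafusion.dataframe, where the diff above references it:

    import pyarrow as pa
    from datafusion import SessionContext
    from datafusion.dataframe import ParquetWriterOptions

    ctx = SessionContext()
    df = ctx.from_arrow(pa.record_batch({"a": [1, 2, 3], "b": ["x", "y", "z"]}))

    # The old name, write_parquet_options, no longer exists after this commit.
    df.write_parquet_with_options(
        "out_dir",  # hypothetical output path
        ParquetWriterOptions(compression="zstd(15)", max_row_group_size=5000),
    )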

python/tests/test_dataframe.py (37 additions, 37 deletions)

@@ -1613,7 +1613,7 @@ def test_write_compressed_parquet_default_compression_level(df, tmp_path, compression):
     df.write_parquet(str(path), compression=compression)
 
 
-def test_write_parquet_options_default_compression(df, tmp_path):
+def test_write_parquet_with_options_default_compression(df, tmp_path):
     """Test that the default compression is ZSTD."""
     df.write_parquet(tmp_path)
 
@@ -1628,11 +1628,11 @@ def test_write_parquet_options_default_compression(df, tmp_path):
     "compression",
     ["gzip(6)", "brotli(7)", "zstd(15)", "snappy", "uncompressed"],
 )
-def test_write_parquet_options_compression(df, tmp_path, compression):
+def test_write_parquet_with_options_compression(df, tmp_path, compression):
     import re
 
     path = tmp_path
-    df.write_parquet_options(str(path), ParquetWriterOptions(compression=compression))
+    df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression))
 
     # test that the actual compression scheme is the one written
     for _root, _dirs, files in os.walk(path):
@@ -1655,32 +1655,32 @@ def test_write_parquet_options_compression(df, tmp_path, compression):
     "compression",
     ["gzip(12)", "brotli(15)", "zstd(23)"],
 )
-def test_write_parquet_options_wrong_compression_level(df, tmp_path, compression):
+def test_write_parquet_with_options_wrong_compression_level(df, tmp_path, compression):
     path = tmp_path
 
     with pytest.raises(Exception, match=r"valid compression range .*? exceeded."):
-        df.write_parquet_options(str(path), ParquetWriterOptions(compression=compression))
+        df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression))
 
 
 @pytest.mark.parametrize("compression", ["wrong", "wrong(12)"])
-def test_write_parquet_options_invalid_compression(df, tmp_path, compression):
+def test_write_parquet_with_options_invalid_compression(df, tmp_path, compression):
     path = tmp_path
 
     with pytest.raises(Exception, match="Unknown or unsupported parquet compression"):
-        df.write_parquet_options(str(path), ParquetWriterOptions(compression=compression))
+        df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression))
 
 
 @pytest.mark.parametrize(
     ("writer_version", "format_version"),
     [("1.0", "1.0"), ("2.0", "2.6"), (None, "1.0")],
 )
-def test_write_parquet_options_writer_version(df, tmp_path, writer_version, format_version):
+def test_write_parquet_with_options_writer_version(df, tmp_path, writer_version, format_version):
     """Test the Parquet writer version. Note that writer_version=2.0 results in
     format_version=2.6"""
     if writer_version is None:
-        df.write_parquet_options(tmp_path, ParquetWriterOptions())
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions())
     else:
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(writer_version=writer_version))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(writer_version=writer_version))
 
     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1689,18 +1689,18 @@ def test_write_parquet_options_writer_version(df, tmp_path, writer_version, format_version):
 
 
 @pytest.mark.parametrize("writer_version", ["1.2.3", "custom-version", "0"])
-def test_write_parquet_options_wrong_writer_version(df, tmp_path, writer_version):
+def test_write_parquet_with_options_wrong_writer_version(df, tmp_path, writer_version):
     """Test that invalid writer versions in Parquet throw an exception."""
     with pytest.raises(
         Exception, match="Unknown or unsupported parquet writer version"
     ):
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(writer_version=writer_version))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(writer_version=writer_version))
 
 
 @pytest.mark.parametrize("dictionary_enabled", [True, False, None])
-def test_write_parquet_options_dictionary_enabled(df, tmp_path, dictionary_enabled):
+def test_write_parquet_with_options_dictionary_enabled(df, tmp_path, dictionary_enabled):
     """Test enabling/disabling the dictionaries in Parquet."""
-    df.write_parquet_options(tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled))
+    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled))
     # by default, the dictionary is enabled, so None results in True
     result = dictionary_enabled if dictionary_enabled is not None else True
 
@@ -1717,12 +1717,12 @@ def test_write_parquet_options_dictionary_enabled(df, tmp_path, dictionary_enabled):
     ("statistics_enabled", "has_statistics"),
     [("page", True), ("chunk", True), ("none", False), (None, True)],
 )
-def test_write_parquet_options_statistics_enabled(
+def test_write_parquet_with_options_statistics_enabled(
     df, tmp_path, statistics_enabled, has_statistics
 ):
     """Test configuring the statistics in Parquet. In pyarrow we can only check for
     column-level statistics, so "page" and "chunk" are tested in the same way."""
-    df.write_parquet_options(tmp_path, ParquetWriterOptions(statistics_enabled=statistics_enabled))
+    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(statistics_enabled=statistics_enabled))
 
     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1737,11 +1737,11 @@ def test_write_parquet_options_statistics_enabled(
 
 
 @pytest.mark.parametrize("max_row_group_size", [1000, 5000, 10000, 100000])
-def test_write_parquet_options_max_row_group_size(large_df, tmp_path, max_row_group_size):
+def test_write_parquet_with_options_max_row_group_size(large_df, tmp_path, max_row_group_size):
     """Test configuring the max number of rows per group in Parquet. These test cases
     guarantee that the number of rows for each row group is max_row_group_size, given
     the total number of rows is a multiple of max_row_group_size."""
-    large_df.write_parquet_options(tmp_path, ParquetWriterOptions(max_row_group_size=max_row_group_size))
+    large_df.write_parquet_with_options(tmp_path, ParquetWriterOptions(max_row_group_size=max_row_group_size))
 
     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1751,9 +1751,9 @@ def test_write_parquet_options_max_row_group_size(large_df, tmp_path, max_row_group_size):
 
 
 @pytest.mark.parametrize("created_by", ["datafusion", "datafusion-python", "custom"])
-def test_write_parquet_options_created_by(df, tmp_path, created_by):
+def test_write_parquet_with_options_created_by(df, tmp_path, created_by):
     """Test configuring the created by metadata in Parquet."""
-    df.write_parquet_options(tmp_path, ParquetWriterOptions(created_by=created_by))
+    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(created_by=created_by))
 
     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1762,7 +1762,7 @@ def test_write_parquet_options_created_by(df, tmp_path, created_by):
 
 
 @pytest.mark.parametrize("statistics_truncate_length", [5, 25, 50])
-def test_write_parquet_options_statistics_truncate_length(
+def test_write_parquet_with_options_statistics_truncate_length(
     df, tmp_path, statistics_truncate_length
 ):
     """Test configuring the truncate limit in Parquet's row-group-level statistics."""
@@ -1776,7 +1776,7 @@ def test_write_parquet_options_statistics_truncate_length(
         "b": ["a_smaller", "m_smaller", "z_smaller"],
     }
     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet_options(tmp_path, ParquetWriterOptions(statistics_truncate_length=statistics_truncate_length))
+    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(statistics_truncate_length=statistics_truncate_length))
 
     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1789,7 +1789,7 @@ def test_write_parquet_options_statistics_truncate_length(
         assert len(statistics["max"]) <= statistics_truncate_length
 
 
-def test_write_parquet_options_default_encoding(tmp_path):
+def test_write_parquet_with_options_default_encoding(tmp_path):
     """Test that, by default, Parquet files are written with dictionary encoding.
     Note that dictionary encoding is not used for boolean values, so it is not tested
     here."""
@@ -1800,7 +1800,7 @@ def test_write_parquet_options_default_encoding(tmp_path):
         "c": [1.01, 2.02, 3.03],
     }
     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet_options(tmp_path, ParquetWriterOptions())
+    df.write_parquet_with_options(tmp_path, ParquetWriterOptions())
 
     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1822,7 +1822,7 @@ def test_write_parquet_options_default_encoding(tmp_path):
         ("byte_stream_split", ["int", "float"], ("RLE", "BYTE_STREAM_SPLIT")),
     ],
 )
-def test_write_parquet_options_encoding(tmp_path, encoding, data_types, result):
+def test_write_parquet_with_options_encoding(tmp_path, encoding, data_types, result):
     """Test different encodings in Parquet in their respective support column types."""
     ctx = SessionContext()
 
@@ -1838,7 +1838,7 @@ def test_write_parquet_options_encoding(tmp_path, encoding, data_types, result):
     data["bool"] = [True, False, True]
 
     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False))
+    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False))
 
     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1850,39 +1850,39 @@ def test_write_parquet_options_encoding(tmp_path, encoding, data_types, result):
 
 
 @pytest.mark.parametrize("encoding", ["bit_packed"])
-def test_write_parquet_options_unsupported_encoding(df, tmp_path, encoding):
+def test_write_parquet_with_options_unsupported_encoding(df, tmp_path, encoding):
     """Test that unsupported Parquet encodings do not work."""
     # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
     with pytest.raises(BaseException, match="Encoding .*? is not supported"):
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))
 
 
 @pytest.mark.parametrize("encoding", ["non_existent", "unknown", "plain123"])
-def test_write_parquet_options_invalid_encoding(df, tmp_path, encoding):
+def test_write_parquet_with_options_invalid_encoding(df, tmp_path, encoding):
     """Test that invalid Parquet encodings do not work."""
     with pytest.raises(Exception, match="Unknown or unsupported parquet encoding"):
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))
 
 
 @pytest.mark.parametrize("encoding", ["plain_dictionary", "rle_dictionary"])
-def test_write_parquet_options_dictionary_encoding_fallback(df, tmp_path, encoding):
+def test_write_parquet_with_options_dictionary_encoding_fallback(df, tmp_path, encoding):
     """Test that the dictionary encoding cannot be used as fallback in Parquet."""
     # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
     with pytest.raises(
         BaseException, match="Dictionary encoding can not be used as fallback encoding"
     ):
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))
 
 
-def test_write_parquet_options_bloom_filter(df, tmp_path):
+def test_write_parquet_with_options_bloom_filter(df, tmp_path):
     """Test Parquet files with and without (default) bloom filters. Since pyarrow does
     not expose any information about bloom filters, the easiest way to confirm that they
     are actually written is to compare the file size."""
     path_no_bloom_filter = tmp_path / "1"
     path_bloom_filter = tmp_path / "2"
 
-    df.write_parquet_options(path_no_bloom_filter, ParquetWriterOptions())
-    df.write_parquet_options(path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True))
+    df.write_parquet_with_options(path_no_bloom_filter, ParquetWriterOptions())
+    df.write_parquet_with_options(path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True))
 
     size_no_bloom_filter = 0
     for file in path_no_bloom_filter.rglob("*.parquet"):
@@ -1895,7 +1895,7 @@ def test_write_parquet_options_bloom_filter(df, tmp_path):
     assert size_no_bloom_filter < size_bloom_filter
 
 
-def test_write_parquet_options_column_options(df, tmp_path):
+def test_write_parquet_with_options_column_options(df, tmp_path):
     """Test writing Parquet files with different options for each column, which replace
     the global configs (when provided)."""
     data = {
@@ -1951,7 +1951,7 @@ def test_write_parquet_options_column_options(df, tmp_path):
 
     ctx = SessionContext()
     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet_options(
+    df.write_parquet_with_options(
         tmp_path,
         ParquetWriterOptions(compression="brotli(8)",
                              column_specific_options=column_specific_options),
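
Since every renamed test now contains "write_parquet_with_options" in its name, the affected tests can be selected after this change with a pytest keyword filter, e.g.:

    pytest python/tests/test_dataframe.py -k write_parquet_with_options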

src/dataframe.rs (1 addition, 1 deletion)

@@ -770,7 +770,7 @@ impl PyDataFrame {
     }
 
     /// Write a `DataFrame` to a Parquet file, using advanced options.
-    fn write_parquet_options(
+    fn write_parquet_with_options(
         &self,
         path: &str,
         options: PyParquetWriterOptions,
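
This Rust method is what the Python wrapper above delegates to via PyO3. As an end-to-end sanity check of the rename, one can write with the new method and read the metadata back with pyarrow, mirroring the tests in this commit (a sketch; the output path and expected value are illustrative):

    import pathlib

    import pyarrow as pa
    import pyarrow.parquet as pq
    from datafusion import SessionContext
    from datafusion.dataframe import ParquetWriterOptions

    ctx = SessionContext()
    df = ctx.from_arrow(pa.record_batch({"a": [1, 2, 3]}))
    df.write_parquet_with_options("out_dir", ParquetWriterOptions(compression="snappy"))

    for file in pathlib.Path("out_dir").rglob("*.parquet"):
        parquet = pq.ParquetFile(file)
        # compression is recorded per column chunk in the Parquet metadata
        assert parquet.metadata.row_group(0).column(0).compression == "SNAPPY"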
