@@ -1613,7 +1613,7 @@ def test_write_compressed_parquet_default_compression_level(df, tmp_path, compre
     df.write_parquet(str(path), compression=compression)


-def test_write_parquet_options_default_compression(df, tmp_path):
+def test_write_parquet_with_options_default_compression(df, tmp_path):
     """Test that the default compression is ZSTD."""
     df.write_parquet(tmp_path)

@@ -1628,11 +1628,11 @@ def test_write_parquet_options_default_compression(df, tmp_path):
16281628 "compression" ,
16291629 ["gzip(6)" , "brotli(7)" , "zstd(15)" , "snappy" , "uncompressed" ],
16301630)
1631- def test_write_parquet_options_compression (df , tmp_path , compression ):
1631+ def test_write_parquet_with_options_compression (df , tmp_path , compression ):
16321632 import re
16331633
16341634 path = tmp_path
1635- df .write_parquet_options (str (path ), ParquetWriterOptions (compression = compression ))
1635+ df .write_parquet_with_options (str (path ), ParquetWriterOptions (compression = compression ))
16361636
16371637 # test that the actual compression scheme is the one written
16381638 for _root , _dirs , files in os .walk (path ):
@@ -1655,32 +1655,32 @@ def test_write_parquet_options_compression(df, tmp_path, compression):
16551655 "compression" ,
16561656 ["gzip(12)" , "brotli(15)" , "zstd(23)" ],
16571657)
1658- def test_write_parquet_options_wrong_compression_level (df , tmp_path , compression ):
1658+ def test_write_parquet_with_options_wrong_compression_level (df , tmp_path , compression ):
16591659 path = tmp_path
16601660
16611661 with pytest .raises (Exception , match = r"valid compression range .*? exceeded." ):
1662- df .write_parquet_options (str (path ), ParquetWriterOptions (compression = compression ))
1662+ df .write_parquet_with_options (str (path ), ParquetWriterOptions (compression = compression ))
16631663
16641664
16651665@pytest .mark .parametrize ("compression" , ["wrong" , "wrong(12)" ])
1666- def test_write_parquet_options_invalid_compression (df , tmp_path , compression ):
1666+ def test_write_parquet_with_options_invalid_compression (df , tmp_path , compression ):
16671667 path = tmp_path
16681668
16691669 with pytest .raises (Exception , match = "Unknown or unsupported parquet compression" ):
1670- df .write_parquet_options (str (path ), ParquetWriterOptions (compression = compression ))
1670+ df .write_parquet_with_options (str (path ), ParquetWriterOptions (compression = compression ))
16711671
16721672
16731673@pytest .mark .parametrize (
16741674 ("writer_version" , "format_version" ),
16751675 [("1.0" , "1.0" ), ("2.0" , "2.6" ), (None , "1.0" )],
16761676)
1677- def test_write_parquet_options_writer_version (df , tmp_path , writer_version , format_version ):
1677+ def test_write_parquet_with_options_writer_version (df , tmp_path , writer_version , format_version ):
16781678 """Test the Parquet writer version. Note that writer_version=2.0 results in
16791679 format_version=2.6"""
16801680 if writer_version is None :
1681- df .write_parquet_options (tmp_path , ParquetWriterOptions ())
1681+ df .write_parquet_with_options (tmp_path , ParquetWriterOptions ())
16821682 else :
1683- df .write_parquet_options (tmp_path , ParquetWriterOptions (writer_version = writer_version ))
1683+ df .write_parquet_with_options (tmp_path , ParquetWriterOptions (writer_version = writer_version ))
16841684
16851685 for file in tmp_path .rglob ("*.parquet" ):
16861686 parquet = pq .ParquetFile (file )
@@ -1689,18 +1689,18 @@ def test_write_parquet_options_writer_version(df, tmp_path, writer_version, form


 @pytest.mark.parametrize("writer_version", ["1.2.3", "custom-version", "0"])
-def test_write_parquet_options_wrong_writer_version(df, tmp_path, writer_version):
+def test_write_parquet_with_options_wrong_writer_version(df, tmp_path, writer_version):
     """Test that invalid writer versions in Parquet throw an exception."""
     with pytest.raises(
         Exception, match="Unknown or unsupported parquet writer version"
     ):
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(writer_version=writer_version))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(writer_version=writer_version))


 @pytest.mark.parametrize("dictionary_enabled", [True, False, None])
-def test_write_parquet_options_dictionary_enabled(df, tmp_path, dictionary_enabled):
+def test_write_parquet_with_options_dictionary_enabled(df, tmp_path, dictionary_enabled):
     """Test enabling/disabling the dictionaries in Parquet."""
-    df.write_parquet_options(tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled))
+    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled))
     # by default, the dictionary is enabled, so None results in True
     result = dictionary_enabled if dictionary_enabled is not None else True

@@ -1717,12 +1717,12 @@ def test_write_parquet_options_dictionary_enabled(df, tmp_path, dictionary_enabl
17171717 ("statistics_enabled" , "has_statistics" ),
17181718 [("page" , True ), ("chunk" , True ), ("none" , False ), (None , True )],
17191719)
1720- def test_write_parquet_options_statistics_enabled (
1720+ def test_write_parquet_with_options_statistics_enabled (
17211721 df , tmp_path , statistics_enabled , has_statistics
17221722):
17231723 """Test configuring the statistics in Parquet. In pyarrow we can only check for
17241724 column-level statistics, so "page" and "chunk" are tested in the same way."""
1725- df .write_parquet_options (tmp_path , ParquetWriterOptions (statistics_enabled = statistics_enabled ))
1725+ df .write_parquet_with_options (tmp_path , ParquetWriterOptions (statistics_enabled = statistics_enabled ))
17261726
17271727 for file in tmp_path .rglob ("*.parquet" ):
17281728 parquet = pq .ParquetFile (file )
@@ -1737,11 +1737,11 @@ def test_write_parquet_options_statistics_enabled(


 @pytest.mark.parametrize("max_row_group_size", [1000, 5000, 10000, 100000])
-def test_write_parquet_options_max_row_group_size(large_df, tmp_path, max_row_group_size):
+def test_write_parquet_with_options_max_row_group_size(large_df, tmp_path, max_row_group_size):
     """Test configuring the max number of rows per group in Parquet. These test cases
     guarantee that the number of rows for each row group is max_row_group_size, given
     the total number of rows is a multiple of max_row_group_size."""
-    large_df.write_parquet_options(tmp_path, ParquetWriterOptions(max_row_group_size=max_row_group_size))
+    large_df.write_parquet_with_options(tmp_path, ParquetWriterOptions(max_row_group_size=max_row_group_size))

     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1751,9 +1751,9 @@ def test_write_parquet_options_max_row_group_size(large_df, tmp_path, max_row_gr


 @pytest.mark.parametrize("created_by", ["datafusion", "datafusion-python", "custom"])
-def test_write_parquet_options_created_by(df, tmp_path, created_by):
+def test_write_parquet_with_options_created_by(df, tmp_path, created_by):
     """Test configuring the created by metadata in Parquet."""
-    df.write_parquet_options(tmp_path, ParquetWriterOptions(created_by=created_by))
+    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(created_by=created_by))

     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1762,7 +1762,7 @@ def test_write_parquet_options_created_by(df, tmp_path, created_by):


 @pytest.mark.parametrize("statistics_truncate_length", [5, 25, 50])
-def test_write_parquet_options_statistics_truncate_length(
+def test_write_parquet_with_options_statistics_truncate_length(
     df, tmp_path, statistics_truncate_length
 ):
     """Test configuring the truncate limit in Parquet's row-group-level statistics."""
@@ -1776,7 +1776,7 @@ def test_write_parquet_options_statistics_truncate_length(
17761776 "b" : ["a_smaller" , "m_smaller" , "z_smaller" ],
17771777 }
17781778 df = ctx .from_arrow (pa .record_batch (data ))
1779- df .write_parquet_options (tmp_path , ParquetWriterOptions (statistics_truncate_length = statistics_truncate_length ))
1779+ df .write_parquet_with_options (tmp_path , ParquetWriterOptions (statistics_truncate_length = statistics_truncate_length ))
17801780
17811781 for file in tmp_path .rglob ("*.parquet" ):
17821782 parquet = pq .ParquetFile (file )
@@ -1789,7 +1789,7 @@ def test_write_parquet_options_statistics_truncate_length(
                 assert len(statistics["max"]) <= statistics_truncate_length


-def test_write_parquet_options_default_encoding(tmp_path):
+def test_write_parquet_with_options_default_encoding(tmp_path):
     """Test that, by default, Parquet files are written with dictionary encoding.
     Note that dictionary encoding is not used for boolean values, so it is not tested
     here."""
@@ -1800,7 +1800,7 @@ def test_write_parquet_options_default_encoding(tmp_path):
18001800 "c" : [1.01 , 2.02 , 3.03 ],
18011801 }
18021802 df = ctx .from_arrow (pa .record_batch (data ))
1803- df .write_parquet_options (tmp_path , ParquetWriterOptions ())
1803+ df .write_parquet_with_options (tmp_path , ParquetWriterOptions ())
18041804
18051805 for file in tmp_path .rglob ("*.parquet" ):
18061806 parquet = pq .ParquetFile (file )
@@ -1822,7 +1822,7 @@ def test_write_parquet_options_default_encoding(tmp_path):
18221822 ("byte_stream_split" , ["int" , "float" ], ("RLE" , "BYTE_STREAM_SPLIT" )),
18231823 ],
18241824)
1825- def test_write_parquet_options_encoding (tmp_path , encoding , data_types , result ):
1825+ def test_write_parquet_with_options_encoding (tmp_path , encoding , data_types , result ):
18261826 """Test different encodings in Parquet in their respective support column types."""
18271827 ctx = SessionContext ()
18281828
@@ -1838,7 +1838,7 @@ def test_write_parquet_options_encoding(tmp_path, encoding, data_types, result):
        data["bool"] = [True, False, True]

     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False))
+    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False))

     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1850,39 +1850,39 @@ def test_write_parquet_options_encoding(tmp_path, encoding, data_types, result):


 @pytest.mark.parametrize("encoding", ["bit_packed"])
-def test_write_parquet_options_unsupported_encoding(df, tmp_path, encoding):
+def test_write_parquet_with_options_unsupported_encoding(df, tmp_path, encoding):
     """Test that unsupported Parquet encodings do not work."""
     # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
     with pytest.raises(BaseException, match="Encoding .*? is not supported"):
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))


 @pytest.mark.parametrize("encoding", ["non_existent", "unknown", "plain123"])
-def test_write_parquet_options_invalid_encoding(df, tmp_path, encoding):
+def test_write_parquet_with_options_invalid_encoding(df, tmp_path, encoding):
     """Test that invalid Parquet encodings do not work."""
     with pytest.raises(Exception, match="Unknown or unsupported parquet encoding"):
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))


 @pytest.mark.parametrize("encoding", ["plain_dictionary", "rle_dictionary"])
-def test_write_parquet_options_dictionary_encoding_fallback(df, tmp_path, encoding):
+def test_write_parquet_with_options_dictionary_encoding_fallback(df, tmp_path, encoding):
     """Test that the dictionary encoding cannot be used as fallback in Parquet."""
     # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
     with pytest.raises(
         BaseException, match="Dictionary encoding can not be used as fallback encoding"
     ):
-        df.write_parquet_options(tmp_path, ParquetWriterOptions(encoding=encoding))
+        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))


-def test_write_parquet_options_bloom_filter(df, tmp_path):
+def test_write_parquet_with_options_bloom_filter(df, tmp_path):
     """Test Parquet files with and without (default) bloom filters. Since pyarrow does
     not expose any information about bloom filters, the easiest way to confirm that they
     are actually written is to compare the file size."""
     path_no_bloom_filter = tmp_path / "1"
     path_bloom_filter = tmp_path / "2"

-    df.write_parquet_options(path_no_bloom_filter, ParquetWriterOptions())
-    df.write_parquet_options(path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True))
+    df.write_parquet_with_options(path_no_bloom_filter, ParquetWriterOptions())
+    df.write_parquet_with_options(path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True))

     size_no_bloom_filter = 0
     for file in path_no_bloom_filter.rglob("*.parquet"):
@@ -1895,7 +1895,7 @@ def test_write_parquet_options_bloom_filter(df, tmp_path):
     assert size_no_bloom_filter < size_bloom_filter


-def test_write_parquet_options_column_options(df, tmp_path):
+def test_write_parquet_with_options_column_options(df, tmp_path):
     """Test writing Parquet files with different options for each column, which replace
     the global configs (when provided)."""
     data = {
@@ -1951,7 +1951,7 @@ def test_write_parquet_options_column_options(df, tmp_path):

     ctx = SessionContext()
     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet_options(
+    df.write_parquet_with_options(
         tmp_path,
         ParquetWriterOptions(compression="brotli(8)",
                              column_specific_options=column_specific_options),