@@ -27,8 +27,8 @@
 import pytest
 from datafusion import (
     DataFrame,
-    ParquetWriterOptions,
     ParquetColumnOptions,
+    ParquetWriterOptions,
     SessionContext,
     WindowFrame,
     column,
@@ -1668,7 +1668,9 @@ def test_write_parquet_with_options_compression(df, tmp_path, compression):
     import re

     path = tmp_path
-    df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression))
+    df.write_parquet_with_options(
+        str(path), ParquetWriterOptions(compression=compression)
+    )

     # test that the actual compression scheme is the one written
     for _root, _dirs, files in os.walk(path):
@@ -1695,28 +1697,36 @@ def test_write_parquet_with_options_wrong_compression_level(df, tmp_path, compre
     path = tmp_path

     with pytest.raises(Exception, match=r"valid compression range .*? exceeded."):
-        df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression))
+        df.write_parquet_with_options(
+            str(path), ParquetWriterOptions(compression=compression)
+        )


 @pytest.mark.parametrize("compression", ["wrong", "wrong(12)"])
 def test_write_parquet_with_options_invalid_compression(df, tmp_path, compression):
     path = tmp_path

     with pytest.raises(Exception, match="Unknown or unsupported parquet compression"):
-        df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression))
+        df.write_parquet_with_options(
+            str(path), ParquetWriterOptions(compression=compression)
+        )


 @pytest.mark.parametrize(
     ("writer_version", "format_version"),
     [("1.0", "1.0"), ("2.0", "2.6"), (None, "1.0")],
 )
-def test_write_parquet_with_options_writer_version(df, tmp_path, writer_version, format_version):
+def test_write_parquet_with_options_writer_version(
+    df, tmp_path, writer_version, format_version
+):
     """Test the Parquet writer version. Note that writer_version=2.0 results in
     format_version=2.6"""
     if writer_version is None:
         df.write_parquet_with_options(tmp_path, ParquetWriterOptions())
     else:
-        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(writer_version=writer_version))
+        df.write_parquet_with_options(
+            tmp_path, ParquetWriterOptions(writer_version=writer_version)
+        )

     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1730,13 +1740,19 @@ def test_write_parquet_with_options_wrong_writer_version(df, tmp_path, writer_ve
     with pytest.raises(
         Exception, match="Unknown or unsupported parquet writer version"
     ):
-        df.write_parquet_with_options(tmp_path, ParquetWriterOptions(writer_version=writer_version))
+        df.write_parquet_with_options(
+            tmp_path, ParquetWriterOptions(writer_version=writer_version)
+        )


 @pytest.mark.parametrize("dictionary_enabled", [True, False, None])
-def test_write_parquet_with_options_dictionary_enabled(df, tmp_path, dictionary_enabled):
+def test_write_parquet_with_options_dictionary_enabled(
+    df, tmp_path, dictionary_enabled
+):
     """Test enabling/disabling the dictionaries in Parquet."""
-    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled))
+    df.write_parquet_with_options(
+        tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled)
+    )
     # by default, the dictionary is enabled, so None results in True
     result = dictionary_enabled if dictionary_enabled is not None else True

@@ -1758,7 +1774,9 @@ def test_write_parquet_with_options_statistics_enabled(
 ):
     """Test configuring the statistics in Parquet. In pyarrow we can only check for
     column-level statistics, so "page" and "chunk" are tested in the same way."""
-    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(statistics_enabled=statistics_enabled))
+    df.write_parquet_with_options(
+        tmp_path, ParquetWriterOptions(statistics_enabled=statistics_enabled)
+    )

     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1773,11 +1791,15 @@ def test_write_parquet_with_options_statistics_enabled(


 @pytest.mark.parametrize("max_row_group_size", [1000, 5000, 10000, 100000])
-def test_write_parquet_with_options_max_row_group_size(large_df, tmp_path, max_row_group_size):
+def test_write_parquet_with_options_max_row_group_size(
+    large_df, tmp_path, max_row_group_size
+):
     """Test configuring the max number of rows per group in Parquet. These test cases
     guarantee that the number of rows for each row group is max_row_group_size, given
     the total number of rows is a multiple of max_row_group_size."""
-    large_df.write_parquet_with_options(tmp_path, ParquetWriterOptions(max_row_group_size=max_row_group_size))
+    large_df.write_parquet_with_options(
+        tmp_path, ParquetWriterOptions(max_row_group_size=max_row_group_size)
+    )

     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1812,7 +1834,10 @@ def test_write_parquet_with_options_statistics_truncate_length(
         "b": ["a_smaller", "m_smaller", "z_smaller"],
     }
     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(statistics_truncate_length=statistics_truncate_length))
+    df.write_parquet_with_options(
+        tmp_path,
+        ParquetWriterOptions(statistics_truncate_length=statistics_truncate_length),
+    )

     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1870,11 +1895,13 @@ def test_write_parquet_with_options_encoding(tmp_path, encoding, data_types, res
             data["float"] = [1.01, 2.02, 3.03]
         elif data_type == "str":
             data["str"] = ["a", "b", "c"]
-        elif  data_type == "bool":
+        elif data_type == "bool":
             data["bool"] = [True, False, True]

     df = ctx.from_arrow(pa.record_batch(data))
-    df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False))
+    df.write_parquet_with_options(
+        tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False)
+    )

     for file in tmp_path.rglob("*.parquet"):
         parquet = pq.ParquetFile(file)
@@ -1901,7 +1928,9 @@ def test_write_parquet_with_options_invalid_encoding(df, tmp_path, encoding):


 @pytest.mark.parametrize("encoding", ["plain_dictionary", "rle_dictionary"])
-def test_write_parquet_with_options_dictionary_encoding_fallback(df, tmp_path, encoding):
+def test_write_parquet_with_options_dictionary_encoding_fallback(
+    df, tmp_path, encoding
+):
     """Test that the dictionary encoding cannot be used as fallback in Parquet."""
     # BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
     with pytest.raises(
@@ -1918,7 +1947,9 @@ def test_write_parquet_with_options_bloom_filter(df, tmp_path):
     path_bloom_filter = tmp_path / "2"

     df.write_parquet_with_options(path_no_bloom_filter, ParquetWriterOptions())
-    df.write_parquet_with_options(path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True))
+    df.write_parquet_with_options(
+        path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True)
+    )

     size_no_bloom_filter = 0
     for file in path_no_bloom_filter.rglob("*.parquet"):
@@ -1989,8 +2020,9 @@ def test_write_parquet_with_options_column_options(df, tmp_path):
     df = ctx.from_arrow(pa.record_batch(data))
     df.write_parquet_with_options(
         tmp_path,
-        ParquetWriterOptions(compression="brotli(8)",
-        column_specific_options=column_specific_options),
+        ParquetWriterOptions(
+            compression="brotli(8)", column_specific_options=column_specific_options
+        ),
     )

     for file in tmp_path.rglob("*.parquet"):
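For reference, a minimal sketch of the API these formatting changes touch, written only against the calls visible in the diff itself (SessionContext, from_arrow, ParquetWriterOptions, write_parquet_with_options, and pyarrow.parquet for inspection). The "zstd(3)" compression value and the temporary output directory are illustrative assumptions, not taken from the tests:

import tempfile
from pathlib import Path

import pyarrow as pa
import pyarrow.parquet as pq
from datafusion import ParquetWriterOptions, SessionContext

ctx = SessionContext()
df = ctx.from_arrow(pa.record_batch({"a": [1, 2, 3], "b": ["x", "y", "z"]}))

# "zstd(3)" is an assumed example value, following the "name(level)"
# form the tests use (e.g. "brotli(8)").
out = Path(tempfile.mkdtemp())
df.write_parquet_with_options(str(out), ParquetWriterOptions(compression="zstd(3)"))

# Inspect the written files the same way the tests do.
for file in out.rglob("*.parquet"):
    metadata = pq.ParquetFile(file).metadata
    print(metadata.row_group(0).column(0).compression)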