Skip to content

Commit af791f2

Browse files
committed
Fix ruff errors
1 parent 4c44326 commit af791f2

File tree

1 file changed

+51
-19
lines changed

1 file changed

+51
-19
lines changed

python/tests/test_dataframe.py

Lines changed: 51 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,8 @@
2727
import pytest
2828
from datafusion import (
2929
DataFrame,
30-
ParquetWriterOptions,
3130
ParquetColumnOptions,
31+
ParquetWriterOptions,
3232
SessionContext,
3333
WindowFrame,
3434
column,
@@ -1668,7 +1668,9 @@ def test_write_parquet_with_options_compression(df, tmp_path, compression):
16681668
import re
16691669

16701670
path = tmp_path
1671-
df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression))
1671+
df.write_parquet_with_options(
1672+
str(path), ParquetWriterOptions(compression=compression)
1673+
)
16721674

16731675
# test that the actual compression scheme is the one written
16741676
for _root, _dirs, files in os.walk(path):
@@ -1695,28 +1697,36 @@ def test_write_parquet_with_options_wrong_compression_level(df, tmp_path, compre
16951697
path = tmp_path
16961698

16971699
with pytest.raises(Exception, match=r"valid compression range .*? exceeded."):
1698-
df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression))
1700+
df.write_parquet_with_options(
1701+
str(path), ParquetWriterOptions(compression=compression)
1702+
)
16991703

17001704

17011705
@pytest.mark.parametrize("compression", ["wrong", "wrong(12)"])
17021706
def test_write_parquet_with_options_invalid_compression(df, tmp_path, compression):
17031707
path = tmp_path
17041708

17051709
with pytest.raises(Exception, match="Unknown or unsupported parquet compression"):
1706-
df.write_parquet_with_options(str(path), ParquetWriterOptions(compression=compression))
1710+
df.write_parquet_with_options(
1711+
str(path), ParquetWriterOptions(compression=compression)
1712+
)
17071713

17081714

17091715
@pytest.mark.parametrize(
17101716
("writer_version", "format_version"),
17111717
[("1.0", "1.0"), ("2.0", "2.6"), (None, "1.0")],
17121718
)
1713-
def test_write_parquet_with_options_writer_version(df, tmp_path, writer_version, format_version):
1719+
def test_write_parquet_with_options_writer_version(
1720+
df, tmp_path, writer_version, format_version
1721+
):
17141722
"""Test the Parquet writer version. Note that writer_version=2.0 results in
17151723
format_version=2.6"""
17161724
if writer_version is None:
17171725
df.write_parquet_with_options(tmp_path, ParquetWriterOptions())
17181726
else:
1719-
df.write_parquet_with_options(tmp_path, ParquetWriterOptions(writer_version=writer_version))
1727+
df.write_parquet_with_options(
1728+
tmp_path, ParquetWriterOptions(writer_version=writer_version)
1729+
)
17201730

17211731
for file in tmp_path.rglob("*.parquet"):
17221732
parquet = pq.ParquetFile(file)
@@ -1730,13 +1740,19 @@ def test_write_parquet_with_options_wrong_writer_version(df, tmp_path, writer_ve
17301740
with pytest.raises(
17311741
Exception, match="Unknown or unsupported parquet writer version"
17321742
):
1733-
df.write_parquet_with_options(tmp_path, ParquetWriterOptions(writer_version=writer_version))
1743+
df.write_parquet_with_options(
1744+
tmp_path, ParquetWriterOptions(writer_version=writer_version)
1745+
)
17341746

17351747

17361748
@pytest.mark.parametrize("dictionary_enabled", [True, False, None])
1737-
def test_write_parquet_with_options_dictionary_enabled(df, tmp_path, dictionary_enabled):
1749+
def test_write_parquet_with_options_dictionary_enabled(
1750+
df, tmp_path, dictionary_enabled
1751+
):
17381752
"""Test enabling/disabling the dictionaries in Parquet."""
1739-
df.write_parquet_with_options(tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled))
1753+
df.write_parquet_with_options(
1754+
tmp_path, ParquetWriterOptions(dictionary_enabled=dictionary_enabled)
1755+
)
17401756
# by default, the dictionary is enabled, so None results in True
17411757
result = dictionary_enabled if dictionary_enabled is not None else True
17421758

@@ -1758,7 +1774,9 @@ def test_write_parquet_with_options_statistics_enabled(
17581774
):
17591775
"""Test configuring the statistics in Parquet. In pyarrow we can only check for
17601776
column-level statistics, so "page" and "chunk" are tested in the same way."""
1761-
df.write_parquet_with_options(tmp_path, ParquetWriterOptions(statistics_enabled=statistics_enabled))
1777+
df.write_parquet_with_options(
1778+
tmp_path, ParquetWriterOptions(statistics_enabled=statistics_enabled)
1779+
)
17621780

17631781
for file in tmp_path.rglob("*.parquet"):
17641782
parquet = pq.ParquetFile(file)
@@ -1773,11 +1791,15 @@ def test_write_parquet_with_options_statistics_enabled(
17731791

17741792

17751793
@pytest.mark.parametrize("max_row_group_size", [1000, 5000, 10000, 100000])
1776-
def test_write_parquet_with_options_max_row_group_size(large_df, tmp_path, max_row_group_size):
1794+
def test_write_parquet_with_options_max_row_group_size(
1795+
large_df, tmp_path, max_row_group_size
1796+
):
17771797
"""Test configuring the max number of rows per group in Parquet. These test cases
17781798
guarantee that the number of rows for each row group is max_row_group_size, given
17791799
the total number of rows is a multiple of max_row_group_size."""
1780-
large_df.write_parquet_with_options(tmp_path, ParquetWriterOptions(max_row_group_size=max_row_group_size))
1800+
large_df.write_parquet_with_options(
1801+
tmp_path, ParquetWriterOptions(max_row_group_size=max_row_group_size)
1802+
)
17811803

17821804
for file in tmp_path.rglob("*.parquet"):
17831805
parquet = pq.ParquetFile(file)
@@ -1812,7 +1834,10 @@ def test_write_parquet_with_options_statistics_truncate_length(
18121834
"b": ["a_smaller", "m_smaller", "z_smaller"],
18131835
}
18141836
df = ctx.from_arrow(pa.record_batch(data))
1815-
df.write_parquet_with_options(tmp_path, ParquetWriterOptions(statistics_truncate_length=statistics_truncate_length))
1837+
df.write_parquet_with_options(
1838+
tmp_path,
1839+
ParquetWriterOptions(statistics_truncate_length=statistics_truncate_length),
1840+
)
18161841

18171842
for file in tmp_path.rglob("*.parquet"):
18181843
parquet = pq.ParquetFile(file)
@@ -1870,11 +1895,13 @@ def test_write_parquet_with_options_encoding(tmp_path, encoding, data_types, res
18701895
data["float"] = [1.01, 2.02, 3.03]
18711896
elif data_type == "str":
18721897
data["str"] = ["a", "b", "c"]
1873-
elif data_type == "bool":
1898+
elif data_type == "bool":
18741899
data["bool"] = [True, False, True]
18751900

18761901
df = ctx.from_arrow(pa.record_batch(data))
1877-
df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False))
1902+
df.write_parquet_with_options(
1903+
tmp_path, ParquetWriterOptions(encoding=encoding, dictionary_enabled=False)
1904+
)
18781905

18791906
for file in tmp_path.rglob("*.parquet"):
18801907
parquet = pq.ParquetFile(file)
@@ -1901,7 +1928,9 @@ def test_write_parquet_with_options_invalid_encoding(df, tmp_path, encoding):
19011928

19021929

19031930
@pytest.mark.parametrize("encoding", ["plain_dictionary", "rle_dictionary"])
1904-
def test_write_parquet_with_options_dictionary_encoding_fallback(df, tmp_path, encoding):
1931+
def test_write_parquet_with_options_dictionary_encoding_fallback(
1932+
df, tmp_path, encoding
1933+
):
19051934
"""Test that the dictionary encoding cannot be used as fallback in Parquet."""
19061935
# BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
19071936
with pytest.raises(
@@ -1918,7 +1947,9 @@ def test_write_parquet_with_options_bloom_filter(df, tmp_path):
19181947
path_bloom_filter = tmp_path / "2"
19191948

19201949
df.write_parquet_with_options(path_no_bloom_filter, ParquetWriterOptions())
1921-
df.write_parquet_with_options(path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True))
1950+
df.write_parquet_with_options(
1951+
path_bloom_filter, ParquetWriterOptions(bloom_filter_on_write=True)
1952+
)
19221953

19231954
size_no_bloom_filter = 0
19241955
for file in path_no_bloom_filter.rglob("*.parquet"):
@@ -1989,8 +2020,9 @@ def test_write_parquet_with_options_column_options(df, tmp_path):
19892020
df = ctx.from_arrow(pa.record_batch(data))
19902021
df.write_parquet_with_options(
19912022
tmp_path,
1992-
ParquetWriterOptions(compression="brotli(8)",
1993-
column_specific_options=column_specific_options),
2023+
ParquetWriterOptions(
2024+
compression="brotli(8)", column_specific_options=column_specific_options
2025+
),
19942026
)
19952027

19962028
for file in tmp_path.rglob("*.parquet"):

0 commit comments

Comments
 (0)