
Commit 694851d

More ruff updates
1 parent 38e3630 commit 694851d

File tree

4 files changed: +57 −48 lines


pyproject.toml

Lines changed: 15 additions & 2 deletions
@@ -67,7 +67,7 @@ features = ["substrait"]
 # Enable docstring linting using the google style guide
 [tool.ruff.lint]
 select = ["ALL" ]
-ignore = ["COM812", "ISC001"] # Recommended to ignore these rules when using with ruff-format
+ignore = ["COM812", "ISC001", "TD002"] # Recommended to ignore these rules when using with ruff-format

 [tool.ruff.lint.pydocstyle]
 convention = "google"
@@ -77,7 +77,20 @@ max-doc-length = 88

 # Disable docstring checking for these directories
 [tool.ruff.lint.per-file-ignores]
-"python/tests/*" = ["ANN", "ARG", "D", "S101", "SLF", "PD", "PLR2004", "PT011", "RUF015", "S608", "PLR0913"]
+"python/tests/*" = [
+    "ANN",
+    "ARG",
+    "BLE001",
+    "D",
+    "S101",
+    "SLF",
+    "PD",
+    "PLR2004",
+    "PT011",
+    "RUF015",
+    "S608",
+    "PLR0913"
+]
 "examples/*" = ["D", "W505"]
 "dev/*" = ["D"]
 "benchmarks/*" = ["D", "F"]

python/tests/test_context.py

Lines changed: 26 additions & 25 deletions
@@ -16,7 +16,6 @@
 # under the License.
 import datetime as dt
 import gzip
-import os
 import pathlib

 import pyarrow as pa
@@ -45,7 +44,7 @@ def test_create_context_runtime_config_only():
     SessionContext(runtime=RuntimeEnvBuilder())


-@pytest.mark.parametrize("path_to_str", (True, False))
+@pytest.mark.parametrize("path_to_str", [True, False])
 def test_runtime_configs(tmp_path, path_to_str):
     path1 = tmp_path / "dir1"
     path2 = tmp_path / "dir2"
@@ -62,7 +61,7 @@ def test_runtime_configs(tmp_path, path_to_str):
     assert db is not None


-@pytest.mark.parametrize("path_to_str", (True, False))
+@pytest.mark.parametrize("path_to_str", [True, False])
 def test_temporary_files(tmp_path, path_to_str):
     path = str(tmp_path) if path_to_str else tmp_path

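The `(True, False)` → `[True, False]` edits above match ruff's PT007 (pytest-parametrize-values-wrong-type), whose default style expects parametrize values as a list. A self-contained sketch of the preferred form:

import pytest

@pytest.mark.parametrize("path_to_str", [True, False])  # PT007: values as a list, not a tuple
def test_accepts_both(path_to_str):
    assert isinstance(path_to_str, bool)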
@@ -79,14 +78,14 @@ def test_create_context_with_all_valid_args():
     runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000)
     config = (
         SessionConfig()
-        .with_create_default_catalog_and_schema(True)
+        .with_create_default_catalog_and_schema(enabled=True)
         .with_default_catalog_and_schema("foo", "bar")
         .with_target_partitions(1)
-        .with_information_schema(True)
-        .with_repartition_joins(False)
-        .with_repartition_aggregations(False)
-        .with_repartition_windows(False)
-        .with_parquet_pruning(False)
+        .with_information_schema(enabled=True)
+        .with_repartition_joins(enabled=False)
+        .with_repartition_aggregations(enabled=False)
+        .with_repartition_windows(enabled=False)
+        .with_parquet_pruning(enabled=False)
     )

     ctx = SessionContext(config, runtime)
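Naming the booleans (`enabled=True`, `enabled=False`) lines up with ruff's flake8-boolean-trap rule FBT003, which flags boolean values passed positionally. A sketch of the keyword-only style the rule nudges toward; the class and attribute here are illustrative stand-ins, not the real datafusion binding:

class SessionConfigSketch:
    def with_information_schema(self, *, enabled: bool = True) -> "SessionConfigSketch":
        # Keyword-only `enabled` forces callers to name the flag.
        self.enable_information_schema = enabled
        return self

# SessionConfigSketch().with_information_schema(True)          -> TypeError, and what FBT003 guards against
# SessionConfigSketch().with_information_schema(enabled=True)  -> self-documenting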
@@ -167,7 +166,7 @@ def test_from_arrow_table(ctx):

 def record_batch_generator(num_batches: int):
     schema = pa.schema([("a", pa.int64()), ("b", pa.int64())])
-    for i in range(num_batches):
+    for _i in range(num_batches):
         yield pa.RecordBatch.from_arrays(
             [pa.array([1, 2, 3]), pa.array([4, 5, 6])], schema=schema
         )
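Renaming `i` to `_i` satisfies ruff's B007 (unused loop control variable): the underscore prefix marks the counter as intentionally unused while the loop still yields `num_batches` identical batches; `for _ in range(num_batches):` would be an equally valid spelling.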
@@ -492,10 +491,10 @@ def test_table_not_found(ctx):


 def test_read_json(ctx):
-    path = os.path.dirname(os.path.abspath(__file__))
+    path = pathlib.Path(__file__).parent.resolve()

     # Default
-    test_data_path = os.path.join(path, "data_test_context", "data.json")
+    test_data_path = path / "data_test_context" / "data.json"
     df = ctx.read_json(test_data_path)
     result = df.collect()

@@ -515,7 +514,7 @@ def test_read_json(ctx):
     assert result[0].schema == schema

     # File extension
-    test_data_path = os.path.join(path, "data_test_context", "data.json")
+    test_data_path = path / "data_test_context" / "data.json"
     df = ctx.read_json(test_data_path, file_extension=".json")
     result = df.collect()

@@ -524,15 +523,16 @@ def test_read_json(ctx):


 def test_read_json_compressed(ctx, tmp_path):
-    path = os.path.dirname(os.path.abspath(__file__))
-    test_data_path = os.path.join(path, "data_test_context", "data.json")
+    path = pathlib.Path(__file__).parent.resolve()
+    test_data_path = path / "data_test_context" / "data.json"

     # File compression type
     gzip_path = tmp_path / "data.json.gz"

-    with open(test_data_path, "rb") as csv_file:
-        with gzip.open(gzip_path, "wb") as gzipped_file:
-            gzipped_file.writelines(csv_file)
+    with pathlib.Path.open(test_data_path, "rb") as csv_file, gzip.open(
+        gzip_path, "wb"
+    ) as gzipped_file:
+        gzipped_file.writelines(csv_file)

     df = ctx.read_json(gzip_path, file_extension=".gz", file_compression_type="gz")
     result = df.collect()
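Two rule families meet in these hunks: the flake8-use-pathlib (PTH) checks replace `os.path` calls with `pathlib.Path` operations, and SIM117 (flake8-simplify) folds the nested `with` blocks into a single statement. A standalone sketch of both patterns, with illustrative paths:

import gzip
import pathlib
import tempfile

src = pathlib.Path(__file__).parent / "data.json"       # PTH: `/` operator instead of os.path.join
dst = pathlib.Path(tempfile.mkdtemp()) / "data.json.gz"

# SIM117: one `with` statement manages both context managers
with src.open("rb") as plain, gzip.open(dst, "wb") as compressed:
    compressed.writelines(plain)

`src.open(...)` is the bound-method spelling of the unbound `pathlib.Path.open(test_data_path, ...)` call the diff uses; both resolve to the same method.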
@@ -563,14 +563,15 @@ def test_read_csv_list(ctx):


 def test_read_csv_compressed(ctx, tmp_path):
-    test_data_path = "testing/data/csv/aggregate_test_100.csv"
+    test_data_path = pathlib.Path("testing/data/csv/aggregate_test_100.csv")

     # File compression type
     gzip_path = tmp_path / "aggregate_test_100.csv.gz"

-    with open(test_data_path, "rb") as csv_file:
-        with gzip.open(gzip_path, "wb") as gzipped_file:
-            gzipped_file.writelines(csv_file)
+    with pathlib.Path.open(test_data_path, "rb") as csv_file, gzip.open(
+        gzip_path, "wb"
+    ) as gzipped_file:
+        gzipped_file.writelines(csv_file)

     csv_df = ctx.read_csv(gzip_path, file_extension=".gz", file_compression_type="gz")
     csv_df.select(column("c1")).show()
@@ -603,7 +604,7 @@ def test_create_sql_options():
 def test_sql_with_options_no_ddl(ctx):
     sql = "CREATE TABLE IF NOT EXISTS valuetable AS VALUES(1,'HELLO'),(12,'DATAFUSION')"
     ctx.sql(sql)
-    options = SQLOptions().with_allow_ddl(False)
+    options = SQLOptions().with_allow_ddl(allow=False)
     with pytest.raises(Exception, match="DDL"):
         ctx.sql_with_options(sql, options=options)

@@ -618,14 +619,14 @@ def test_sql_with_options_no_dml(ctx):
     ctx.register_dataset(table_name, dataset)
     sql = f'INSERT INTO "{table_name}" VALUES (1, 2), (2, 3);'
     ctx.sql(sql)
-    options = SQLOptions().with_allow_dml(False)
+    options = SQLOptions().with_allow_dml(allow=False)
     with pytest.raises(Exception, match="DML"):
         ctx.sql_with_options(sql, options=options)


 def test_sql_with_options_no_statements(ctx):
     sql = "SET time zone = 1;"
     ctx.sql(sql)
-    options = SQLOptions().with_allow_statements(False)
+    options = SQLOptions().with_allow_statements(allow=False)
     with pytest.raises(Exception, match="SetVariable"):
         ctx.sql_with_options(sql, options=options)

python/tests/test_dataframe.py

Lines changed: 12 additions & 14 deletions
@@ -339,7 +339,7 @@ def test_join():

     # Verify we don't make a breaking change to pre-43.0.0
     # where users would pass join_keys as a positional argument
-    df2 = df.join(df1, (["a"], ["a"]), how="inner") # type: ignore
+    df2 = df.join(df1, (["a"], ["a"]), how="inner")
     df2.show()
     df2 = df2.sort(column("l.a"))
     table = pa.Table.from_batches(df2.collect())
@@ -375,17 +375,17 @@ def test_join_invalid_params():
     with pytest.raises(
         ValueError, match=r"`left_on` or `right_on` should not provided with `on`"
     ):
-        df2 = df.join(df1, on="a", how="inner", right_on="test") # type: ignore
+        df2 = df.join(df1, on="a", how="inner", right_on="test")

     with pytest.raises(
         ValueError, match=r"`left_on` and `right_on` should both be provided."
     ):
-        df2 = df.join(df1, left_on="a", how="inner") # type: ignore
+        df2 = df.join(df1, left_on="a", how="inner")

     with pytest.raises(
         ValueError, match=r"either `on` or `left_on` and `right_on` should be provided."
     ):
-        df2 = df.join(df1, how="inner") # type: ignore
+        df2 = df.join(df1, how="inner")


 def test_join_on():
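Note these hunks only remove blanket `# type: ignore` comments. When a suppression genuinely is needed, ruff's PGH003 (blanket-type-ignore) wants the specific checker code spelled out, e.g. `# type: ignore[call-overload]` (the code shown is illustrative). Here the comments were presumably no longer suppressing anything, so dropping them is the cleaner fix.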
@@ -567,7 +567,7 @@ def test_distinct():
     ]


-@pytest.mark.parametrize("name,expr,result", data_test_window_functions)
+@pytest.mark.parametrize(("name", "expr", "result"), data_test_window_functions)
 def test_window_functions(partitioned_df, name, expr, result):
     df = partitioned_df.select(
         column("a"), column("b"), column("c"), f.alias(expr, name)
@@ -885,7 +885,7 @@ def test_union_distinct(ctx):
     )
     df_c = ctx.create_dataframe([[batch]]).sort(column("a"))

-    df_a_u_b = df_a.union(df_b, True).sort(column("a"))
+    df_a_u_b = df_a.union(df_b, distinct=True).sort(column("a"))

     assert df_c.collect() == df_a_u_b.collect()
     assert df_c.collect() == df_a_u_b.collect()
@@ -954,8 +954,6 @@ def test_to_arrow_table(df):

 def test_execute_stream(df):
     stream = df.execute_stream()
-    for s in stream:
-        print(type(s))
     assert all(batch is not None for batch in stream)
     assert not list(stream)  # after one iteration the generator must be exhausted

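Deleting the `print` loop does double duty: ruff's T201 bans stray `print` calls, and, assuming `execute_stream()` returns a one-shot iterator (as the trailing comment implies), the loop was exhausting the stream before `assert all(...)` ran, leaving that assertion to pass vacuously over an empty iterator. With the loop gone, the first assertion actually inspects every batch and the second verifies exhaustion.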
@@ -1033,7 +1031,7 @@ def test_describe(df):
     }


-@pytest.mark.parametrize("path_to_str", (True, False))
+@pytest.mark.parametrize("path_to_str", [True, False])
 def test_write_csv(ctx, df, tmp_path, path_to_str):
     path = str(tmp_path) if path_to_str else tmp_path

@@ -1046,7 +1044,7 @@ def test_write_csv(ctx, df, tmp_path, path_to_str):
     assert result == expected


-@pytest.mark.parametrize("path_to_str", (True, False))
+@pytest.mark.parametrize("path_to_str", [True, False])
 def test_write_json(ctx, df, tmp_path, path_to_str):
     path = str(tmp_path) if path_to_str else tmp_path

@@ -1059,7 +1057,7 @@ def test_write_json(ctx, df, tmp_path, path_to_str):
     assert result == expected


-@pytest.mark.parametrize("path_to_str", (True, False))
+@pytest.mark.parametrize("path_to_str", [True, False])
 def test_write_parquet(df, tmp_path, path_to_str):
     path = str(tmp_path) if path_to_str else tmp_path

@@ -1071,7 +1069,7 @@ def test_write_parquet(df, tmp_path, path_to_str):


 @pytest.mark.parametrize(
-    "compression, compression_level",
+    ("compression", "compression_level"),
     [("gzip", 6), ("brotli", 7), ("zstd", 15)],
 )
 def test_write_compressed_parquet(df, tmp_path, compression, compression_level):
@@ -1082,7 +1080,7 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level):
     )

     # test that the actual compression scheme is the one written
-    for root, dirs, files in os.walk(path):
+    for _root, _dirs, files in os.walk(path):
         for file in files:
             if file.endswith(".parquet"):
                 metadata = pq.ParquetFile(tmp_path / file).metadata.to_dict()
@@ -1097,7 +1095,7 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level):


 @pytest.mark.parametrize(
-    "compression, compression_level",
+    ("compression", "compression_level"),
     [("gzip", 12), ("brotli", 15), ("zstd", 23), ("wrong", 12)],
 )
 def test_write_compressed_parquet_wrong_compression_level(

python/tests/test_expr.py

Lines changed: 4 additions & 7 deletions
@@ -85,18 +85,14 @@ def test_limit(test_ctx):

     plan = plan.to_variant()
     assert isinstance(plan, Limit)
-    # TODO: Upstream now has expressions for skip and fetch
-    # REF: https://github.com/apache/datafusion/pull/12836
-    # assert plan.skip() == 0
+    assert "Skip: None" in str(plan)

     df = test_ctx.sql("select c1 from test LIMIT 10 OFFSET 5")
     plan = df.logical_plan()

     plan = plan.to_variant()
     assert isinstance(plan, Limit)
-    # TODO: Upstream now has expressions for skip and fetch
-    # REF: https://github.com/apache/datafusion/pull/12836
-    # assert plan.skip() == 5
+    assert "Skip: Some(Literal(Int64(5)))" in str(plan)


 def test_aggregate_query(test_ctx):
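The commented-out `plan.skip()` assertions (the kind of dead code ruff's ERA001 reports) give way to live assertions against the plan's string rendering, so the test exercises real behavior again instead of carrying a TODO.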
@@ -165,6 +161,7 @@ def traverse_logical_plan(plan):
                 res = traverse_logical_plan(input_plan)
                 if res is not None:
                     return res
+        return None

     ctx = SessionContext()
     data = {"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]}
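The added `return None` matches ruff's RET503 (implicit-return): a function that returns a value on some paths must end the fall-through path with an explicit `return`. A minimal standalone example:

def first_even(values: list[int]) -> int | None:
    for value in values:
        if value % 2 == 0:
            return value  # explicit value on the found path
    return None  # RET503: make the not-found path explicit too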
@@ -176,7 +173,7 @@ def traverse_logical_plan(plan):
     assert variant.expr().to_variant().qualified_name() == "table1.name"
     assert (
         str(variant.list())
-        == '[Expr(Utf8("dfa")), Expr(Utf8("ad")), Expr(Utf8("dfre")), Expr(Utf8("vsa"))]'
+        == '[Expr(Utf8("dfa")), Expr(Utf8("ad")), Expr(Utf8("dfre")), Expr(Utf8("vsa"))]' # noqa: E501
     )
     assert not variant.negated()
