Skip to content

Commit 04d91f0

Browse files
committed
Working through more ruff suggestions
1 parent 3aaa84a commit 04d91f0

File tree

5 files changed

+31
-30
lines changed

5 files changed

+31
-30
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ max-doc-length = 88
7777

7878
# Disable docstring checking for these directories
7979
[tool.ruff.lint.per-file-ignores]
80-
"python/tests/*" = ["ANN", "ARG", "D", "S101", "SLF", "PD", "PLR2004", "RUF015"]
80+
"python/tests/*" = ["ANN", "ARG", "D", "S101", "SLF", "PD", "PLR2004", "RUF015", "S608", "PLR0913"]
8181
"examples/*" = ["D", "W505"]
8282
"dev/*" = ["D"]
8383
"benchmarks/*" = ["D", "F"]

python/tests/test_sql.py

Lines changed: 20 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717
import gzip
18-
import os
18+
from pathlib import Path
1919

2020
import numpy as np
2121
import pyarrow as pa
@@ -47,9 +47,8 @@ def test_register_csv(ctx, tmp_path):
4747
)
4848
write_csv(table, path)
4949

50-
with open(path, "rb") as csv_file:
51-
with gzip.open(gzip_path, "wb") as gzipped_file:
52-
gzipped_file.writelines(csv_file)
50+
with Path.open(path, "rb") as csv_file, gzip.open(gzip_path, "wb") as gzipped_file:
51+
gzipped_file.writelines(csv_file)
5352

5453
ctx.register_csv("csv", path)
5554
ctx.register_csv("csv1", str(path))
@@ -158,7 +157,7 @@ def test_register_parquet(ctx, tmp_path):
158157
assert result.to_pydict() == {"cnt": [100]}
159158

160159

161-
@pytest.mark.parametrize("path_to_str", (True, False))
160+
@pytest.mark.parametrize("path_to_str", [True, False])
162161
def test_register_parquet_partitioned(ctx, tmp_path, path_to_str):
163162
dir_root = tmp_path / "dataset_parquet_partitioned"
164163
dir_root.mkdir(exist_ok=False)
@@ -194,7 +193,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str):
194193
assert dict(zip(rd["grp"], rd["cnt"])) == {"a": 3, "b": 1}
195194

196195

197-
@pytest.mark.parametrize("path_to_str", (True, False))
196+
@pytest.mark.parametrize("path_to_str", [True, False])
198197
def test_register_dataset(ctx, tmp_path, path_to_str):
199198
path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data())
200199
path = str(path) if path_to_str else path
@@ -209,13 +208,15 @@ def test_register_dataset(ctx, tmp_path, path_to_str):
209208

210209

211210
def test_register_json(ctx, tmp_path):
212-
path = os.path.dirname(os.path.abspath(__file__))
213-
test_data_path = os.path.join(path, "data_test_context", "data.json")
211+
path = Path(__file__).parent.resolve()
212+
test_data_path = Path(path) / "data_test_context" / "data.json"
214213
gzip_path = tmp_path / "data.json.gz"
215214

216-
with open(test_data_path, "rb") as json_file:
217-
with gzip.open(gzip_path, "wb") as gzipped_file:
218-
gzipped_file.writelines(json_file)
215+
with (
216+
Path.open(test_data_path, "rb") as json_file,
217+
gzip.open(gzip_path, "wb") as gzipped_file
218+
):
219+
gzipped_file.writelines(json_file)
219220

220221
ctx.register_json("json", test_data_path)
221222
ctx.register_json("json1", str(test_data_path))
@@ -470,16 +471,19 @@ def test_simple_select(ctx, tmp_path, arr):
470471
# In DF 43.0.0 we now default to having BinaryView and StringView
471472
# so the array that is saved to the parquet is slightly different
472473
# than the array read. Convert to values for comparison.
473-
if isinstance(result, pa.BinaryViewArray) or isinstance(result, pa.StringViewArray):
474+
if isinstance(result, (pa.BinaryViewArray, pa.StringViewArray)):
474475
arr = arr.tolist()
475476
result = result.tolist()
476477

477478
np.testing.assert_equal(result, arr)
478479

479480

480-
@pytest.mark.parametrize("file_sort_order", (None, [[col("int").sort(True, True)]]))
481-
@pytest.mark.parametrize("pass_schema", (True, False))
482-
@pytest.mark.parametrize("path_to_str", (True, False))
481+
@pytest.mark.parametrize("file_sort_order", [
482+
None,
483+
[[col("int").sort(ascending=True, nulls_first=True)]]
484+
])
485+
@pytest.mark.parametrize("pass_schema", [True, False])
486+
@pytest.mark.parametrize("path_to_str", [True, False])
483487
def test_register_listing_table(
484488
ctx, tmp_path, pass_schema, file_sort_order, path_to_str
485489
):
@@ -528,7 +532,7 @@ def test_register_listing_table(
528532
assert dict(zip(rd["grp"], rd["count"])) == {"a": 5, "b": 2}
529533

530534
result = ctx.sql(
531-
"SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp"
535+
"SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp" # noqa: E501
532536
).collect()
533537
result = pa.Table.from_batches(result)
534538

python/tests/test_store.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,25 +15,24 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
import os
18+
from pathlib import Path
1919

2020
import pytest
2121
from datafusion import SessionContext
2222

2323

2424
@pytest.fixture
2525
def ctx():
26-
ctx = SessionContext()
27-
return ctx
26+
return SessionContext()
2827

2928

3029
def test_read_parquet(ctx):
3130
ctx.register_parquet(
3231
"test",
33-
f"file://{os.getcwd()}/parquet/data/alltypes_plain.parquet",
34-
[],
35-
True,
36-
".parquet",
32+
f"file://{Path.cwd()}/parquet/data/alltypes_plain.parquet",
33+
table_partition_cols=[],
34+
parquet_pruning=True,
35+
file_extension=".parquet",
3736
)
3837
df = ctx.sql("SELECT * FROM test")
3938
assert isinstance(df.collect(), list)

python/tests/test_substrait.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ def test_substrait_serialization(ctx):
5050
substrait_plan = ss.Producer.to_substrait_plan(df.logical_plan(), ctx)
5151

5252

53-
@pytest.mark.parametrize("path_to_str", (True, False))
53+
@pytest.mark.parametrize("path_to_str", [True, False])
5454
def test_substrait_file_serialization(ctx, tmp_path, path_to_str):
5555
batch = pa.RecordBatch.from_arrays(
5656
[pa.array([1, 2, 3]), pa.array([4, 5, 6])],

python/tests/test_udaf.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,6 @@
1717

1818
from __future__ import annotations
1919

20-
from typing import List
21-
2220
import pyarrow as pa
2321
import pyarrow.compute as pc
2422
import pytest
@@ -31,15 +29,15 @@ class Summarize(Accumulator):
3129
def __init__(self, initial_value: float = 0.0):
3230
self._sum = pa.scalar(initial_value)
3331

34-
def state(self) -> List[pa.Scalar]:
32+
def state(self) -> list[pa.Scalar]:
3533
return [self._sum]
3634

3735
def update(self, values: pa.Array) -> None:
3836
# Not nice since pyarrow scalars can't be summed yet.
3937
# This breaks on `None`
4038
self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py())
4139

42-
def merge(self, states: List[pa.Array]) -> None:
40+
def merge(self, states: list[pa.Array]) -> None:
4341
# Not nice since pyarrow scalars can't be summed yet.
4442
# This breaks on `None`
4543
self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py())
@@ -56,7 +54,7 @@ class MissingMethods(Accumulator):
5654
def __init__(self):
5755
self._sum = pa.scalar(0)
5856

59-
def state(self) -> List[pa.Scalar]:
57+
def state(self) -> list[pa.Scalar]:
6058
return [self._sum]
6159

6260

0 commit comments

Comments
 (0)