Skip to content

Commit 04d91f0

Browse files
committed
Working through more ruff suggestions
1 parent 3aaa84a commit 04d91f0

File tree

5 files changed

+31
-30
lines changed

5 files changed

+31
-30
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ max-doc-length = 88
7777

7878
# Disable docstring checking for these directories
7979
[tool.ruff.lint.per-file-ignores]
80-
"python/tests/*" = ["ANN", "ARG", "D", "S101", "SLF", "PD", "PLR2004", "RUF015"]
80+
"python/tests/*" = ["ANN", "ARG", "D", "S101", "SLF", "PD", "PLR2004", "RUF015", "S608", "PLR0913"]
8181
"examples/*" = ["D", "W505"]
8282
"dev/*" = ["D"]
8383
"benchmarks/*" = ["D", "F"]

python/tests/test_sql.py

Lines changed: 20 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717
import gzip
18-
import os
18+
from pathlib import Path
1919

2020
import numpy as np
2121
import pyarrow as pa
@@ -47,9 +47,8 @@ def test_register_csv(ctx, tmp_path):
4747
)
4848
write_csv(table, path)
4949

50-
with open(path, "rb") as csv_file:
51-
with gzip.open(gzip_path, "wb") as gzipped_file:
52-
gzipped_file.writelines(csv_file)
50+
with Path.open(path, "rb") as csv_file, gzip.open(gzip_path, "wb") as gzipped_file:
51+
gzipped_file.writelines(csv_file)
5352

5453
ctx.register_csv("csv", path)
5554
ctx.register_csv("csv1", str(path))
@@ -158,7 +157,7 @@ def test_register_parquet(ctx, tmp_path):
158157
assert result.to_pydict() == {"cnt": [100]}
159158

160159

161-
@pytest.mark.parametrize("path_to_str", (True, False))
160+
@pytest.mark.parametrize("path_to_str", [True, False])
162161
def test_register_parquet_partitioned(ctx, tmp_path, path_to_str):
163162
dir_root = tmp_path / "dataset_parquet_partitioned"
164163
dir_root.mkdir(exist_ok=False)
@@ -194,7 +193,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str):
194193
assert dict(zip(rd["grp"], rd["cnt"])) == {"a": 3, "b": 1}
195194

196195

197-
@pytest.mark.parametrize("path_to_str", (True, False))
196+
@pytest.mark.parametrize("path_to_str", [True, False])
198197
def test_register_dataset(ctx, tmp_path, path_to_str):
199198
path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data())
200199
path = str(path) if path_to_str else path
@@ -209,13 +208,15 @@ def test_register_dataset(ctx, tmp_path, path_to_str):
209208

210209

211210
def test_register_json(ctx, tmp_path):
212-
path = os.path.dirname(os.path.abspath(__file__))
213-
test_data_path = os.path.join(path, "data_test_context", "data.json")
211+
path = Path(__file__).parent.resolve()
212+
test_data_path = Path(path) / "data_test_context" / "data.json"
214213
gzip_path = tmp_path / "data.json.gz"
215214

216-
with open(test_data_path, "rb") as json_file:
217-
with gzip.open(gzip_path, "wb") as gzipped_file:
218-
gzipped_file.writelines(json_file)
215+
with (
216+
Path.open(test_data_path, "rb") as json_file,
217+
gzip.open(gzip_path, "wb") as gzipped_file
218+
):
219+
gzipped_file.writelines(json_file)
219220

220221
ctx.register_json("json", test_data_path)
221222
ctx.register_json("json1", str(test_data_path))
@@ -470,16 +471,19 @@ def test_simple_select(ctx, tmp_path, arr):
470471
# In DF 43.0.0 we now default to having BinaryView and StringView
471472
# so the array that is saved to the parquet is slightly different
472473
# than the array read. Convert to values for comparison.
473-
if isinstance(result, pa.BinaryViewArray) or isinstance(result, pa.StringViewArray):
474+
if isinstance(result, (pa.BinaryViewArray, pa.StringViewArray)):
474475
arr = arr.tolist()
475476
result = result.tolist()
476477

477478
np.testing.assert_equal(result, arr)
478479

479480

480-
@pytest.mark.parametrize("file_sort_order", (None, [[col("int").sort(True, True)]]))
481-
@pytest.mark.parametrize("pass_schema", (True, False))
482-
@pytest.mark.parametrize("path_to_str", (True, False))
481+
@pytest.mark.parametrize("file_sort_order", [
482+
None,
483+
[[col("int").sort(ascending=True, nulls_first=True)]]
484+
])
485+
@pytest.mark.parametrize("pass_schema", [True, False])
486+
@pytest.mark.parametrize("path_to_str", [True, False])
483487
def test_register_listing_table(
484488
ctx, tmp_path, pass_schema, file_sort_order, path_to_str
485489
):
@@ -528,7 +532,7 @@ def test_register_listing_table(
528532
assert dict(zip(rd["grp"], rd["count"])) == {"a": 5, "b": 2}
529533

530534
result = ctx.sql(
531-
"SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp"
535+
"SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp" # noqa: E501
532536
).collect()
533537
result = pa.Table.from_batches(result)
534538

python/tests/test_store.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,25 +15,24 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
import os
18+
from pathlib import Path
1919

2020
import pytest
2121
from datafusion import SessionContext
2222

2323

2424
@pytest.fixture
2525
def ctx():
26-
ctx = SessionContext()
27-
return ctx
26+
return SessionContext()
2827

2928

3029
def test_read_parquet(ctx):
3130
ctx.register_parquet(
3231
"test",
33-
f"file://{os.getcwd()}/parquet/data/alltypes_plain.parquet",
34-
[],
35-
True,
36-
".parquet",
32+
f"file://{Path.cwd()}/parquet/data/alltypes_plain.parquet",
33+
table_partition_cols=[],
34+
parquet_pruning=True,
35+
file_extension=".parquet",
3736
)
3837
df = ctx.sql("SELECT * FROM test")
3938
assert isinstance(df.collect(), list)

python/tests/test_substrait.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ def test_substrait_serialization(ctx):
5050
substrait_plan = ss.Producer.to_substrait_plan(df.logical_plan(), ctx)
5151

5252

53-
@pytest.mark.parametrize("path_to_str", (True, False))
53+
@pytest.mark.parametrize("path_to_str", [True, False])
5454
def test_substrait_file_serialization(ctx, tmp_path, path_to_str):
5555
batch = pa.RecordBatch.from_arrays(
5656
[pa.array([1, 2, 3]), pa.array([4, 5, 6])],

python/tests/test_udaf.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,6 @@
1717

1818
from __future__ import annotations
1919

20-
from typing import List
21-
2220
import pyarrow as pa
2321
import pyarrow.compute as pc
2422
import pytest
@@ -31,15 +29,15 @@ class Summarize(Accumulator):
3129
def __init__(self, initial_value: float = 0.0):
3230
self._sum = pa.scalar(initial_value)
3331

34-
def state(self) -> List[pa.Scalar]:
32+
def state(self) -> list[pa.Scalar]:
3533
return [self._sum]
3634

3735
def update(self, values: pa.Array) -> None:
3836
# Not nice since pyarrow scalars can't be summed yet.
3937
# This breaks on `None`
4038
self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py())
4139

42-
def merge(self, states: List[pa.Array]) -> None:
40+
def merge(self, states: list[pa.Array]) -> None:
4341
# Not nice since pyarrow scalars can't be summed yet.
4442
# This breaks on `None`
4543
self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py())
@@ -56,7 +54,7 @@ class MissingMethods(Accumulator):
5654
def __init__(self):
5755
self._sum = pa.scalar(0)
5856

59-
def state(self) -> List[pa.Scalar]:
57+
def state(self) -> list[pa.Scalar]:
6058
return [self._sum]
6159

6260

0 commit comments

Comments
 (0)