Commit 4e26018

more ruff formatting suggestions
1 parent 58f6021 commit 4e26018

File tree

6 files changed: +34 additions, -23 deletions

pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ ignore = [
     "ANN401", # Allow Any for wrapper classes
     "COM812", # Recommended to ignore these rules when using with ruff-format
     "ISC001", # Recommended to ignore these rules when using with ruff-format
+    "SLF001", # Allow accessing private members
     "TD002",
     "UP007" # Disallowing Union is pedantic
 ]
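
For context, SLF001 is Ruff's flake8-self rule that flags access to single-underscore "private" members from outside the owning object. The Python wrappers in this package routinely reach into the `_internal` bindings, so the rule is ignored project-wide. A minimal sketch of the kind of access SLF001 would otherwise report (the class and attribute names here are made up for illustration):

class Connection:
    def __init__(self) -> None:
        self._handle = object()  # private by convention


def handle_of(conn: Connection) -> object:
    # Accessing a private member from outside the class:
    # Ruff would flag this line as SLF001 unless the rule is ignored.
    return conn._handle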

python/datafusion/io.py

Lines changed: 18 additions & 11 deletions
@@ -19,23 +19,28 @@
 
 from __future__ import annotations
 
-import pathlib
-
-import pyarrow
+from typing import TYPE_CHECKING
 
 from datafusion.dataframe import DataFrame
-from datafusion.expr import Expr
 
 from ._internal import SessionContext as SessionContextInternal
 
+if TYPE_CHECKING:
+    import pathlib
+
+    import pyarrow as pa
+
+    from datafusion.expr import Expr
+
 
-def read_parquet(
+def read_parquet(  # noqa: PLR0913
     path: str | pathlib.Path,
+    *,
     table_partition_cols: list[tuple[str, str]] | None = None,
     parquet_pruning: bool = True,
     file_extension: str = ".parquet",
     skip_metadata: bool = True,
-    schema: pyarrow.Schema | None = None,
+    schema: pa.Schema | None = None,
     file_sort_order: list[list[Expr]] | None = None,
 ) -> DataFrame:
     """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`.
@@ -77,9 +82,10 @@ def read_parquet(
     )
 
 
-def read_json(
+def read_json(  # noqa: PLR0913
     path: str | pathlib.Path,
-    schema: pyarrow.Schema | None = None,
+    *,
+    schema: pa.Schema | None = None,
     schema_infer_max_records: int = 1000,
     file_extension: str = ".json",
     table_partition_cols: list[tuple[str, str]] | None = None,
@@ -118,9 +124,10 @@ def read_json(
     )
 
 
-def read_csv(
+def read_csv(  # noqa: PLR0913
    path: str | pathlib.Path | list[str] | list[pathlib.Path],
-    schema: pyarrow.Schema | None = None,
+    *,
+    schema: pa.Schema | None = None,
     has_header: bool = True,
     delimiter: str = ",",
     schema_infer_max_records: int = 1000,
@@ -173,7 +180,7 @@ def read_csv(
 
 def read_avro(
     path: str | pathlib.Path,
-    schema: pyarrow.Schema | None = None,
+    schema: pa.Schema | None = None,
     file_partition_cols: list[tuple[str, str]] | None = None,
     file_extension: str = ".avro",
 ) -> DataFrame:
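
Two things change for callers here: `pathlib` and `pyarrow` move under `if TYPE_CHECKING:` so this module no longer imports them at runtime, and the bare `*` makes every option after the path keyword-only, so calls that previously passed `schema` and the other options positionally now need keywords (the `# noqa: PLR0913` comments accept the argument count rather than shrinking the signatures). A minimal caller-side sketch under those assumptions; the file path and column names are invented for illustration:

import pyarrow as pa

from datafusion.io import read_csv

# Only the path may be passed positionally; every other option must be a keyword
# because of the bare `*` in the signature.
schema = pa.schema([("id", pa.int64()), ("name", pa.string())])
df = read_csv("data.csv", schema=schema, has_header=True, delimiter=",")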

python/datafusion/plan.py

Lines changed: 3 additions & 3 deletions
@@ -19,7 +19,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, List
+from typing import TYPE_CHECKING, Any
 
 import datafusion._internal as df_internal
 
@@ -54,7 +54,7 @@ def to_variant(self) -> Any:
         """Convert the logical plan into its specific variant."""
         return self._raw_plan.to_variant()
 
-    def inputs(self) -> List[LogicalPlan]:
+    def inputs(self) -> list[LogicalPlan]:
         """Returns the list of inputs to the logical plan."""
         return [LogicalPlan(p) for p in self._raw_plan.inputs()]
 
@@ -106,7 +106,7 @@ def __init__(self, plan: df_internal.ExecutionPlan) -> None:
         """This constructor should not be called by the end user."""
         self._raw_plan = plan
 
-    def children(self) -> List[ExecutionPlan]:
+    def children(self) -> list[ExecutionPlan]:
         """Get a list of children `ExecutionPlan` that act as inputs to this plan.
 
         The returned list will be empty for leaf nodes such as scans, will contain a
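
Dropping `typing.List` for the built-in `list` follows PEP 585, and because the module already has `from __future__ import annotations` (visible in the first hunk) the annotations are stored as strings rather than evaluated at import time, so the built-in generics are safe on every Python version this project supports. A small self-contained sketch of the pattern; the class below is a stand-in, not one of the library's plan types:

from __future__ import annotations  # annotations become strings, evaluated lazily


class Node:
    def __init__(self, children: list[Node] | None = None) -> None:
        self.children: list[Node] = children or []

    def leaves(self) -> list[Node]:
        # Built-in `list[...]` replaces `typing.List[...]` (PEP 585).
        if not self.children:
            return [self]
        return [leaf for child in self.children for leaf in child.leaves()]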

python/datafusion/record_batch.py

Lines changed: 4 additions & 4 deletions
@@ -26,14 +26,14 @@
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
-    import pyarrow
+    import pyarrow as pa
     import typing_extensions
 
 import datafusion._internal as df_internal
 
 
 class RecordBatch:
-    """This class is essentially a wrapper for :py:class:`pyarrow.RecordBatch`."""
+    """This class is essentially a wrapper for :py:class:`pa.RecordBatch`."""
 
     def __init__(self, record_batch: df_internal.RecordBatch) -> None:
         """This constructor is generally not called by the end user.
@@ -42,8 +42,8 @@ def __init__(self, record_batch: df_internal.RecordBatch) -> None:
         """
         self.record_batch = record_batch
 
-    def to_pyarrow(self) -> pyarrow.RecordBatch:
-        """Convert to :py:class:`pyarrow.RecordBatch`."""
+    def to_pyarrow(self) -> pa.RecordBatch:
+        """Convert to :py:class:`pa.RecordBatch`."""
         return self.record_batch.to_pyarrow()
 
 
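Nothing changes at runtime here: `pyarrow` is still imported only inside the `if TYPE_CHECKING:` block, now under the shorter `pa` alias, so this module avoids importing it at import time while type checkers still resolve `pa.RecordBatch` in the annotations. A self-contained sketch of that pattern; the wrapper below is a simplified stand-in, not the library's real class:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only evaluated by static type checkers, never imported at runtime.
    import pyarrow as pa


class BatchWrapper:
    """Thin wrapper around something convertible to a pyarrow RecordBatch."""

    def __init__(self, inner) -> None:
        self._inner = inner

    def to_pyarrow(self) -> pa.RecordBatch:
        # The return annotation is a string thanks to `from __future__ import
        # annotations`, so `pa` does not need to exist when this module runs.
        return self._inner.to_pyarrow()
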
python/datafusion/substrait.py

Lines changed: 6 additions & 5 deletions
@@ -23,7 +23,6 @@
 
 from __future__ import annotations
 
-import pathlib
 from typing import TYPE_CHECKING
 
 try:
@@ -36,6 +35,8 @@
 from ._internal import substrait as substrait_internal
 
 if TYPE_CHECKING:
+    import pathlib
+
     from datafusion.context import SessionContext
 
 __all__ = [
@@ -68,7 +69,7 @@ def encode(self) -> bytes:
 
 
 @deprecated("Use `Plan` instead.")
-class plan(Plan):
+class plan(Plan):  # noqa: N801
     """See `Plan`."""
 
 
@@ -138,7 +139,7 @@ def deserialize_bytes(proto_bytes: bytes) -> Plan:
 
 
 @deprecated("Use `Serde` instead.")
-class serde(Serde):
+class serde(Serde):  # noqa: N801
     """See `Serde` instead."""
 
 
@@ -164,7 +165,7 @@ def to_substrait_plan(logical_plan: LogicalPlan, ctx: SessionContext) -> Plan:
 
 
 @deprecated("Use `Producer` instead.")
-class producer(Producer):
+class producer(Producer):  # noqa: N801
     """Use `Producer` instead."""
 
 
@@ -188,5 +189,5 @@ def from_substrait_plan(ctx: SessionContext, plan: Plan) -> LogicalPlan:
 
 
 @deprecated("Use `Consumer` instead.")
-class consumer(Consumer):
+class consumer(Consumer):  # noqa: N801
     """Use `Consumer` instead."""

python/datafusion/udf.py

Lines changed: 2 additions & 0 deletions
@@ -575,10 +575,12 @@ def evaluate_all_with_rank(  # noqa: B027
         The user must implement this method if ``include_rank`` returns True.
         """
 
+    @abstractmethod
     def supports_bounded_execution(self) -> bool:
         """Can the window function be incrementally computed using bounded memory?"""
         return False
 
+    @abstractmethod
     def uses_window_frame(self) -> bool:
         """Does the window function use the values from the window frame?"""
         return False
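
Marking these two methods with `@abstractmethod` means concrete window evaluators must now provide their own implementations; the `return False` bodies remain only as documentation of the default behavior. A self-contained sketch of the effect, using a stand-in base class rather than the library's actual window-evaluator ABC:

from abc import ABCMeta, abstractmethod


class Evaluator(metaclass=ABCMeta):
    """Stand-in for the library's window-evaluator base class."""

    @abstractmethod
    def supports_bounded_execution(self) -> bool:
        """Can the window function be incrementally computed using bounded memory?"""
        return False

    @abstractmethod
    def uses_window_frame(self) -> bool:
        """Does the window function use the values from the window frame?"""
        return False


class SlidingSum(Evaluator):
    # Both methods must be overridden now that they are abstract;
    # omitting either would make SlidingSum un-instantiable.
    def supports_bounded_execution(self) -> bool:
        return True

    def uses_window_frame(self) -> bool:
        return True


SlidingSum()  # ok; Evaluator() itself would raise TypeError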
