Commit 534bd76

Ruff linting - fixed all remaining issues
1 parent dc95538 commit 534bd76

27 files changed, +78 -156 lines changed

duckdb/experimental/spark/exception.py

Lines changed: 1 addition & 0 deletions

@@ -1,3 +1,4 @@
+# ruff: noqa: D100
 from typing import Optional

duckdb/experimental/spark/sql/dataframe.py

Lines changed: 7 additions & 7 deletions

@@ -29,7 +29,7 @@
 from .group import GroupedData
 from .session import SparkSession

-from .functions import _to_column_expr, col, lit
+from duckdb.experimental.spark.sql import functions as spark_sql_functions


 class DataFrame:  # noqa: D101
@@ -438,7 +438,7 @@ def sort(self, *cols: Union[str, Column, list[Union[str, Column]]], **kwargs: An
         for c in cols:
             _c = c
             if isinstance(c, str):
-                _c = col(c)
+                _c = spark_sql_functions.col(c)
             elif isinstance(c, int) and not isinstance(c, bool):
                 # ordinal is 1-based
                 if c > 0:
@@ -466,7 +466,7 @@ def sort(self, *cols: Union[str, Column, list[Union[str, Column]]], **kwargs: An
                 message_parameters={"arg_name": "ascending", "arg_type": type(ascending).__name__},
             )

-        columns = [_to_column_expr(c) for c in columns]
+        columns = [spark_sql_functions._to_column_expr(c) for c in columns]
         rel = self.relation.sort(*columns)
         return DataFrame(rel, self.session)
@@ -678,7 +678,7 @@ def join(
         if on is not None and not all(isinstance(x, str) for x in on):
             assert isinstance(on, list)
             # Get (or create) the Expressions from the list of Columns
-            on = [_to_column_expr(x) for x in on]
+            on = [spark_sql_functions._to_column_expr(x) for x in on]

             # & all the Expressions together to form one Expression
             assert isinstance(on[0], Expression), "on should be Column or list of Column"
@@ -882,7 +882,7 @@ def __getitem__(self, item: Union[int, str, Column, list, tuple]) -> Union[Colum
         elif isinstance(item, (list, tuple)):
             return self.select(*item)
         elif isinstance(item, int):
-            return col(self._schema[item].name)
+            return spark_sql_functions.col(self._schema[item].name)
         else:
             msg = f"Unexpected item type: {type(item)}"
             raise TypeError(msg)
@@ -904,7 +904,7 @@ def __getattr__(self, name: str) -> Column:
     def groupBy(self, *cols: "ColumnOrName") -> "GroupedData": ...

     @overload
-    def groupBy(self, __cols: Union[list[Column], list[str]]) -> "GroupedData": ...
+    def groupBy(self, __cols: Union[list[Column], list[str]]) -> "GroupedData": ...  # noqa: PYI063

     def groupBy(self, *cols: "ColumnOrName") -> "GroupedData":  # type: ignore[misc]
         """Groups the :class:`DataFrame` using the specified columns,
@@ -1094,7 +1094,7 @@ def unionByName(self, other: "DataFrame", allowMissingColumns: bool = False) ->
                 if col in other.relation.columns:
                     cols.append(col)
                 else:
-                    cols.append(lit(None))
+                    cols.append(spark_sql_functions.lit(None))
             other = other.select(*cols)
         else:
             other = other.select(*self.relation.columns)
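
Note (not stated in the commit message): one plausible motivation for importing the functions module under a qualified name is that bare helpers like col are easily shadowed by local variables of the same name, as in the unionByName loop above. A minimal sketch of that shadowing, assuming duckdb with the experimental Spark API is installed and using illustrative column names:

from duckdb.experimental.spark.sql import functions as spark_sql_functions

columns = ["id", "name"]                         # illustrative column names
exprs = []
for col in columns:                              # "col" is an ordinary string here...
    exprs.append(spark_sql_functions.col(col))   # ...so the qualified helper stays unambiguous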

duckdb/experimental/spark/sql/functions.py

Lines changed: 8 additions & 92 deletions

@@ -220,57 +220,6 @@ def slice(x: "ColumnOrName", start: Union["ColumnOrName", int], length: Union["C
     return _invoke_function("list_slice", _to_column_expr(x), start, end)


-def asc(col: "ColumnOrName") -> Column:
-    """Returns a sort expression based on the ascending order of the given column name.
-
-    .. versionadded:: 1.3.0
-
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
-    Parameters
-    ----------
-    col : :class:`~pyspark.sql.Column` or str
-        target column to sort by in the ascending order.
-
-    Returns:
-    -------
-    :class:`~pyspark.sql.Column`
-        the column specifying the order.
-
-    Examples:
-    --------
-    Sort by the column 'id' in the descending order.
-
-    >>> df = spark.range(5)
-    >>> df = df.sort(desc("id"))
-    >>> df.show()
-    +---+
-    | id|
-    +---+
-    |  4|
-    |  3|
-    |  2|
-    |  1|
-    |  0|
-    +---+
-
-    Sort by the column 'id' in the ascending order.
-
-    >>> df.orderBy(asc("id")).show()
-    +---+
-    | id|
-    +---+
-    |  0|
-    |  1|
-    |  2|
-    |  3|
-    |  4|
-    +---+
-    """
-    return Column(_to_column_expr(col)).asc()
-
-
 def asc_nulls_first(col: "ColumnOrName") -> Column:
     """Returns a sort expression based on the ascending order of the given
     column name, and null values return before non-null values.
@@ -341,42 +290,6 @@ def asc_nulls_last(col: "ColumnOrName") -> Column:
     return asc(col).nulls_last()


-def desc(col: "ColumnOrName") -> Column:
-    """Returns a sort expression based on the descending order of the given column name.
-
-    .. versionadded:: 1.3.0
-
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
-    Parameters
-    ----------
-    col : :class:`~pyspark.sql.Column` or str
-        target column to sort by in the descending order.
-
-    Returns:
-    -------
-    :class:`~pyspark.sql.Column`
-        the column specifying the order.
-
-    Examples:
-    --------
-    Sort by the column 'id' in the descending order.
-
-    >>> spark.range(5).orderBy(desc("id")).show()
-    +---+
-    | id|
-    +---+
-    |  4|
-    |  3|
-    |  2|
-    |  1|
-    |  0|
-    +---+
-    """
-    return Column(_to_column_expr(col)).desc()
-
-
 def desc_nulls_first(col: "ColumnOrName") -> Column:
     """Returns a sort expression based on the descending order of the given
     column name, and null values appear before non-null values.
@@ -4873,9 +4786,10 @@ def octet_length(col: "ColumnOrName") -> Column:


 def hex(col: "ColumnOrName") -> Column:
-    """Computes hex value of the given column, which could be :class:`~pyspark.sql.types.StringType`,
-    :class:`~pyspark.sql.types.BinaryType`, :class:`~pyspark.sql.types.IntegerType` or
-    :class:`~pyspark.sql.types.LongType`.
+    """Computes hex value of the given column.
+
+    The column can be :class:`~pyspark.sql.types.StringType`, :class:`~pyspark.sql.types.BinaryType`,
+    :class:`~pyspark.sql.types.IntegerType` or :class:`~pyspark.sql.types.LongType`.

     .. versionadded:: 1.5.0
@@ -4901,8 +4815,10 @@ def hex(col: "ColumnOrName") -> Column:


 def unhex(col: "ColumnOrName") -> Column:
-    """Inverse of hex. Interprets each pair of characters as a hexadecimal number and converts to the byte
-    representation of number. column and returns it as a binary column.
+    """Inverse of hex.
+
+    Interprets each pair of characters as a hexadecimal number and converts to the byte representation of number column
+    and returns it as a binary column.

     .. versionadded:: 1.5.0

duckdb/experimental/spark/sql/group.py

Lines changed: 1 addition & 1 deletion

@@ -317,7 +317,7 @@ def sum(self, *cols: str) -> DataFrame:
     def agg(self, *exprs: Column) -> DataFrame: ...

     @overload
-    def agg(self, __exprs: dict[str, str]) -> DataFrame: ...
+    def agg(self, __exprs: dict[str, str]) -> DataFrame: ...  # noqa: PYI063

     def agg(self, *exprs: Union[Column, dict[str, str]]) -> DataFrame:
         """Compute aggregates and returns the result as a :class:`DataFrame`.

duckdb/experimental/spark/sql/readwriter.py

Lines changed: 1 addition & 1 deletion

@@ -100,7 +100,7 @@ def load(  # noqa: D102
         from duckdb.experimental.spark.sql.dataframe import DataFrame

         if not isinstance(path, str):
-            raise ImportError
+            raise TypeError
         if options:
             raise ContributionsAcceptedError

duckdb/experimental/spark/sql/session.py

Lines changed: 3 additions & 1 deletion

@@ -2,6 +2,8 @@
 from collections.abc import Iterable, Sized
 from typing import TYPE_CHECKING, Any, NoReturn, Optional, Union

+import duckdb
+
 if TYPE_CHECKING:
     from pandas.core.frame import DataFrame as PandasDataFrame

@@ -31,7 +33,7 @@

 # data is a List of rows
 # every value in each row needs to be turned into a Value
-def _combine_data_and_schema(data: Iterable[Any], schema: StructType) -> list["duckdb.Value"]:
+def _combine_data_and_schema(data: Iterable[Any], schema: StructType) -> list[duckdb.Value]:
     from duckdb import Value

     new_data = []

duckdb/experimental/spark/sql/types.py

Lines changed: 1 addition & 1 deletion

@@ -1018,7 +1018,7 @@ def simpleString(self) -> str:
         return "udt"

     def __eq__(self, other: object) -> bool:
-        return type(self) == type(other)
+        return type(self) is type(other)


 _atomic_types: list[type[DataType]] = [
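
Note (context, not part of the commit): the "==" to "is" change in __eq__ matches pycodestyle/ruff rule E721, which prefers identity checks for exact type comparisons. A minimal sketch with illustrative class names:

class DataType: ...
class UserDefinedType(DataType): ...

a, b = UserDefinedType(), UserDefinedType()

print(type(a) == type(b))        # True, but flagged by E721: equality on type objects
print(type(a) is type(b))        # True: identity is the exact-type check ruff prefers
print(isinstance(a, DataType))   # True: use isinstance() when subclasses should also match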

duckdb/query_graph/__main__.py

Lines changed: 4 additions & 4 deletions

@@ -85,10 +85,10 @@ def __init__(self, phase: str, time: float) -> None:  # noqa: D107
     def calculate_percentage(self, total_time: float) -> None:  # noqa: D102
         self.percentage = self.time / total_time

-    def combine_timing(l: "NodeTiming", r: "NodeTiming") -> "NodeTiming":  # noqa: D102
+    def combine_timing(self, r: "NodeTiming") -> "NodeTiming":  # noqa: D102
         # TODO: can only add timings for same-phase nodes  # noqa: TD002, TD003
-        total_time = l.time + r.time
-        return NodeTiming(l.phase, total_time)
+        total_time = self.time + r.time
+        return NodeTiming(self.phase, total_time)


 class AllTimings:  # noqa: D101
@@ -257,7 +257,7 @@ def generate_tree_html(graph_json: object) -> str:  # noqa: D103
 def generate_ipython(json_input: str) -> str:  # noqa: D103
     from IPython.core.display import HTML

-    html_output = generate_html(json_input, False)
+    html_output = generate_html(json_input, False)  # noqa: F821

     return HTML(
         ('\n ${CSS}\n ${LIBRARIES}\n <div class="chart" id="query-profile"></div>\n ${CHART_SCRIPT}\n ')

duckdb/udf.py

Lines changed: 5 additions & 2 deletions

@@ -1,9 +1,12 @@
+# ruff: noqa: D100
 from typing import Callable


 def vectorized(func: Callable) -> Callable:
-    """Decorate a function with annotated function parameters, so DuckDB can infer that the function should be
-    provided with pyarrow arrays and should expect pyarrow array(s) as output.
+    """Decorate a function with annotated function parameters.
+
+    This allows DuckDB to infer that the function should be provided with pyarrow arrays and should expect
+    pyarrow array(s) as output.
     """
     import types
     from inspect import signature

duckdb_packaging/_versioning.py

Lines changed: 1 addition & 1 deletion

@@ -149,7 +149,7 @@ def strip_post_from_version(version: str) -> str:

 def get_git_describe(
     repo_path: Optional[pathlib.Path] = None,
-    since_major: bool = False,
+    since_major: bool = False,  # noqa: FBT001
     since_minor: bool = False,  # noqa: FBT001
 ) -> Optional[str]:
     """Get git describe output for version determination.
