Commit 534bd76

Ruff linting - fixed all remaining issues
1 parent dc95538 commit 534bd76

27 files changed, +78 -156 lines changed

duckdb/experimental/spark/exception.py

Lines changed: 1 addition & 0 deletions

@@ -1,3 +1,4 @@
+# ruff: noqa: D100
 from typing import Optional

duckdb/experimental/spark/sql/dataframe.py

Lines changed: 7 additions & 7 deletions

@@ -29,7 +29,7 @@
 from .group import GroupedData
 from .session import SparkSession

-from .functions import _to_column_expr, col, lit
+from duckdb.experimental.spark.sql import functions as spark_sql_functions


 class DataFrame:  # noqa: D101
@@ -438,7 +438,7 @@ def sort(self, *cols: Union[str, Column, list[Union[str, Column]]], **kwargs: An
         for c in cols:
             _c = c
             if isinstance(c, str):
-                _c = col(c)
+                _c = spark_sql_functions.col(c)
             elif isinstance(c, int) and not isinstance(c, bool):
                 # ordinal is 1-based
                 if c > 0:
@@ -466,7 +466,7 @@ def sort(self, *cols: Union[str, Column, list[Union[str, Column]]], **kwargs: An
                 message_parameters={"arg_name": "ascending", "arg_type": type(ascending).__name__},
             )

-        columns = [_to_column_expr(c) for c in columns]
+        columns = [spark_sql_functions._to_column_expr(c) for c in columns]
         rel = self.relation.sort(*columns)
         return DataFrame(rel, self.session)
@@ -678,7 +678,7 @@ def join(
         if on is not None and not all(isinstance(x, str) for x in on):
             assert isinstance(on, list)
             # Get (or create) the Expressions from the list of Columns
-            on = [_to_column_expr(x) for x in on]
+            on = [spark_sql_functions._to_column_expr(x) for x in on]

             # & all the Expressions together to form one Expression
             assert isinstance(on[0], Expression), "on should be Column or list of Column"
@@ -882,7 +882,7 @@ def __getitem__(self, item: Union[int, str, Column, list, tuple]) -> Union[Colum
         elif isinstance(item, (list, tuple)):
             return self.select(*item)
         elif isinstance(item, int):
-            return col(self._schema[item].name)
+            return spark_sql_functions.col(self._schema[item].name)
         else:
             msg = f"Unexpected item type: {type(item)}"
             raise TypeError(msg)
@@ -904,7 +904,7 @@ def __getattr__(self, name: str) -> Column:
     def groupBy(self, *cols: "ColumnOrName") -> "GroupedData": ...

     @overload
-    def groupBy(self, __cols: Union[list[Column], list[str]]) -> "GroupedData": ...
+    def groupBy(self, __cols: Union[list[Column], list[str]]) -> "GroupedData": ...  # noqa: PYI063

     def groupBy(self, *cols: "ColumnOrName") -> "GroupedData":  # type: ignore[misc]
         """Groups the :class:`DataFrame` using the specified columns,
@@ -1094,7 +1094,7 @@ def unionByName(self, other: "DataFrame", allowMissingColumns: bool = False) ->
                 if col in other.relation.columns:
                     cols.append(col)
                 else:
-                    cols.append(lit(None))
+                    cols.append(spark_sql_functions.lit(None))
             other = other.select(*cols)
         else:
             other = other.select(*self.relation.columns)
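
Note (not stated in the commit message): one plausible motivation for importing the functions module under a qualified name is that bare helpers like col are easily shadowed by local variables of the same name, as in the unionByName loop above. A minimal sketch of that shadowing, assuming duckdb with the experimental Spark API is installed and using illustrative column names:

from duckdb.experimental.spark.sql import functions as spark_sql_functions

columns = ["id", "name"]                         # illustrative column names
exprs = []
for col in columns:                              # "col" is an ordinary string here...
    exprs.append(spark_sql_functions.col(col))   # ...so the qualified helper stays unambiguous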

duckdb/experimental/spark/sql/functions.py

Lines changed: 8 additions & 92 deletions

@@ -220,57 +220,6 @@ def slice(x: "ColumnOrName", start: Union["ColumnOrName", int], length: Union["C
     return _invoke_function("list_slice", _to_column_expr(x), start, end)


-def asc(col: "ColumnOrName") -> Column:
-    """Returns a sort expression based on the ascending order of the given column name.
-
-    .. versionadded:: 1.3.0
-
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
-    Parameters
-    ----------
-    col : :class:`~pyspark.sql.Column` or str
-        target column to sort by in the ascending order.
-
-    Returns:
-    -------
-    :class:`~pyspark.sql.Column`
-        the column specifying the order.
-
-    Examples:
-    --------
-    Sort by the column 'id' in the descending order.
-
-    >>> df = spark.range(5)
-    >>> df = df.sort(desc("id"))
-    >>> df.show()
-    +---+
-    | id|
-    +---+
-    |  4|
-    |  3|
-    |  2|
-    |  1|
-    |  0|
-    +---+
-
-    Sort by the column 'id' in the ascending order.
-
-    >>> df.orderBy(asc("id")).show()
-    +---+
-    | id|
-    +---+
-    |  0|
-    |  1|
-    |  2|
-    |  3|
-    |  4|
-    +---+
-    """
-    return Column(_to_column_expr(col)).asc()
-
-
 def asc_nulls_first(col: "ColumnOrName") -> Column:
     """Returns a sort expression based on the ascending order of the given
     column name, and null values return before non-null values.
@@ -341,42 +290,6 @@ def asc_nulls_last(col: "ColumnOrName") -> Column:
     return asc(col).nulls_last()


-def desc(col: "ColumnOrName") -> Column:
-    """Returns a sort expression based on the descending order of the given column name.
-
-    .. versionadded:: 1.3.0
-
-    .. versionchanged:: 3.4.0
-        Supports Spark Connect.
-
-    Parameters
-    ----------
-    col : :class:`~pyspark.sql.Column` or str
-        target column to sort by in the descending order.
-
-    Returns:
-    -------
-    :class:`~pyspark.sql.Column`
-        the column specifying the order.
-
-    Examples:
-    --------
-    Sort by the column 'id' in the descending order.
-
-    >>> spark.range(5).orderBy(desc("id")).show()
-    +---+
-    | id|
-    +---+
-    |  4|
-    |  3|
-    |  2|
-    |  1|
-    |  0|
-    +---+
-    """
-    return Column(_to_column_expr(col)).desc()
-
-
 def desc_nulls_first(col: "ColumnOrName") -> Column:
     """Returns a sort expression based on the descending order of the given
     column name, and null values appear before non-null values.
@@ -4873,9 +4786,10 @@ def octet_length(col: "ColumnOrName") -> Column:


 def hex(col: "ColumnOrName") -> Column:
-    """Computes hex value of the given column, which could be :class:`~pyspark.sql.types.StringType`,
-    :class:`~pyspark.sql.types.BinaryType`, :class:`~pyspark.sql.types.IntegerType` or
-    :class:`~pyspark.sql.types.LongType`.
+    """Computes hex value of the given column.
+
+    The column can be :class:`~pyspark.sql.types.StringType`, :class:`~pyspark.sql.types.BinaryType`,
+    :class:`~pyspark.sql.types.IntegerType` or :class:`~pyspark.sql.types.LongType`.

     .. versionadded:: 1.5.0
@@ -4901,8 +4815,10 @@ def hex(col: "ColumnOrName") -> Column:


 def unhex(col: "ColumnOrName") -> Column:
-    """Inverse of hex. Interprets each pair of characters as a hexadecimal number and converts to the byte
-    representation of number. column and returns it as a binary column.
+    """Inverse of hex.
+
+    Interprets each pair of characters as a hexadecimal number and converts to the byte representation of number column
+    and returns it as a binary column.

     .. versionadded:: 1.5.0

duckdb/experimental/spark/sql/group.py

Lines changed: 1 addition & 1 deletion

@@ -317,7 +317,7 @@ def sum(self, *cols: str) -> DataFrame:
     def agg(self, *exprs: Column) -> DataFrame: ...

     @overload
-    def agg(self, __exprs: dict[str, str]) -> DataFrame: ...
+    def agg(self, __exprs: dict[str, str]) -> DataFrame: ...  # noqa: PYI063

     def agg(self, *exprs: Union[Column, dict[str, str]]) -> DataFrame:
         """Compute aggregates and returns the result as a :class:`DataFrame`.

duckdb/experimental/spark/sql/readwriter.py

Lines changed: 1 addition & 1 deletion

@@ -100,7 +100,7 @@ def load(  # noqa: D102
         from duckdb.experimental.spark.sql.dataframe import DataFrame

         if not isinstance(path, str):
-            raise ImportError
+            raise TypeError
         if options:
             raise ContributionsAcceptedError

duckdb/experimental/spark/sql/session.py

Lines changed: 3 additions & 1 deletion

@@ -2,6 +2,8 @@
 from collections.abc import Iterable, Sized
 from typing import TYPE_CHECKING, Any, NoReturn, Optional, Union

+import duckdb
+
 if TYPE_CHECKING:
     from pandas.core.frame import DataFrame as PandasDataFrame

@@ -31,7 +33,7 @@

 # data is a List of rows
 # every value in each row needs to be turned into a Value
-def _combine_data_and_schema(data: Iterable[Any], schema: StructType) -> list["duckdb.Value"]:
+def _combine_data_and_schema(data: Iterable[Any], schema: StructType) -> list[duckdb.Value]:
     from duckdb import Value

     new_data = []

duckdb/experimental/spark/sql/types.py

Lines changed: 1 addition & 1 deletion

@@ -1018,7 +1018,7 @@ def simpleString(self) -> str:
         return "udt"

     def __eq__(self, other: object) -> bool:
-        return type(self) == type(other)
+        return type(self) is type(other)


 _atomic_types: list[type[DataType]] = [
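
Note (context, not part of the commit): the "==" to "is" change in __eq__ matches pycodestyle/ruff rule E721, which prefers identity checks for exact type comparisons. A minimal sketch with illustrative class names:

class DataType: ...
class UserDefinedType(DataType): ...

a, b = UserDefinedType(), UserDefinedType()

print(type(a) == type(b))        # True, but flagged by E721: equality on type objects
print(type(a) is type(b))        # True: identity is the exact-type check ruff prefers
print(isinstance(a, DataType))   # True: use isinstance() when subclasses should also match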

duckdb/query_graph/__main__.py

Lines changed: 4 additions & 4 deletions

@@ -85,10 +85,10 @@ def __init__(self, phase: str, time: float) -> None:  # noqa: D107
     def calculate_percentage(self, total_time: float) -> None:  # noqa: D102
         self.percentage = self.time / total_time

-    def combine_timing(l: "NodeTiming", r: "NodeTiming") -> "NodeTiming":  # noqa: D102
+    def combine_timing(self, r: "NodeTiming") -> "NodeTiming":  # noqa: D102
         # TODO: can only add timings for same-phase nodes  # noqa: TD002, TD003
-        total_time = l.time + r.time
-        return NodeTiming(l.phase, total_time)
+        total_time = self.time + r.time
+        return NodeTiming(self.phase, total_time)


 class AllTimings:  # noqa: D101
@@ -257,7 +257,7 @@ def generate_tree_html(graph_json: object) -> str:  # noqa: D103
 def generate_ipython(json_input: str) -> str:  # noqa: D103
     from IPython.core.display import HTML

-    html_output = generate_html(json_input, False)
+    html_output = generate_html(json_input, False)  # noqa: F821

     return HTML(
         ('\n ${CSS}\n ${LIBRARIES}\n <div class="chart" id="query-profile"></div>\n ${CHART_SCRIPT}\n ')

duckdb/udf.py

Lines changed: 5 additions & 2 deletions

@@ -1,9 +1,12 @@
+# ruff: noqa: D100
 from typing import Callable


 def vectorized(func: Callable) -> Callable:
-    """Decorate a function with annotated function parameters, so DuckDB can infer that the function should be
-    provided with pyarrow arrays and should expect pyarrow array(s) as output.
+    """Decorate a function with annotated function parameters.
+
+    This allows DuckDB to infer that the function should be provided with pyarrow arrays and should expect
+    pyarrow array(s) as output.
     """
     import types
     from inspect import signature

duckdb_packaging/_versioning.py

Lines changed: 1 addition & 1 deletion

@@ -149,7 +149,7 @@ def strip_post_from_version(version: str) -> str:

 def get_git_describe(
     repo_path: Optional[pathlib.Path] = None,
-    since_major: bool = False,
+    since_major: bool = False,  # noqa: FBT001
     since_minor: bool = False,  # noqa: FBT001
 ) -> Optional[str]:
     """Get git describe output for version determination.
