
Commit cebb932

Ruff E501: fixed and noqa'd too long lines
1 parent e92718d · commit cebb932


44 files changed: +207 −132 lines changed

duckdb/experimental/spark/context.py

Lines changed: 27 additions & 11 deletions
@@ -58,7 +58,8 @@ def version(self) -> str: # noqa: D102
 def __repr__(self) -> str: # noqa: D105
 raise ContributionsAcceptedError

-# def accumulator(self, value: ~T, accum_param: Optional[ForwardRef('AccumulatorParam[T]')] = None) -> 'Accumulator[T]':
+# def accumulator(self, value: ~T, accum_param: Optional[ForwardRef('AccumulatorParam[T]')] = None
+# ) -> 'Accumulator[T]':
 # pass

 def addArchive(self, path: str) -> None: # noqa: D102
@@ -70,7 +71,8 @@ def addFile(self, path: str, recursive: bool = False) -> None: # noqa: D102
 def addPyFile(self, path: str) -> None: # noqa: D102
 raise ContributionsAcceptedError

-# def binaryFiles(self, path: str, minPartitions: Optional[int] = None) -> duckdb.experimental.spark.rdd.RDD[typing.Tuple[str, bytes]]:
+# def binaryFiles(self, path: str, minPartitions: Optional[int] = None
+# ) -> duckdb.experimental.spark.rdd.RDD[typing.Tuple[str, bytes]]:
 # pass

 # def binaryRecords(self, path: str, recordLength: int) -> duckdb.experimental.spark.rdd.RDD[bytes]:
@@ -100,16 +102,24 @@ def getConf(self) -> SparkConf: # noqa: D102
 def getLocalProperty(self, key: str) -> Optional[str]: # noqa: D102
 raise ContributionsAcceptedError

-# def hadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+# def hadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str,
+# keyConverter: Optional[str] = None, valueConverter: Optional[str] = None,
+# conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
 # pass

-# def hadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+# def hadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None,
+# valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0
+# ) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
 # pass

-# def newAPIHadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+# def newAPIHadoopFile(self, path: str, inputFormatClass: str, keyClass: str, valueClass: str,
+# keyConverter: Optional[str] = None, valueConverter: Optional[str] = None,
+# conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
 # pass

-# def newAPIHadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+# def newAPIHadoopRDD(self, inputFormatClass: str, keyClass: str, valueClass: str,
+# keyConverter: Optional[str] = None, valueConverter: Optional[str] = None,
+# conf: Optional[Dict[str, str]] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
 # pass

 # def parallelize(self, c: Iterable[~T], numSlices: Optional[int] = None) -> pyspark.rdd.RDD[~T]:
@@ -118,13 +128,17 @@ def getLocalProperty(self, key: str) -> Optional[str]: # noqa: D102
 # def pickleFile(self, name: str, minPartitions: Optional[int] = None) -> pyspark.rdd.RDD[typing.Any]:
 # pass

-# def range(self, start: int, end: Optional[int] = None, step: int = 1, numSlices: Optional[int] = None) -> pyspark.rdd.RDD[int]:
+# def range(self, start: int, end: Optional[int] = None, step: int = 1, numSlices: Optional[int] = None
+# ) -> pyspark.rdd.RDD[int]:
 # pass

-# def runJob(self, rdd: pyspark.rdd.RDD[~T], partitionFunc: Callable[[Iterable[~T]], Iterable[~U]], partitions: Optional[Sequence[int]] = None, allowLocal: bool = False) -> List[~U]:
+# def runJob(self, rdd: pyspark.rdd.RDD[~T], partitionFunc: Callable[[Iterable[~T]], Iterable[~U]],
+# partitions: Optional[Sequence[int]] = None, allowLocal: bool = False) -> List[~U]:
 # pass

-# def sequenceFile(self, path: str, keyClass: Optional[str] = None, valueClass: Optional[str] = None, keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, minSplits: Optional[int] = None, batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
+# def sequenceFile(self, path: str, keyClass: Optional[str] = None, valueClass: Optional[str] = None,
+# keyConverter: Optional[str] = None, valueConverter: Optional[str] = None, minSplits: Optional[int] = None,
+# batchSize: int = 0) -> pyspark.rdd.RDD[typing.Tuple[~T, ~U]]:
 # pass

 def setCheckpointDir(self, dirName: str) -> None: # noqa: D102
@@ -151,13 +165,15 @@ def sparkUser(self) -> str: # noqa: D102
 # def statusTracker(self) -> duckdb.experimental.spark.status.StatusTracker:
 # raise ContributionsAcceptedError

-# def textFile(self, name: str, minPartitions: Optional[int] = None, use_unicode: bool = True) -> pyspark.rdd.RDD[str]:
+# def textFile(self, name: str, minPartitions: Optional[int] = None, use_unicode: bool = True
+# ) -> pyspark.rdd.RDD[str]:
 # pass

 # def union(self, rdds: List[pyspark.rdd.RDD[~T]]) -> pyspark.rdd.RDD[~T]:
 # pass

-# def wholeTextFiles(self, path: str, minPartitions: Optional[int] = None, use_unicode: bool = True) -> pyspark.rdd.RDD[typing.Tuple[str, str]]:
+# def wholeTextFiles(self, path: str, minPartitions: Optional[int] = None, use_unicode: bool = True
+# ) -> pyspark.rdd.RDD[typing.Tuple[str, str]]:
 # pass
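
These wrapped lines are commented-out method stubs, so only the comment text is re-flowed. For live code the same effect comes from Python's implicit line continuation inside parentheses; here is a minimal sketch of that wrapping style using a hypothetical function (not part of the DuckDB API):

```python
from typing import Optional


# Hypothetical stub, shown only to illustrate the wrapping style: inside an
# open parenthesis Python continues the statement onto the next line, so a
# long parameter list can be split without backslashes or noqa comments.
def binary_files(
    path: str,
    min_partitions: Optional[int] = None,
) -> list[tuple[str, bytes]]:
    """Return nothing useful; exists only to show the signature layout."""
    return []


print(binary_files("data/"))  # []
```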

duckdb/experimental/spark/errors/error_classes.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-# # noqa: D100
+# ruff: noqa: D100, E501
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements. See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
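
This file switches from a plain comment to Ruff's file-level suppression, so every rule listed after `# ruff: noqa:` is ignored for the whole file rather than for a single line. A small sketch of the two suppression scopes (the constant below is made up for illustration):

```python
# ruff: noqa: D100, E501
# The file-level directive above exempts this entire file from the listed
# rules (D100: missing module docstring, E501: line too long).

# A trailing comment exempts only the line it sits on:
EXAMPLE_MESSAGE = "a deliberately long literal that would otherwise trip the line-length limit in a strict config"  # noqa: E501
```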

duckdb/experimental/spark/sql/functions.py

Lines changed: 5 additions & 2 deletions
@@ -4879,7 +4879,9 @@ def octet_length(col: "ColumnOrName") -> Column:


 def hex(col: "ColumnOrName") -> Column:
-"""Computes hex value of the given column, which could be :class:`~pyspark.sql.types.StringType`, :class:`~pyspark.sql.types.BinaryType`, :class:`~pyspark.sql.types.IntegerType` or :class:`~pyspark.sql.types.LongType`.
+"""Computes hex value of the given column, which could be :class:`~pyspark.sql.types.StringType`,
+:class:`~pyspark.sql.types.BinaryType`, :class:`~pyspark.sql.types.IntegerType` or
+:class:`~pyspark.sql.types.LongType`.

 .. versionadded:: 1.5.0
@@ -4905,7 +4907,8 @@ def hex(col: "ColumnOrName") -> Column:


 def unhex(col: "ColumnOrName") -> Column:
-"""Inverse of hex. Interprets each pair of characters as a hexadecimal number and converts to the byte representation of number. column and returns it as a binary column.
+"""Inverse of hex. Interprets each pair of characters as a hexadecimal number and converts to the byte
+representation of number. column and returns it as a binary column.

 .. versionadded:: 1.5.0
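
Re-flowing a docstring like this changes only the source layout; at runtime it is still a single `__doc__` string that `help()` and Sphinx re-wrap. A tiny illustration with a stand-in function (not the DuckDB `hex` implementation):

```python
def hex_demo(value: int) -> str:
    """Computes the hex value of the given integer, which could be any Python int,
    and returns it as a string.
    """
    return format(value, "x")


print(hex_demo(255))                       # ff
print(" ".join(hex_demo.__doc__.split()))  # the wrapped lines read as one docstring
```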

duckdb/experimental/spark/sql/session.py

Lines changed: 2 additions & 1 deletion
@@ -22,7 +22,8 @@
 # In spark:
 # SparkSession holds a SparkContext
 # SparkContext gets created from SparkConf
-# At this level the check is made to determine whether the instance already exists and just needs to be retrieved or it needs to be created
+# At this level the check is made to determine whether the instance already exists and just needs
+# to be retrieved or it needs to be created.

 # For us this is done inside of `duckdb.connect`, based on the passed in path + configuration
 # SparkContext can be compared to our Connection class, and SparkConf to our ClientContext class
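
The rewrapped comment points at where the get-or-create check lives on the DuckDB side. A short usage sketch of that entry point, with arbitrary example configuration values:

```python
import duckdb

# duckdb.connect() is where DuckDB decides, from the path plus configuration,
# whether an instance needs to be created or can simply be retrieved, which is
# the role the comment above assigns to it.
con = duckdb.connect(":memory:", config={"threads": "4"})
print(con.execute("select 42").fetchone())  # (42,)
```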

duckdb/experimental/spark/sql/types.py

Lines changed: 4 additions & 3 deletions
@@ -1,4 +1,5 @@
-# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'spark' folder. # noqa: D100
+# This code is based on code from Apache Spark under the license found in the LICENSE # noqa: D100
+# file located in the 'spark' folder.

 import calendar
 import datetime
@@ -456,8 +457,8 @@ def simpleString(self) -> str: # noqa: D102
 class HugeIntegerType(IntegralType):
 """Huge integer data type, i.e. a signed 128-bit integer.

-If the values are beyond the range of [-170141183460469231731687303715884105728, 170141183460469231731687303715884105727],
-please use :class:`DecimalType`.
+If the values are beyond the range of [-170141183460469231731687303715884105728,
+170141183460469231731687303715884105727], please use :class:`DecimalType`.
 """

 def __init__(self) -> None: # noqa: D107
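
The reflowed docstring keeps the same bounds; they are just the signed 128-bit limits, which is easy to double-check:

```python
# HugeIntegerType is a signed 128-bit integer, so its range is [-2**127, 2**127 - 1].
print(-(2**127))   # -170141183460469231731687303715884105728
print(2**127 - 1)  # 170141183460469231731687303715884105727
```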

duckdb/query_graph/__main__.py

Lines changed: 2 additions & 1 deletion
@@ -327,7 +327,8 @@ def main() -> None: # noqa: D103
 exit(1)
 parser = argparse.ArgumentParser(
 prog="Query Graph Generator",
-description="Given a json profile output, generate a html file showing the query graph and timings of operators",
+description="""Given a json profile output, generate a html file showing the query graph and
+timings of operators""",
 )
 parser.add_argument("profile_input", help="profile input in json")
 parser.add_argument("--out", required=False, default=False)

duckdb/udf.py

Lines changed: 3 additions & 1 deletion
@@ -2,7 +2,9 @@


 def vectorized(func: Callable) -> Callable:
-"""Decorate a function with annotated function parameters, so DuckDB can infer that the function should be provided with pyarrow arrays and should expect pyarrow array(s) as output."""
+"""Decorate a function with annotated function parameters, so DuckDB can infer that the function should be
+provided with pyarrow arrays and should expect pyarrow array(s) as output.
+"""
 import types
 from inspect import signature

tests/extensions/test_httpfs.py

Lines changed: 5 additions & 3 deletions
@@ -39,9 +39,11 @@ def test_s3fs(self, require):
 def test_httpfs(self, require, pandas):
 connection = require("httpfs")
 try:
-connection.execute(
-"SELECT id, first_name, last_name FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') LIMIT 3;"
-)
+connection.execute("""
+SELECT id, first_name, last_name FROM PARQUET_SCAN(
+'https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet'
+) LIMIT 3;
+""")
 except RuntimeError as e:
 # Test will ignore result if it fails due to networking issues while running the test.
 if str(e).startswith("HTTP HEAD error") or str(e).startswith("Unable to connect"):
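
SQL ignores the extra whitespace a triple-quoted string introduces, which is why the query above can be re-flowed freely. A self-contained sketch of the same pattern that needs no network access (the VALUES data is invented):

```python
import duckdb

con = duckdb.connect()
rows = con.execute("""
    SELECT id, name FROM (
        VALUES (1, 'first'), (2, 'second'), (3, 'third')
    ) AS t(id, name)
    LIMIT 3;
""").fetchall()
print(rows)  # [(1, 'first'), (2, 'second'), (3, 'third')]
```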

tests/fast/api/test_duckdb_connection.py

Lines changed: 2 additions & 1 deletion
@@ -122,7 +122,8 @@ def test_execute(self):

 def test_executemany(self):
 # executemany does not keep an open result set
-# TODO: shouldn't we also have a version that executes a query multiple times with different parameters, returning all of the results?
+# TODO: shouldn't we also have a version that executes a query multiple times with different parameters,
+# returning all of the results?
 duckdb.execute("create table tbl (i integer, j varchar)")
 duckdb.executemany("insert into tbl VALUES (?, ?)", [(5, "test"), (2, "duck"), (42, "quack")])
 res = duckdb.table("tbl").fetchall()
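
What the rewrapped TODO asks for, running one statement against several parameter sets and keeping every result, can be approximated by looping over execute, since executemany keeps no result set. A rough sketch under that reading:

```python
import duckdb

con = duckdb.connect()
con.execute("create table tbl (i integer, j varchar)")
con.executemany("insert into tbl VALUES (?, ?)", [(5, "test"), (2, "duck"), (42, "quack")])

# A stand-in for an "executemany that returns results": one execute per parameter set.
results = [con.execute("select j from tbl where i = ?", [i]).fetchall() for i in (5, 42)]
print(results)  # [[('test',)], [('quack',)]]
```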

tests/fast/api/test_duckdb_query.py

Lines changed: 2 additions & 2 deletions
@@ -111,7 +111,7 @@ def test_named_param_not_exhaustive(self):

 with pytest.raises(
 duckdb.InvalidInputException,
-match="Invalid Input Error: Values were not provided for the following prepared statement parameters: name3",
+match="Invalid Input Error: Values were not provided for the following prepared statement parameters: name3", # noqa: E501
 ):
 con.execute("select $name1, $name2, $name3", {"name1": 5, "name2": 3})

@@ -184,7 +184,7 @@ def test_conversion_from_tuple(self):
 # we throw an error
 with pytest.raises(
 duckdb.InvalidInputException,
-match="Tried to create a STRUCT value from a tuple containing 3 elements, but the STRUCT consists of 2 children",
+match="Tried to create a STRUCT value from a tuple containing 3 elements, but the STRUCT consists of 2 children", # noqa: E501
 ):
 result = con.execute("select $1", [Value(("a", 21, True), {"a": str, "b": int})]).fetchall()
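
The trailing `# noqa: E501` is the pragmatic fix here; an alternative, not used in the commit, is to split the expected message with implicit string concatenation. `pytest.raises(match=...)` applies `re.search`, so special characters need escaping. A sketch using a plain ValueError to stay self-contained:

```python
import re

import pytest

expected = re.escape(
    "Values were not provided for the following "
    "prepared statement parameters: name3"
)

with pytest.raises(ValueError, match=expected):
    raise ValueError(
        "Invalid Input Error: Values were not provided for the following "
        "prepared statement parameters: name3"
    )
```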
