
Commit e81189f

Ruff EM: Error messages assigned to a var first

1 parent 7131c31, commit e81189f
22 files changed: +242 -127 lines changed
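
For context, the pattern applied throughout this commit comes from Ruff's EM rules (EM101 for string literals, EM102 for f-strings, ported from flake8-errmsg): passing the message inline to the exception constructor means an unhandled exception prints the text twice, once in the echoed "raise" source line of the traceback and once in the error message itself. Binding the message to a variable first avoids the duplication. A minimal before/after sketch; the function names below are hypothetical, not taken from the diff:

# Flagged by Ruff EM102: the f-string is repeated in the traceback,
# in both the echoed source line and the exception text.
def require_column_unfixed(name: str, columns: list[str]) -> None:
    if name not in columns:
        raise ValueError(f"DataFrame does not contain a column named {name}")

# Compliant: the echoed source line is just "raise ValueError(msg)",
# so the message text appears only once.
def require_column(name: str, columns: list[str]) -> None:
    if name not in columns:
        msg = f"DataFrame does not contain a column named {name}"
        raise ValueError(msg)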

duckdb/experimental/spark/_globals.py

Lines changed: 2 additions & 1 deletion
@@ -39,7 +39,8 @@ def foo(arg=pyducdkb.spark._NoValue):
 # Disallow reloading this module so as to preserve the identities of the
 # classes defined here.
 if "_is_loaded" in globals():
-    raise RuntimeError("Reloading duckdb.experimental.spark._globals is not allowed")
+    msg = "Reloading duckdb.experimental.spark._globals is not allowed"
+    raise RuntimeError(msg)
 _is_loaded = True


duckdb/experimental/spark/errors/utils.py

Lines changed: 4 additions & 2 deletions
@@ -86,7 +86,8 @@ def get_message_template(self, error_class: str) -> str:
         if main_error_class in self.error_info_map:
             main_error_class_info_map = self.error_info_map[main_error_class]
         else:
-            raise ValueError(f"Cannot find main error class '{main_error_class}'")
+            msg = f"Cannot find main error class '{main_error_class}'"
+            raise ValueError(msg)

         main_message_template = "\n".join(main_error_class_info_map["message"])

@@ -101,7 +102,8 @@ def get_message_template(self, error_class: str) -> str:
         if sub_error_class in main_error_class_subclass_info_map:
             sub_error_class_info_map = main_error_class_subclass_info_map[sub_error_class]
         else:
-            raise ValueError(f"Cannot find sub error class '{sub_error_class}'")
+            msg = f"Cannot find sub error class '{sub_error_class}'"
+            raise ValueError(msg)

         sub_message_template = "\n".join(sub_error_class_info_map["message"])
         message_template = main_message_template + " " + sub_message_template

duckdb/experimental/spark/sql/column.py

Lines changed: 4 additions & 2 deletions
@@ -201,15 +201,17 @@ def __getattr__(self, item: Any) -> "Column":
         +------+
         """
         if item.startswith("__"):
-            raise AttributeError("Can not access __ (dunder) method")
+            msg = "Can not access __ (dunder) method"
+            raise AttributeError(msg)
         return self[item]

     def alias(self, alias: str):
         return Column(self.expr.alias(alias))

     def when(self, condition: "Column", value: Any):
         if not isinstance(condition, Column):
-            raise TypeError("condition should be a Column")
+            msg = "condition should be a Column"
+            raise TypeError(msg)
         v = _get_expr(value)
         expr = self.expr.when(condition.expr, v)
         return Column(expr)

duckdb/experimental/spark/sql/dataframe.py

Lines changed: 6 additions & 3 deletions
@@ -108,7 +108,8 @@ def createGlobalTempView(self, name: str) -> None:

     def withColumnRenamed(self, columnName: str, newName: str) -> "DataFrame":
         if columnName not in self.relation:
-            raise ValueError(f"DataFrame does not contain a column named {columnName}")
+            msg = f"DataFrame does not contain a column named {columnName}"
+            raise ValueError(msg)
         cols = []
         for x in self.relation.columns:
             col = ColumnExpression(x)
@@ -258,7 +259,8 @@ def withColumnsRenamed(self, colsMap: dict[str, str]) -> "DataFrame":

         unknown_columns = set(colsMap.keys()) - set(self.relation.columns)
         if unknown_columns:
-            raise ValueError(f"DataFrame does not contain column(s): {', '.join(unknown_columns)}")
+            msg = f"DataFrame does not contain column(s): {', '.join(unknown_columns)}"
+            raise ValueError(msg)

         # Compute this only once
         old_column_names = list(colsMap.keys())
@@ -887,7 +889,8 @@ def __getitem__(self, item: Union[int, str, Column, list, tuple]) -> Union[Colum
         elif isinstance(item, int):
             return col(self._schema[item].name)
         else:
-            raise TypeError(f"Unexpected item type: {type(item)}")
+            msg = f"Unexpected item type: {type(item)}"
+            raise TypeError(msg)

     def __getattr__(self, name: str) -> Column:
         """Returns the :class:`Column` denoted by ``name``.

duckdb/experimental/spark/sql/functions.py

Lines changed: 20 additions & 10 deletions
@@ -92,7 +92,8 @@ def ucase(str: "ColumnOrName") -> Column:

 def when(condition: "Column", value: Any) -> Column:
     if not isinstance(condition, Column):
-        raise TypeError("condition should be a Column")
+        msg = "condition should be a Column"
+        raise TypeError(msg)
     v = _get_expr(value)
     expr = CaseExpression(condition.expr, v)
     return Column(expr)
@@ -1480,7 +1481,8 @@ def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> C
     +---------------+
     """
     if rsd is not None:
-        raise ValueError("rsd is not supported by DuckDB")
+        msg = "rsd is not supported by DuckDB"
+        raise ValueError(msg)
     return _invoke_function_over_columns("approx_count_distinct", col)


@@ -2365,7 +2367,8 @@ def rand(seed: Optional[int] = None) -> Column:
     """
     if seed is not None:
         # Maybe call setseed just before but how do we know when it is executed?
-        raise ContributionsAcceptedError("Seed is not yet implemented")
+        msg = "Seed is not yet implemented"
+        raise ContributionsAcceptedError(msg)
     return _invoke_function("random")


@@ -2842,7 +2845,8 @@ def encode(col: "ColumnOrName", charset: str) -> Column:
     +----------------+
     """
     if charset != "UTF-8":
-        raise ContributionsAcceptedError("Only UTF-8 charset is supported right now")
+        msg = "Only UTF-8 charset is supported right now"
+        raise ContributionsAcceptedError(msg)
     return _invoke_function("encode", _to_column_expr(col))


@@ -3017,7 +3021,8 @@ def greatest(*cols: "ColumnOrName") -> Column:
     [Row(greatest=4)]
     """
     if len(cols) < 2:
-        raise ValueError("greatest should take at least 2 columns")
+        msg = "greatest should take at least 2 columns"
+        raise ValueError(msg)

     cols = [_to_column_expr(expr) for expr in cols]
     return _invoke_function("greatest", *cols)
@@ -3049,7 +3054,8 @@ def least(*cols: "ColumnOrName") -> Column:
     [Row(least=1)]
     """
     if len(cols) < 2:
-        raise ValueError("least should take at least 2 columns")
+        msg = "least should take at least 2 columns"
+        raise ValueError(msg)

     cols = [_to_column_expr(expr) for expr in cols]
     return _invoke_function("least", *cols)
@@ -3550,12 +3556,14 @@ def sha2(col: "ColumnOrName", numBits: int) -> Column:
     +-----+----------------------------------------------------------------+
     """
     if numBits not in {224, 256, 384, 512, 0}:
-        raise ValueError("numBits should be one of {224, 256, 384, 512, 0}")
+        msg = "numBits should be one of {224, 256, 384, 512, 0}"
+        raise ValueError(msg)

     if numBits == 256:
         return _invoke_function_over_columns("sha256", col)

-    raise ContributionsAcceptedError("SHA-224, SHA-384, and SHA-512 are not supported yet.")
+    msg = "SHA-224, SHA-384, and SHA-512 are not supported yet."
+    raise ContributionsAcceptedError(msg)


 def curdate() -> Column:
@@ -5241,7 +5249,8 @@ def array_sort(col: "ColumnOrName", comparator: Optional[Callable[[Column, Colum
     [Row(r=['foobar', 'foo', None, 'bar']), Row(r=['foo']), Row(r=[])]
     """
     if comparator is not None:
-        raise ContributionsAcceptedError("comparator is not yet supported")
+        msg = "comparator is not yet supported"
+        raise ContributionsAcceptedError(msg)
     else:
         return _invoke_function_over_columns("list_sort", col, lit("ASC"), lit("NULLS LAST"))

@@ -5335,7 +5344,8 @@ def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column:
     if limit > 0:
         # Unclear how to implement this in DuckDB as we'd need to map back from the split array
         # to the original array which is tricky with regular expressions.
-        raise ContributionsAcceptedError("limit is not yet supported")
+        msg = "limit is not yet supported"
+        raise ContributionsAcceptedError(msg)
     return _invoke_function_over_columns("regexp_split_to_array", str, lit(pattern))


duckdb/experimental/spark/sql/readwriter.py

Lines changed: 52 additions & 26 deletions
@@ -248,10 +248,12 @@ def csv(
     def parquet(self, *paths: str, **options: "OptionalPrimitiveType") -> "DataFrame":
         input = list(paths)
         if len(input) != 1:
-            raise NotImplementedError("Only single paths are supported for now")
+            msg = "Only single paths are supported for now"
+            raise NotImplementedError(msg)
         option_amount = len(options.keys())
         if option_amount != 0:
-            raise ContributionsAcceptedError("Options are not supported")
+            msg = "Options are not supported"
+            raise ContributionsAcceptedError(msg)
         path = input[0]
         rel = self.session.conn.read_parquet(path)
         from ..sql.dataframe import DataFrame
@@ -338,53 +340,77 @@ def json(
         +---+------------+
         """
         if schema is not None:
-            raise ContributionsAcceptedError("The 'schema' option is not supported")
+            msg = "The 'schema' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if primitivesAsString is not None:
-            raise ContributionsAcceptedError("The 'primitivesAsString' option is not supported")
+            msg = "The 'primitivesAsString' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if prefersDecimal is not None:
-            raise ContributionsAcceptedError("The 'prefersDecimal' option is not supported")
+            msg = "The 'prefersDecimal' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if allowComments is not None:
-            raise ContributionsAcceptedError("The 'allowComments' option is not supported")
+            msg = "The 'allowComments' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if allowUnquotedFieldNames is not None:
-            raise ContributionsAcceptedError("The 'allowUnquotedFieldNames' option is not supported")
+            msg = "The 'allowUnquotedFieldNames' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if allowSingleQuotes is not None:
-            raise ContributionsAcceptedError("The 'allowSingleQuotes' option is not supported")
+            msg = "The 'allowSingleQuotes' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if allowNumericLeadingZero is not None:
-            raise ContributionsAcceptedError("The 'allowNumericLeadingZero' option is not supported")
+            msg = "The 'allowNumericLeadingZero' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if allowBackslashEscapingAnyCharacter is not None:
-            raise ContributionsAcceptedError("The 'allowBackslashEscapingAnyCharacter' option is not supported")
+            msg = "The 'allowBackslashEscapingAnyCharacter' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if mode is not None:
-            raise ContributionsAcceptedError("The 'mode' option is not supported")
+            msg = "The 'mode' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if columnNameOfCorruptRecord is not None:
-            raise ContributionsAcceptedError("The 'columnNameOfCorruptRecord' option is not supported")
+            msg = "The 'columnNameOfCorruptRecord' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if dateFormat is not None:
-            raise ContributionsAcceptedError("The 'dateFormat' option is not supported")
+            msg = "The 'dateFormat' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if timestampFormat is not None:
-            raise ContributionsAcceptedError("The 'timestampFormat' option is not supported")
+            msg = "The 'timestampFormat' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if multiLine is not None:
-            raise ContributionsAcceptedError("The 'multiLine' option is not supported")
+            msg = "The 'multiLine' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if allowUnquotedControlChars is not None:
-            raise ContributionsAcceptedError("The 'allowUnquotedControlChars' option is not supported")
+            msg = "The 'allowUnquotedControlChars' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if lineSep is not None:
-            raise ContributionsAcceptedError("The 'lineSep' option is not supported")
+            msg = "The 'lineSep' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if samplingRatio is not None:
-            raise ContributionsAcceptedError("The 'samplingRatio' option is not supported")
+            msg = "The 'samplingRatio' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if dropFieldIfAllNull is not None:
-            raise ContributionsAcceptedError("The 'dropFieldIfAllNull' option is not supported")
+            msg = "The 'dropFieldIfAllNull' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if encoding is not None:
-            raise ContributionsAcceptedError("The 'encoding' option is not supported")
+            msg = "The 'encoding' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if locale is not None:
-            raise ContributionsAcceptedError("The 'locale' option is not supported")
+            msg = "The 'locale' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if pathGlobFilter is not None:
-            raise ContributionsAcceptedError("The 'pathGlobFilter' option is not supported")
+            msg = "The 'pathGlobFilter' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if recursiveFileLookup is not None:
-            raise ContributionsAcceptedError("The 'recursiveFileLookup' option is not supported")
+            msg = "The 'recursiveFileLookup' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if modifiedBefore is not None:
-            raise ContributionsAcceptedError("The 'modifiedBefore' option is not supported")
+            msg = "The 'modifiedBefore' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if modifiedAfter is not None:
-            raise ContributionsAcceptedError("The 'modifiedAfter' option is not supported")
+            msg = "The 'modifiedAfter' option is not supported"
+            raise ContributionsAcceptedError(msg)
         if allowNonNumericNumbers is not None:
-            raise ContributionsAcceptedError("The 'allowNonNumericNumbers' option is not supported")
+            msg = "The 'allowNonNumericNumbers' option is not supported"
+            raise ContributionsAcceptedError(msg)

         if isinstance(path, str):
             path = [path]

duckdb/experimental/spark/sql/types.py

Lines changed: 24 additions & 12 deletions
@@ -731,7 +731,8 @@ def fromInternal(self, obj: T) -> T:
         return self.dataType.fromInternal(obj)

     def typeName(self) -> str:  # type: ignore[override]
-        raise TypeError("StructField does not have typeName. Use typeName on its type explicitly instead.")
+        msg = "StructField does not have typeName. Use typeName on its type explicitly instead."
+        raise TypeError(msg)


 class StructType(DataType):
@@ -841,7 +842,8 @@ def add(
             self.names.append(field.name)
         else:
             if isinstance(field, str) and data_type is None:
-                raise ValueError("Must specify DataType if passing name of struct_field to create.")
+                msg = "Must specify DataType if passing name of struct_field to create."
+                raise ValueError(msg)
             else:
                 data_type_f = data_type
             self.fields.append(StructField(field, data_type_f, nullable, metadata))
@@ -866,16 +868,19 @@ def __getitem__(self, key: Union[str, int]) -> StructField:
             for field in self:
                 if field.name == key:
                     return field
-            raise KeyError(f"No StructField named {key}")
+            msg = f"No StructField named {key}"
+            raise KeyError(msg)
         elif isinstance(key, int):
             try:
                 return self.fields[key]
             except IndexError:
-                raise IndexError("StructType index out of range")
+                msg = "StructType index out of range"
+                raise IndexError(msg)
         elif isinstance(key, slice):
             return StructType(self.fields[key])
         else:
-            raise TypeError("StructType keys should be strings, integers or slices")
+            msg = "StructType keys should be strings, integers or slices"
+            raise TypeError(msg)

     def simpleString(self) -> str:
         return "struct<%s>" % (",".join(f.simpleString() for f in self))
@@ -978,12 +983,14 @@ def typeName(cls) -> str:
     @classmethod
     def sqlType(cls) -> DataType:
         """Underlying SQL storage type for this UDT."""
-        raise NotImplementedError("UDT must implement sqlType().")
+        msg = "UDT must implement sqlType()."
+        raise NotImplementedError(msg)

     @classmethod
     def module(cls) -> str:
         """The Python module of the UDT."""
-        raise NotImplementedError("UDT must implement module().")
+        msg = "UDT must implement module()."
+        raise NotImplementedError(msg)

     @classmethod
     def scalaUDT(cls) -> str:
@@ -1013,11 +1020,13 @@ def fromInternal(self, obj: Any) -> Any:

     def serialize(self, obj: Any) -> Any:
         """Converts a user-type object into a SQL datum."""
-        raise NotImplementedError("UDT must implement toInternal().")
+        msg = "UDT must implement toInternal()."
+        raise NotImplementedError(msg)

     def deserialize(self, datum: Any) -> Any:
         """Converts a SQL datum into a user-type object."""
-        raise NotImplementedError("UDT must implement fromInternal().")
+        msg = "UDT must implement fromInternal()."
+        raise NotImplementedError(msg)

     def simpleString(self) -> str:
         return "udt"
@@ -1126,7 +1135,8 @@ def __new__(cls, **kwargs: Any) -> "Row": ...

     def __new__(cls, *args: Optional[str], **kwargs: Optional[Any]) -> "Row":
         if args and kwargs:
-            raise ValueError("Can not use both args and kwargs to create Row")
+            msg = "Can not use both args and kwargs to create Row"
+            raise ValueError(msg)
         if kwargs:
             # create row objects
             row = tuple.__new__(cls, list(kwargs.values()))
@@ -1163,7 +1173,8 @@ def asDict(self, recursive: bool = False) -> dict[str, Any]:
         True
         """
         if not hasattr(self, "__fields__"):
-            raise TypeError("Cannot convert a Row class into dict")
+            msg = "Cannot convert a Row class into dict"
+            raise TypeError(msg)

         if recursive:

@@ -1224,7 +1235,8 @@ def __getattr__(self, item: str) -> Any:

     def __setattr__(self, key: Any, value: Any) -> None:
         if key != "__fields__":
-            raise RuntimeError("Row is read-only")
+            msg = "Row is read-only"
+            raise RuntimeError(msg)
         self.__dict__[key] = value

     def __reduce__(
