evertlammerts
diff --git a/‎duckdb/_duckdb.pyi‎ renamed to ‎_duckdb-stubs/__init__.pyi‎ b/‎duckdb/_duckdb.pyi‎ renamed to ‎_duckdb-stubs/__init__.pyi‎
diff --git a/‎duckdb/_duckdb/functional.pyi‎ renamed to ‎_duckdb-stubs/functional.pyi‎ b/‎duckdb/_duckdb/functional.pyi‎ renamed to ‎_duckdb-stubs/functional.pyi‎
diff --git a/‎duckdb/_duckdb/typing.pyi‎ renamed to ‎_duckdb-stubs/typing.pyi‎ b/‎duckdb/_duckdb/typing.pyi‎ renamed to ‎_duckdb-stubs/typing.pyi‎
diff --git a/‎duckdb/_duckdb/__init__.pyi‎
Lines changed: 0 additions & 2 deletions b/‎duckdb/_duckdb/__init__.pyi‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎duckdb/filesystem.py‎
Lines changed: 20 additions & 15 deletions b/‎duckdb/filesystem.py‎
Lines changed: 20 additions & 15 deletions
diff --git a/‎duckdb/polars_io.py‎
Lines changed: 75 additions & 42 deletions b/‎duckdb/polars_io.py‎
Lines changed: 75 additions & 42 deletions
diff --git a/‎duckdb/udf.py‎
Lines changed: 2 additions & 2 deletions b/‎duckdb/udf.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 9 additions & 4 deletions b/‎pyproject.toml‎
Lines changed: 9 additions & 4 deletions
@@ -1,28 +1,33 @@
-from io import TextIOBase  # noqa: D100
-from typing import IO
+"""In-memory filesystem to store ephemeral dependencies.
+
+Warning: Not for external use. May change at any moment. Likely to be made internal.
+"""
+
+from __future__ import annotations
+
+import io
+import typing
 
 from fsspec import AbstractFileSystem
 from fsspec.implementations.memory import MemoryFile, MemoryFileSystem
 
 from .bytes_io_wrapper import BytesIOWrapper
 
 
-def is_file_like(obj) -> bool:  # noqa: D103, ANN001
-    # We only care that we can read from the file
-    return hasattr(obj, "read") and hasattr(obj, "seek")
+class ModifiedMemoryFileSystem(MemoryFileSystem):
+    """In-memory filesystem implementation that uses its own protocol."""
 
-
-class ModifiedMemoryFileSystem(MemoryFileSystem):  # noqa: D101
     protocol = ("DUCKDB_INTERNAL_OBJECTSTORE",)
     # defer to the original implementation that doesn't hardcode the protocol
-    _strip_protocol = classmethod(AbstractFileSystem._strip_protocol.__func__)
+    _strip_protocol: typing.Callable[[str], str] = classmethod(AbstractFileSystem._strip_protocol.__func__)  # type: ignore[assignment]
 
-    def add_file(self, object: IO, path: str) -> None:  # noqa: D102
-        if not is_file_like(object):
+    def add_file(self, obj: io.IOBase | BytesIOWrapper, path: str) -> None:
+        """Add a file to the filesystem."""
+        if not isinstance(obj, io.IOBase):
             msg = "Can not read from a non file-like object"
-            raise ValueError(msg)
-        path = self._strip_protocol(path)
-        if isinstance(object, TextIOBase):
+            raise TypeError(msg)
+        if isinstance(obj, io.TextIOBase):
             # Wrap this so that we can return a bytes object from 'read'
-            object = BytesIOWrapper(object)
-        self.store[path] = MemoryFile(self, path, object.read())
+            obj = BytesIOWrapper(obj)
+        path = self._strip_protocol(path)
+        self.store[path] = MemoryFile(self, path, obj.read())
@@ -1,17 +1,24 @@
-import datetime  # noqa: D100
+from __future__ import annotations  # noqa: D100
+
+import datetime
 import json
-from collections.abc import Iterator
+import typing
 from decimal import Decimal
-from typing import Optional
 
 import polars as pl
 from polars.io.plugins import register_io_source
 
 import duckdb
-from duckdb import SQLExpression
 
+if typing.TYPE_CHECKING:
+    from collections.abc import Iterator
+
+    import typing_extensions
+
+_ExpressionTree: typing_extensions.TypeAlias = typing.Dict[str, typing.Union[str, int, "_ExpressionTree", typing.Any]]  # noqa: UP006
 
-def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
+
+def _predicate_to_expression(predicate: pl.Expr) -> duckdb.Expression | None:
     """Convert a Polars predicate expression to a DuckDB-compatible SQL expression.
 
     Parameters:
@@ -31,7 +38,7 @@ def _predicate_to_expression(predicate: pl.Expr) -> Optional[SQLExpression]:
     try:
         # Convert the tree to SQL
         sql_filter = _pl_tree_to_sql(tree)
-        return SQLExpression(sql_filter)
+        return duckdb.SQLExpression(sql_filter)
     except Exception:
         # If the conversion fails, we return None
         return None
@@ -70,7 +77,7 @@ def _escape_sql_identifier(identifier: str) -> str:
     return f'"{escaped}"'
 
 
-def _pl_tree_to_sql(tree: dict) -> str:
+def _pl_tree_to_sql(tree: _ExpressionTree) -> str:
     """Recursively convert a Polars expression tree (as JSON) to a SQL string.
 
     Parameters:
@@ -91,38 +98,51 @@ def _pl_tree_to_sql(tree: dict) -> str:
         Output: "(foo > 5)"
     """
     [node_type] = tree.keys()
-    subtree = tree[node_type]
 
     if node_type == "BinaryExpr":
         # Binary expressions: left OP right
-        return (
-            "("
-            + " ".join(
-                (
-                    _pl_tree_to_sql(subtree["left"]),
-                    _pl_operation_to_sql(subtree["op"]),
-                    _pl_tree_to_sql(subtree["right"]),
-                )
-            )
-            + ")"
-        )
+        bin_expr_tree = tree[node_type]
+        assert isinstance(bin_expr_tree, dict), f"A {node_type} should be a dict but got {type(bin_expr_tree)}"
+        lhs, op, rhs = bin_expr_tree["left"], bin_expr_tree["op"], bin_expr_tree["right"]
+        assert isinstance(lhs, dict), f"LHS of a {node_type} should be a dict but got {type(lhs)}"
+        assert isinstance(op, str), f"The op of a {node_type} should be a str but got {type(op)}"
+        assert isinstance(rhs, dict), f"RHS of a {node_type} should be a dict but got {type(rhs)}"
+        return f"({_pl_tree_to_sql(lhs)} {_pl_operation_to_sql(op)} {_pl_tree_to_sql(rhs)})"
     if node_type == "Column":
         # A reference to a column name
         # Wrap in quotes to handle special characters
-        return _escape_sql_identifier(subtree)
+        col_name = tree[node_type]
+        assert isinstance(col_name, str), f"The col name of a {node_type} should be a str but got {type(col_name)}"
+        return _escape_sql_identifier(col_name)
 
     if node_type in ("Literal", "Dyn"):
         # Recursively process dynamic or literal values
-        return _pl_tree_to_sql(subtree)
+        val_tree = tree[node_type]
+        assert isinstance(val_tree, dict), f"A {node_type} should be a dict but got {type(val_tree)}"
+        return _pl_tree_to_sql(val_tree)
 
     if node_type == "Int":
         # Direct integer literals
-        return str(subtree)
+        int_literal = tree[node_type]
+        assert isinstance(int_literal, (int, str)), (
+            f"The value of an Int should be an int or str but got {type(int_literal)}"
+        )
+        return str(int_literal)
 
     if node_type == "Function":
         # Handle boolean functions like IsNull, IsNotNull
-        inputs = subtree["input"]
-        func_dict = subtree["function"]
+        func_tree = tree[node_type]
+        assert isinstance(func_tree, dict), f"A {node_type} should be a dict but got {type(func_tree)}"
+        inputs = func_tree["input"]
+        assert isinstance(inputs, list), f"A {node_type} should have a list of dicts as input but got {type(inputs)}"
+        input_tree = inputs[0]
+        assert isinstance(input_tree, dict), (
+            f"A {node_type} should have a list of dicts as input but got {type(input_tree)}"
+        )
+        func_dict = func_tree["function"]
+        assert isinstance(func_dict, dict), (
+            f"A {node_type} should have a function dict as input but got {type(func_dict)}"
+        )
 
         if "Boolean" in func_dict:
             func = func_dict["Boolean"]
@@ -140,24 +160,31 @@ def _pl_tree_to_sql(tree: dict) -> str:
 
     if node_type == "Scalar":
         # Detect format: old style (dtype/value) or new style (direct type key)
-        if "dtype" in subtree and "value" in subtree:
-            dtype = str(subtree["dtype"])
-            value = subtree["value"]
+        scalar_tree = tree[node_type]
+        assert isinstance(scalar_tree, dict), f"A {node_type} should be a dict but got {type(scalar_tree)}"
+        if "dtype" in scalar_tree and "value" in scalar_tree:
+            dtype = str(scalar_tree["dtype"])
+            value = scalar_tree["value"]
         else:
             # New style: dtype is the single key in the dict
-            dtype = next(iter(subtree.keys()))
-            value = subtree
+            dtype = next(iter(scalar_tree.keys()))
+            value = scalar_tree
+        assert isinstance(dtype, str), f"A {node_type} should have a str dtype but got  {type(dtype)}"
+        assert isinstance(value, dict), f"A {node_type} should have a dict value but got {type(value)}"
 
         # Decimal support
         if dtype.startswith("{'Decimal'") or dtype == "Decimal":
             decimal_value = value["Decimal"]
-            decimal_value = Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1])
-            return str(decimal_value)
+            assert isinstance(decimal_value, list), (
+                f"A {dtype} should be a two member list but got {type(decimal_value)}"
+            )
+            return str(Decimal(decimal_value[0]) / Decimal(10 ** decimal_value[1]))
 
         # Datetime with microseconds since epoch
         if dtype.startswith("{'Datetime'") or dtype == "Datetime":
-            micros = value["Datetime"][0]
-            dt_timestamp = datetime.datetime.fromtimestamp(micros / 1_000_000, tz=datetime.UTC)
+            micros = value["Datetime"]
+            assert isinstance(micros, list), f"A {dtype} should be a one member list but got {type(micros)}"
+            dt_timestamp = datetime.datetime.fromtimestamp(micros[0] / 1_000_000, tz=datetime.timezone.utc)
             return f"'{dt_timestamp!s}'::TIMESTAMP"
 
         # Match simple numeric/boolean types
@@ -179,6 +206,7 @@ def _pl_tree_to_sql(tree: dict) -> str:
         # Time type
         if dtype == "Time":
             nanoseconds = value["Time"]
+            assert isinstance(nanoseconds, int), f"A {dtype} should be an int but got {type(nanoseconds)}"
             seconds = nanoseconds // 1_000_000_000
             microseconds = (nanoseconds % 1_000_000_000) // 1_000
             dt_time = (datetime.datetime.min + datetime.timedelta(seconds=seconds, microseconds=microseconds)).time()
@@ -187,36 +215,41 @@ def _pl_tree_to_sql(tree: dict) -> str:
         # Date type
         if dtype == "Date":
             days_since_epoch = value["Date"]
+            assert isinstance(days_since_epoch, (float, int)), (
+                f"A {dtype} should be a number but got {type(days_since_epoch)}"
+            )
             date = datetime.date(1970, 1, 1) + datetime.timedelta(days=days_since_epoch)
             return f"'{date}'::DATE"
 
         # Binary type
         if dtype == "Binary":
-            binary_data = bytes(value["Binary"])
+            bin_value = value["Binary"]
+            assert isinstance(bin_value, bytes), f"A {dtype} should be bytes but got {type(bin_value)}"
+            binary_data = bytes(bin_value)
             escaped = "".join(f"\\x{b:02x}" for b in binary_data)
             return f"'{escaped}'::BLOB"
 
         # String type
         if dtype == "String" or dtype == "StringOwned":
             # Some new formats may store directly under StringOwned
-            string_val = value.get("StringOwned", value.get("String", None))
+            string_val: object | None = value.get("StringOwned", value.get("String", None))
             return f"'{string_val}'"
 
         msg = f"Unsupported scalar type {dtype!s}, with value {value}"
         raise NotImplementedError(msg)
 
-    msg = f"Node type: {node_type} is not implemented. {subtree}"
+    msg = f"Node type: {node_type} is not implemented. {tree[node_type]}"
     raise NotImplementedError(msg)
 
 
 def duckdb_source(relation: duckdb.DuckDBPyRelation, schema: pl.schema.Schema) -> pl.LazyFrame:
     """A polars IO plugin for DuckDB."""
 
     def source_generator(
-        with_columns: Optional[list[str]],
-        predicate: Optional[pl.Expr],
-        n_rows: Optional[int],
-        batch_size: Optional[int],
+        with_columns: list[str] | None,
+        predicate: pl.Expr | None,
+        n_rows: int | None,
+        batch_size: int | None,
     ) -> Iterator[pl.DataFrame]:
         duck_predicate = None
         relation_final = relation
@@ -239,8 +272,8 @@ def source_generator(
         for record_batch in iter(results.read_next_batch, None):
             if predicate is not None and duck_predicate is None:
                 # We have a predicate, but did not manage to push it down, we fallback here
-                yield pl.from_arrow(record_batch).filter(predicate)
+                yield pl.from_arrow(record_batch).filter(predicate)  # type: ignore[arg-type,misc]
             else:
-                yield pl.from_arrow(record_batch)
+                yield pl.from_arrow(record_batch)  # type: ignore[misc]
 
     return register_io_source(source_generator, schema=schema)
@@ -1,8 +1,8 @@
 # ruff: noqa: D100
-from typing import Callable
+import typing
 
 
-def vectorized(func: Callable) -> Callable:
+def vectorized(func: typing.Callable[..., typing.Any]) -> typing.Callable[..., typing.Any]:
     """Decorate a function with annotated function parameters.
 
     This allows DuckDB to infer that the function should be provided with pyarrow arrays and should expect
 
@@ -305,19 +305,24 @@ pretty = true
 python_version = "3.9"
 exclude = [
     "duckdb/experimental/",
-    "duckdb/functional/",
     "duckdb/query_graph/",
-    "duckdb/typing/",
     "duckdb/value/",
 ]
 
 [[tool.mypy.overrides]]
 module = [
-#    "some_untyped_dependency.*",
-#    "another_untyped_lib"
+    "fsspec.*",
+    "pandas",
+    "polars",
+    "pyarrow",
+    "torch",
 ]
 ignore_missing_imports = true
 
+[[tool.mypy.overrides]]
+module = "duckdb.filesystem"
+disallow_subclassing_any = false
+
 [tool.pytest.ini_options]
 minversion = "6.0"
 addopts = "-ra -q"