Merged
Changes from 36 commits
37 commits
9b4f144
Migrate Table → TableProvider; refactor registration and access, update
kosiew Sep 15, 2025
3da3f93
Refactors and bug fixes around TableProvider registration and
kosiew Sep 16, 2025
512442b
TableProvider refactor & PyDataFrame integration
kosiew Sep 16, 2025
a8275dc
Normalize & simplify TableProvider/DataFrame registration; add
kosiew Sep 16, 2025
00bd445
refactor: update documentation for DataFrame to Table Provider conver…
kosiew Sep 18, 2025
6869919
refactor: replace to_view_provider with inner_df for DataFrame access
kosiew Sep 18, 2025
6e46d43
refactor: streamline TableProvider creation from DataFrame by consoli…
kosiew Sep 18, 2025
38af2b5
Merge branch 'main' into table-provider-1239
kosiew Sep 18, 2025
1872a7f
fix ruff errors
kosiew Sep 18, 2025
5948fb4
refactor: enhance autoapi_skip_member_fn to skip private variables an…
kosiew Sep 18, 2025
b9851d8
revert main 49.0.0 md
kosiew Sep 22, 2025
586c2cf
refactor: add comment in autoapi_skip_member_fn
kosiew Sep 22, 2025
d4ff136
refactor: remove isort and ruff comments to clean up import section
kosiew Sep 22, 2025
29203c6
docs: enhance docstring for DataFrame.into_view method to clarify usa…
kosiew Sep 22, 2025
ae8c1dd
docs: update example in DataFrame.into_view docstring for clarity
kosiew Sep 22, 2025
0c5eb17
docs: update example for registering Delta Lake tables to simplify usage
kosiew Sep 22, 2025
f9a3a22
docs: update table provider documentation for clarity and deprecate o…
kosiew Sep 22, 2025
f930181
docs: update documentation to reflect removal of TableProvider and us…
kosiew Sep 22, 2025
afc9b4e
remove TableProvider in Python, update missing_exports function, doc
kosiew Sep 22, 2025
918b1ce
Fix Ruff errors
kosiew Sep 22, 2025
93f0a31
Refactor test_table_loading to use Table instead of TableProvider
kosiew Sep 22, 2025
7bc303d
Refactor aggregate tests to simplify result assertions and improve re…
kosiew Sep 22, 2025
4429614
Add comments to clarify table normalization in aggregate tests
kosiew Sep 22, 2025
38bb25a
Initial implementation of unified table suggestion
timsaucer Oct 1, 2025
49abaeb
update unit tests
timsaucer Oct 1, 2025
cb7a755
Change documentation to be more user oriented
timsaucer Oct 1, 2025
937d39c
Update ffi examples
timsaucer Oct 1, 2025
c70968f
Update documentation
timsaucer Oct 1, 2025
90b3cb6
More documentation
timsaucer Oct 1, 2025
25d4141
Make documentation more user facing
timsaucer Oct 1, 2025
29b634e
More documentation updates
timsaucer Oct 1, 2025
20099d2
remove cruft
timsaucer Oct 1, 2025
c82d617
merge main
timsaucer Oct 1, 2025
9964b7f
fix ordering
timsaucer Oct 1, 2025
81b46cb
give read_table the same treatment
timsaucer Oct 2, 2025
36084a0
Reuse Table constructor to identify non-ffi tables when using udtf
timsaucer Oct 3, 2025
a50c1c6
Merge main into tsaucer/table-provider-recommendations
timsaucer Oct 10, 2025
7 changes: 7 additions & 0 deletions docs/source/conf.py
@@ -91,6 +91,13 @@ def autoapi_skip_member_fn(app, what, name, obj, skip, options) -> bool: # noqa
("method", "datafusion.context.SessionContext.tables"),
("method", "datafusion.dataframe.DataFrame.unnest_column"),
]
# Explicitly skip certain members listed above. These are either
# re-exports, duplicate module-level documentation, deprecated
# API surfaces, or private variables that would otherwise appear
# in the generated docs and cause confusing duplication.
# Keeping this explicit list avoids surprising entries in the
# AutoAPI output and gives us a single place to opt-out items
# when we intentionally hide them from the docs.
if (what, name) in skip_contents:
skip = True
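
For context, a callback like this only takes effect once it is connected to AutoAPI's skip-member event elsewhere in conf.py. A minimal sketch, assuming the standard sphinx-autoapi event name; this hookup is illustrative rather than quoted from the PR:

def setup(app):
    # sphinx-autoapi emits "autoapi-skip-member" once per candidate member;
    # returning True from the callback hides that member from the docs.
    app.connect("autoapi-skip-member", autoapi_skip_member_fn)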

2 changes: 1 addition & 1 deletion docs/source/contributor-guide/ffi.rst
@@ -34,7 +34,7 @@ as performant as possible and to utilize the features of DataFusion, you may dec
your source in Rust and then expose it through `PyO3 <https://pyo3.rs>`_ as a Python library.

At first glance, it may appear the best way to do this is to add the ``datafusion-python``
crate as a dependency, provide a ``PyTable``, and then to register it with the
``SessionContext``. Unfortunately, this will not work.

When you produce your code as a Python library and it needs to interact with the DataFusion
4 changes: 2 additions & 2 deletions docs/source/user-guide/data-sources.rst
@@ -154,11 +154,11 @@ as Delta Lake. This will require a recent version of
from deltalake import DeltaTable

delta_table = DeltaTable("path_to_table")
ctx.register_table_provider("my_delta_table", delta_table)
ctx.register_table("my_delta_table", delta_table)
df = ctx.table("my_delta_table")
df.show()

On older versions of ``deltalake`` (prior to 0.22) you can use the
`Arrow DataSet <https://arrow.apache.org/docs/python/generated/pyarrow.dataset.Dataset.html>`_
interface to import to DataFusion, but this does not support features such as filter push down
which can lead to a significant performance difference.
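
As a concrete illustration of that fallback, here is a minimal sketch assuming ``DeltaTable.to_pyarrow_dataset()`` (the Arrow Dataset export available on those older releases) and an existing ``ctx``:

from deltalake import DeltaTable

delta_table = DeltaTable("path_to_table")
# Export through the Arrow Dataset interface; filter push down is not
# supported on this path, hence the performance caveat above.
dataset = delta_table.to_pyarrow_dataset()
ctx.register_table("my_delta_table", dataset)
ctx.table("my_delta_table").show()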
20 changes: 12 additions & 8 deletions docs/source/user-guide/io/table_provider.rst
@@ -37,22 +37,26 @@ A complete example can be found in the `examples folder <https://github.com/apac
&self,
py: Python<'py>,
) -> PyResult<Bound<'py, PyCapsule>> {
let name = CString::new("datafusion_table_provider").unwrap();
let name = cr"datafusion_table_provider".into();

let provider = Arc::new(self.clone())
.map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
let provider = FFI_TableProvider::new(Arc::new(provider), false);
let provider = Arc::new(self.clone());
let provider = FFI_TableProvider::new(provider, false, None);

PyCapsule::new_bound(py, provider, Some(name.clone()))
}
}

Once you have this library available, in python you can register your table provider
to the ``SessionContext``.
Once you have this library available, you can construct a
:py:class:`~datafusion.Table` in Python and register it with the
``SessionContext``.

.. code-block:: python

from datafusion import SessionContext, Table

ctx = SessionContext()
provider = MyTableProvider()
ctx.register_table_provider("my_table", provider)

ctx.table("my_table").show()
ctx.register_table("capsule_table", provider)

ctx.table("capsule_table").show()
@@ -36,9 +36,9 @@ def test_catalog_provider():

my_catalog_schemas = my_catalog.names()
assert expected_schema_name in my_catalog_schemas
my_database = my_catalog.database(expected_schema_name)
assert expected_table_name in my_database.names()
my_table = my_database.table(expected_table_name)
my_schema = my_catalog.schema(expected_schema_name)
assert expected_table_name in my_schema.names()
my_table = my_schema.table(expected_table_name)
assert expected_table_columns == my_table.schema.names

result = ctx.table(
@@ -53,7 +53,7 @@ def test_ffi_table_function_call_directly():
table_udtf = udtf(table_func, "my_table_func")

my_table = table_udtf()
ctx.register_table_provider("t", my_table)
ctx.register_table("t", my_table)
result = ctx.table("t").collect()

assert len(result) == 2
@@ -25,7 +25,7 @@
def test_table_loading():
ctx = SessionContext()
table = MyTableProvider(3, 2, 4)
ctx.register_table_provider("t", table)
ctx.register_table("t", table)
result = ctx.table("t").collect()

assert len(result) == 4
@@ -40,3 +40,7 @@ def test_table_loading():
]

assert result == expected

result = ctx.read_table(table).collect()
result = [r.column(0) for r in result]
assert result == expected
Comment on lines +44 to +46 (Member, Author):
@kosiew Here is the unit test for read_table with a PyCapsule based table provider.

12 changes: 4 additions & 8 deletions python/datafusion/__init__.py
@@ -28,17 +28,16 @@
try:
import importlib.metadata as importlib_metadata
except ImportError:
import importlib_metadata
import importlib_metadata # type: ignore[import]

# Public submodules
from . import functions, object_store, substrait, unparser

# The following imports are okay to remain as opaque to the user.
from ._internal import Config
from .catalog import Catalog, Database, Table
from .col import col, column
from .common import (
DFSchema,
)
from .common import DFSchema
from .context import (
RuntimeEnvBuilder,
SessionConfig,
@@ -47,10 +47,7 @@
)
from .dataframe import DataFrame, ParquetColumnOptions, ParquetWriterOptions
from .dataframe_formatter import configure_formatter
from .expr import (
Expr,
WindowFrame,
)
from .expr import Expr, WindowFrame
from .io import read_avro, read_csv, read_json, read_parquet
from .plan import ExecutionPlan, LogicalPlan
from .record_batch import RecordBatch, RecordBatchStream
62 changes: 44 additions & 18 deletions python/datafusion/catalog.py
@@ -20,13 +20,16 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Protocol
from typing import TYPE_CHECKING, Any, Protocol

import datafusion._internal as df_internal

if TYPE_CHECKING:
import pyarrow as pa

from datafusion import DataFrame
from datafusion.context import TableProviderExportable

try:
from warnings import deprecated # Python 3.13+
except ImportError:
@@ -82,7 +85,11 @@ def database(self, name: str = "public") -> Schema:
"""Returns the database with the given ``name`` from this catalog."""
return self.schema(name)

def register_schema(self, name, schema) -> Schema | None:
def register_schema(
self,
name: str,
schema: Schema | SchemaProvider | SchemaProviderExportable,
) -> Schema | None:
"""Register a schema with this catalog."""
if isinstance(schema, Schema):
return self.catalog.register_schema(name, schema._raw_schema)
@@ -122,10 +129,12 @@ def table(self, name: str) -> Table:
"""Return the table with the given ``name`` from this schema."""
return Table(self._raw_schema.table(name))

def register_table(self, name, table) -> None:
"""Register a table provider in this schema."""
if isinstance(table, Table):
return self._raw_schema.register_table(name, table.table)
def register_table(
self,
name: str,
table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset,
) -> None:
"""Register a table in this schema."""
return self._raw_schema.register_table(name, table)

def deregister_table(self, name: str) -> None:
@@ -139,30 +139,45 @@ class Database(Schema):


class Table:
"""DataFusion table."""
"""A DataFusion table.

def __init__(self, table: df_internal.catalog.RawTable) -> None:
"""This constructor is not typically called by the end user."""
self.table = table
Internally we currently support the following types of tables:

- Tables created using built-in DataFusion methods, such as
reading from CSV or Parquet
- pyarrow datasets
- DataFusion DataFrames, which will be converted into a view
- Externally provided tables implemented with the FFI PyCapsule
interface (advanced)
"""

__slots__ = ("_inner",)

def __init__(
self, table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset
) -> None:
"""Constructor."""
self._inner = df_internal.catalog.RawTable(table)

def __repr__(self) -> str:
"""Print a string representation of the table."""
return self.table.__repr__()
return repr(self._inner)

@staticmethod
@deprecated("Use Table() constructor instead.")
def from_dataset(dataset: pa.dataset.Dataset) -> Table:
"""Turn a pyarrow Dataset into a Table."""
return Table(df_internal.catalog.RawTable.from_dataset(dataset))
"""Turn a :mod:`pyarrow.dataset` ``Dataset`` into a :class:`Table`."""
return Table(dataset)

@property
def schema(self) -> pa.Schema:
"""Returns the schema associated with this table."""
return self.table.schema
return self._inner.schema

@property
def kind(self) -> str:
"""Returns the kind of table."""
return self.table.kind
return self._inner.kind


class CatalogProvider(ABC):
@@ -219,14 +243,16 @@ def table(self, name: str) -> Table | None:
"""Retrieve a specific table from this schema."""
...

def register_table(self, name: str, table: Table) -> None: # noqa: B027
"""Add a table from this schema.
def register_table( # noqa: B027
self, name: str, table: Table | TableProviderExportable | Any
) -> None:
"""Add a table to this schema.

This method is optional. If your schema provides a fixed list of tables, you do
not need to implement this method.
"""

def deregister_table(self, name, cascade: bool) -> None: # noqa: B027
def deregister_table(self, name: str, cascade: bool) -> None: # noqa: B027
"""Remove a table from this schema.

This method is optional. If your schema provides a fixed list of tables, you do
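
To make the new unified constructor concrete, a short sketch of building ``Table`` objects from the supported inputs; ``from_pydict`` is assumed only to produce a sample DataFrame:

import pyarrow as pa
import pyarrow.dataset as ds
from datafusion import SessionContext, Table

ctx = SessionContext()

# From a pyarrow dataset (replaces the deprecated Table.from_dataset).
dataset = ds.dataset(pa.table({"a": [1, 2, 3]}))
table_from_dataset = Table(dataset)

# From a DataFrame, which is exposed as a view.
df = ctx.from_pydict({"a": [1, 2, 3]})
table_from_df = Table(df)

print(table_from_dataset.kind, table_from_df.kind)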
53 changes: 32 additions & 21 deletions python/datafusion/context.py
@@ -29,11 +29,10 @@

import pyarrow as pa

from datafusion.catalog import Catalog, CatalogProvider, Table
from datafusion.catalog import Catalog
from datafusion.dataframe import DataFrame
from datafusion.expr import SortKey, sort_list_to_raw_sort_list
from datafusion.expr import sort_list_to_raw_sort_list
from datafusion.record_batch import RecordBatchStream
from datafusion.user_defined import AggregateUDF, ScalarUDF, TableFunction, WindowUDF

from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal
from ._internal import SessionConfig as SessionConfigInternal
@@ -48,7 +47,15 @@
import pandas as pd
import polars as pl # type: ignore[import]

from datafusion.catalog import CatalogProvider, Table
from datafusion.expr import SortKey
from datafusion.plan import ExecutionPlan, LogicalPlan
from datafusion.user_defined import (
AggregateUDF,
ScalarUDF,
TableFunction,
WindowUDF,
)


class ArrowStreamExportable(Protocol):
Expand Down Expand Up @@ -733,7 +740,7 @@ def from_polars(self, data: pl.DataFrame, name: str | None = None) -> DataFrame:
# https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116
# is the discussion on how we arrived at adding register_view
def register_view(self, name: str, df: DataFrame) -> None:
"""Register a :py:class: `~datafusion.detaframe.DataFrame` as a view.
"""Register a :py:class:`~datafusion.dataframe.DataFrame` as a view.

Args:
name (str): The name to register the view under.
@@ -742,16 +749,21 @@ def register_view(self, name: str, df: DataFrame) -> None:
view = df.into_view()
self.ctx.register_table(name, view)

def register_table(self, name: str, table: Table) -> None:
"""Register a :py:class: `~datafusion.catalog.Table` as a table.
def register_table(
self,
name: str,
table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset,
) -> None:
"""Register a :py:class:`~datafusion.Table` with this context.

The registered table can be referenced from SQL statement executed against.
The registered table can be referenced from SQL statements executed against
this context.

Args:
name: Name of the resultant table.
table: DataFusion table to add to the session context.
table: Any object that can be converted into a :class:`Table`.
"""
self.ctx.register_table(name, table.table)
self.ctx.register_table(name, table)

def deregister_table(self, name: str) -> None:
"""Remove a table from the session."""
@@ -770,15 +782,17 @@ def register_catalog_provider(
else:
self.ctx.register_catalog_provider(name, provider)

@deprecated("Use register_table() instead.")
def register_table_provider(
self, name: str, provider: TableProviderExportable
self,
name: str,
provider: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset,
) -> None:
"""Register a table provider.

This table provider must have a method called ``__datafusion_table_provider__``
which returns a PyCapsule that exposes a ``FFI_TableProvider``.
Deprecated: use :meth:`register_table` instead.
"""
self.ctx.register_table_provider(name, provider)
self.register_table(name, provider)

def register_udtf(self, func: TableFunction) -> None:
"""Register a user defined table function."""
@@ -1163,14 +1177,11 @@ def read_avro(
self.ctx.read_avro(str(path), schema, file_partition_cols, file_extension)
)

def read_table(self, table: Table) -> DataFrame:
"""Creates a :py:class:`~datafusion.dataframe.DataFrame` from a table.

For a :py:class:`~datafusion.catalog.Table` such as a
:py:class:`~datafusion.catalog.ListingTable`, create a
:py:class:`~datafusion.dataframe.DataFrame`.
"""
return DataFrame(self.ctx.read_table(table.table))
def read_table(
self, table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset
) -> DataFrame:
"""Creates a :py:class:`~datafusion.dataframe.DataFrame` from a table."""
return DataFrame(self.ctx.read_table(table))

def execute(self, plan: ExecutionPlan, partitions: int) -> RecordBatchStream:
"""Execute the ``plan`` and return the results."""
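
Taken together, the context-level changes let heterogeneous sources flow through a single ``register_table`` call. A closing sketch under the same assumptions as above:

import pyarrow as pa
import pyarrow.dataset as ds
from datafusion import SessionContext

ctx = SessionContext()

# A DataFrame registers as a view...
ctx.register_table("df_table", ctx.from_pydict({"x": [1, 2]}))

# ...and a pyarrow dataset goes through the very same call.
ctx.register_table("ds_table", ds.dataset(pa.table({"x": [3, 4]})))

ctx.sql("SELECT x FROM df_table UNION ALL SELECT x FROM ds_table").show()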