
Commit 709c918

REVERT CHANGES to commit
1 parent 0154c03 commit 709c918

23 files changed: +278 / -383 lines changed


docs/source/conf.py

Lines changed: 0 additions & 7 deletions
@@ -91,13 +91,6 @@ def autoapi_skip_member_fn(app, what, name, obj, skip, options) -> bool: # noqa
         ("method", "datafusion.context.SessionContext.tables"),
         ("method", "datafusion.dataframe.DataFrame.unnest_column"),
     ]
-    # Explicitly skip certain members listed above. These are either
-    # re-exports, duplicate module-level documentation, deprecated
-    # API surfaces, or private variables that would otherwise appear
-    # in the generated docs and cause confusing duplication.
-    # Keeping this explicit list avoids surprising entries in the
-    # AutoAPI output and gives us a single place to opt-out items
-    # when we intentionally hide them from the docs.
     if (what, name) in skip_contents:
         skip = True
 
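
For context, a skip callback with this signature is typically wired up through sphinx-autoapi's ``autoapi-skip-member`` event in ``conf.py``. A minimal sketch, with an illustrative skip list rather than the project's full one:

    # Illustrative sketch: connect a skip callback to sphinx-autoapi.
    # The skip list below is a placeholder, not the project's actual contents.
    skip_contents = [
        ("method", "datafusion.context.SessionContext.tables"),
    ]

    def autoapi_skip_member_fn(app, what, name, obj, skip, options) -> bool:
        if (what, name) in skip_contents:
            skip = True
        return skip

    def setup(app):
        app.connect("autoapi-skip-member", autoapi_skip_member_fn)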

docs/source/contributor-guide/ffi.rst

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ as performant as possible and to utilize the features of DataFusion, you may dec
 your source in Rust and then expose it through `PyO3 <https://pyo3.rs>`_ as a Python library.
 
 At first glance, it may appear the best way to do this is to add the ``datafusion-python``
-crate as a dependency, provide a ``PyTable``, and then to register it with the
+crate as a dependency, provide a ``PyTable``, and then to register it with the
 ``SessionContext``. Unfortunately, this will not work.
 
 When you produce your code as a Python library and it needs to interact with the DataFusion

docs/source/user-guide/data-sources.rst

Lines changed: 2 additions & 2 deletions
@@ -154,11 +154,11 @@ as Delta Lake. This will require a recent version of
     from deltalake import DeltaTable
 
     delta_table = DeltaTable("path_to_table")
-    ctx.register_table("my_delta_table", delta_table)
+    ctx.register_table_provider("my_delta_table", delta_table)
     df = ctx.table("my_delta_table")
     df.show()
 
-On older versions of ``deltalake`` (prior to 0.22) you can use the
+On older versions of ``deltalake`` (prior to 0.22) you can use the
 `Arrow DataSet <https://arrow.apache.org/docs/python/generated/pyarrow.dataset.Dataset.html>`_
 interface to import to DataFusion, but this does not support features such as filter push down
 which can lead to a significant performance difference.
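
Put together, the registration path shown above looks roughly like the following. A minimal sketch, assuming a recent ``deltalake`` (one that exposes ``__datafusion_table_provider__``) and an existing Delta table at a local path:

    from datafusion import SessionContext
    from deltalake import DeltaTable

    ctx = SessionContext()

    # "path_to_table" is illustrative; point it at an existing Delta table.
    delta_table = DeltaTable("path_to_table")
    ctx.register_table_provider("my_delta_table", delta_table)

    ctx.table("my_delta_table").show()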

docs/source/user-guide/io/table_provider.rst

Lines changed: 8 additions & 12 deletions
@@ -37,26 +37,22 @@ A complete example can be found in the `examples folder <https://github.com/apac
         &self,
         py: Python<'py>,
     ) -> PyResult<Bound<'py, PyCapsule>> {
-        let name = cr"datafusion_table_provider".into();
+        let name = CString::new("datafusion_table_provider").unwrap();
 
-        let provider = Arc::new(self.clone());
-        let provider = FFI_TableProvider::new(provider, false, None);
+        let provider = Arc::new(self.clone())
+            .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
+        let provider = FFI_TableProvider::new(Arc::new(provider), false);
 
         PyCapsule::new_bound(py, provider, Some(name.clone()))
     }
 }
 
-Once you have this library available, you can construct a
-:py:class:`~datafusion.Table` in Python and register it with the
-``SessionContext``.
+Once you have this library available, in python you can register your table provider
+to the ``SessionContext``.
 
 .. code-block:: python
 
-    from datafusion import SessionContext, Table
-
-    ctx = SessionContext()
     provider = MyTableProvider()
+    ctx.register_table_provider("my_table", provider)
 
-    ctx.register_table("capsule_table", provider)
-
-    ctx.table("capsule_table").show()
+    ctx.table("my_table").show()
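
For reference, the Python side of the FFI example then looks roughly like this. A minimal sketch, assuming the compiled example crate is installed as ``datafusion_ffi_example`` and exposes the ``MyTableProvider`` class used in the examples folder:

    from datafusion import SessionContext
    from datafusion_ffi_example import MyTableProvider  # assumed module/class names

    ctx = SessionContext()

    # The provider's __datafusion_table_provider__ returns the PyCapsule
    # built by the Rust snippet above.
    provider = MyTableProvider(3, 2, 4)
    ctx.register_table_provider("my_table", provider)

    ctx.table("my_table").show()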

examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py

Lines changed: 3 additions & 3 deletions
@@ -36,9 +36,9 @@ def test_catalog_provider():
 
     my_catalog_schemas = my_catalog.names()
     assert expected_schema_name in my_catalog_schemas
-    my_schema = my_catalog.schema(expected_schema_name)
-    assert expected_table_name in my_schema.names()
-    my_table = my_schema.table(expected_table_name)
+    my_database = my_catalog.database(expected_schema_name)
+    assert expected_table_name in my_database.names()
+    my_table = my_database.table(expected_table_name)
     assert expected_table_columns == my_table.schema.names
 
     result = ctx.table(
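
The same catalog navigation works against the built-in in-memory catalog. A minimal sketch, assuming the default catalog and schema names (``datafusion`` and ``public``):

    from datafusion import SessionContext

    ctx = SessionContext()
    ctx.from_pydict({"a": [1, 2, 3]}, name="my_table")

    catalog = ctx.catalog()        # default catalog
    database = catalog.database()  # default schema, "public"

    assert "my_table" in database.names()
    table = database.table("my_table")
    print(table.schema.names)      # -> ['a']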

examples/datafusion-ffi-example/python/tests/_test_table_function.py

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ def test_ffi_table_function_call_directly():
     table_udtf = udtf(table_func, "my_table_func")
 
     my_table = table_udtf()
-    ctx.register_table("t", my_table)
+    ctx.register_table_provider("t", my_table)
     result = ctx.table("t").collect()
 
     assert len(result) == 2

examples/datafusion-ffi-example/python/tests/_test_table_provider.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@
 def test_table_loading():
     ctx = SessionContext()
     table = MyTableProvider(3, 2, 4)
-    ctx.register_table("t", table)
+    ctx.register_table_provider("t", table)
     result = ctx.table("t").collect()
 
     assert len(result) == 4

python/datafusion/__init__.py

Lines changed: 8 additions & 4 deletions
@@ -28,16 +28,17 @@
 try:
     import importlib.metadata as importlib_metadata
 except ImportError:
-    import importlib_metadata  # type: ignore[import]
+    import importlib_metadata
 
-# Public submodules
 from . import functions, object_store, substrait, unparser
 
 # The following imports are okay to remain as opaque to the user.
 from ._internal import Config
 from .catalog import Catalog, Database, Table
 from .col import col, column
-from .common import DFSchema
+from .common import (
+    DFSchema,
+)
 from .context import (
     RuntimeEnvBuilder,
     SessionConfig,
@@ -46,7 +47,10 @@
 )
 from .dataframe import DataFrame, ParquetColumnOptions, ParquetWriterOptions
 from .dataframe_formatter import configure_formatter
-from .expr import Expr, WindowFrame
+from .expr import (
+    Expr,
+    WindowFrame,
+)
 from .io import read_avro, read_csv, read_json, read_parquet
 from .plan import ExecutionPlan, LogicalPlan
 from .record_batch import RecordBatch, RecordBatchStream

python/datafusion/catalog.py

Lines changed: 18 additions & 44 deletions
@@ -20,16 +20,13 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, Protocol
+from typing import TYPE_CHECKING, Protocol
 
 import datafusion._internal as df_internal
 
 if TYPE_CHECKING:
     import pyarrow as pa
 
-    from datafusion import DataFrame
-    from datafusion.context import TableProviderExportable
-
 try:
     from warnings import deprecated  # Python 3.13+
 except ImportError:
@@ -85,11 +82,7 @@ def database(self, name: str = "public") -> Schema:
         """Returns the database with the given ``name`` from this catalog."""
         return self.schema(name)
 
-    def register_schema(
-        self,
-        name: str,
-        schema: Schema | SchemaProvider | SchemaProviderExportable,
-    ) -> Schema | None:
+    def register_schema(self, name, schema) -> Schema | None:
         """Register a schema with this catalog."""
         if isinstance(schema, Schema):
             return self.catalog.register_schema(name, schema._raw_schema)
@@ -129,12 +122,10 @@ def table(self, name: str) -> Table:
         """Return the table with the given ``name`` from this schema."""
         return Table(self._raw_schema.table(name))
 
-    def register_table(
-        self,
-        name: str,
-        table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset,
-    ) -> None:
-        """Register a table in this schema."""
+    def register_table(self, name, table) -> None:
+        """Register a table provider in this schema."""
+        if isinstance(table, Table):
+            return self._raw_schema.register_table(name, table.table)
         return self._raw_schema.register_table(name, table)
 
     def deregister_table(self, name: str) -> None:
@@ -148,45 +139,30 @@ class Database(Schema):
 
 
 class Table:
-    """A DataFusion table.
-
-    Internally we currently support the following types of tables:
-
-    - Tables created using built-in DataFusion methods, such as
-      reading from CSV or Parquet
-    - pyarrow datasets
-    - DataFusion DataFrames, which will be converted into a view
-    - Externally provided tables implemented with the FFI PyCapsule
-      interface (advanced)
-    """
+    """DataFusion table."""
 
-    __slots__ = ("_inner",)
-
-    def __init__(
-        self, table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset
-    ) -> None:
-        """Constructor."""
-        self._inner = df_internal.catalog.RawTable(table)
+    def __init__(self, table: df_internal.catalog.RawTable) -> None:
+        """This constructor is not typically called by the end user."""
+        self.table = table
 
     def __repr__(self) -> str:
        """Print a string representation of the table."""
-        return repr(self._inner)
+        return self.table.__repr__()
 
     @staticmethod
-    @deprecated("Use Table() constructor instead.")
     def from_dataset(dataset: pa.dataset.Dataset) -> Table:
-        """Turn a :mod:`pyarrow.dataset` ``Dataset`` into a :class:`Table`."""
-        return Table(dataset)
+        """Turn a pyarrow Dataset into a Table."""
+        return Table(df_internal.catalog.RawTable.from_dataset(dataset))
 
     @property
     def schema(self) -> pa.Schema:
         """Returns the schema associated with this table."""
-        return self._inner.schema
+        return self.table.schema
 
     @property
     def kind(self) -> str:
         """Returns the kind of table."""
-        return self._inner.kind
+        return self.table.kind
 
 
 class CatalogProvider(ABC):
@@ -243,16 +219,14 @@ def table(self, name: str) -> Table | None:
         """Retrieve a specific table from this schema."""
         ...
 
-    def register_table(  # noqa: B027
-        self, name: str, table: Table | TableProviderExportable | Any
-    ) -> None:
-        """Add a table to this schema.
+    def register_table(self, name: str, table: Table) -> None:  # noqa: B027
+        """Add a table from this schema.
 
         This method is optional. If your schema provides a fixed list of tables, you do
         not need to implement this method.
         """
 
-    def deregister_table(self, name: str, cascade: bool) -> None:  # noqa: B027
+    def deregister_table(self, name, cascade: bool) -> None:  # noqa: B027
         """Remove a table from this schema.
 
         This method is optional. If your schema provides a fixed list of tables, you do
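
With the reverted API, a ``Table`` wraps the internal ``RawTable``: it can be built from a pyarrow dataset via ``Table.from_dataset`` and registered on a schema, where the ``isinstance`` check above unwraps it. A minimal sketch, assuming an in-memory pyarrow table:

    import pyarrow as pa
    import pyarrow.dataset as ds

    from datafusion import SessionContext
    from datafusion.catalog import Table

    ctx = SessionContext()

    # Wrap a pyarrow dataset in the catalog Table type ...
    dataset = ds.dataset(pa.table({"a": [1, 2, 3]}))
    table = Table.from_dataset(dataset)

    # ... and register it on the default schema; Schema.register_table
    # unwraps the inner RawTable before calling the internal API.
    ctx.catalog().database().register_table("my_table", table)

    ctx.table("my_table").show()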

python/datafusion/context.py

Lines changed: 14 additions & 28 deletions
@@ -29,10 +29,11 @@
 
 import pyarrow as pa
 
-from datafusion.catalog import Catalog
+from datafusion.catalog import Catalog, CatalogProvider, Table
 from datafusion.dataframe import DataFrame
-from datafusion.expr import sort_list_to_raw_sort_list
+from datafusion.expr import SortKey, sort_list_to_raw_sort_list
 from datafusion.record_batch import RecordBatchStream
+from datafusion.user_defined import AggregateUDF, ScalarUDF, TableFunction, WindowUDF
 
 from ._internal import RuntimeEnvBuilder as RuntimeEnvBuilderInternal
 from ._internal import SessionConfig as SessionConfigInternal
@@ -47,15 +48,7 @@
     import pandas as pd
     import polars as pl  # type: ignore[import]
 
-    from datafusion.catalog import CatalogProvider, Table
-    from datafusion.expr import SortKey
     from datafusion.plan import ExecutionPlan, LogicalPlan
-    from datafusion.user_defined import (
-        AggregateUDF,
-        ScalarUDF,
-        TableFunction,
-        WindowUDF,
-    )
 
 
 class ArrowStreamExportable(Protocol):
@@ -740,7 +733,7 @@ def from_polars(self, data: pl.DataFrame, name: str | None = None) -> DataFrame:
     # https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116
     # is the discussion on how we arrived at adding register_view
     def register_view(self, name: str, df: DataFrame) -> None:
-        """Register a :py:class:`~datafusion.dataframe.DataFrame` as a view.
+        """Register a :py:class: `~datafusion.detaframe.DataFrame` as a view.
 
         Args:
             name (str): The name to register the view under.
@@ -749,21 +742,16 @@ def register_view(self, name: str, df: DataFrame) -> None:
         view = df.into_view()
         self.ctx.register_table(name, view)
 
-    def register_table(
-        self,
-        name: str,
-        table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset,
-    ) -> None:
-        """Register a :py:class:`~datafusion.Table` with this context.
+    def register_table(self, name: str, table: Table) -> None:
+        """Register a :py:class: `~datafusion.catalog.Table` as a table.
 
-        The registered table can be referenced from SQL statements executed against
-        this context.
+        The registered table can be referenced from SQL statement executed against.
 
         Args:
             name: Name of the resultant table.
-            table: Any object that can be converted into a :class:`Table`.
+            table: DataFusion table to add to the session context.
         """
-        self.ctx.register_table(name, table)
+        self.ctx.register_table(name, table.table)
 
     def deregister_table(self, name: str) -> None:
         """Remove a table from the session."""
@@ -782,17 +770,15 @@ def register_catalog_provider(
         else:
             self.ctx.register_catalog_provider(name, provider)
 
-    @deprecated("Use register_table() instead.")
     def register_table_provider(
-        self,
-        name: str,
-        provider: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset,
+        self, name: str, provider: TableProviderExportable
     ) -> None:
         """Register a table provider.
 
-        Deprecated: use :meth:`register_table` instead.
+        This table provider must have a method called ``__datafusion_table_provider__``
+        which returns a PyCapsule that exposes a ``FFI_TableProvider``.
         """
-        self.register_table(name, provider)
+        self.ctx.register_table_provider(name, provider)
 
     def register_udtf(self, func: TableFunction) -> None:
         """Register a user defined table function."""
@@ -1184,7 +1170,7 @@ def read_table(self, table: Table) -> DataFrame:
         :py:class:`~datafusion.catalog.ListingTable`, create a
         :py:class:`~datafusion.dataframe.DataFrame`.
         """
-        return DataFrame(self.ctx.read_table(table._inner))
+        return DataFrame(self.ctx.read_table(table.table))
 
     def execute(self, plan: ExecutionPlan, partitions: int) -> RecordBatchStream:
         """Execute the ``plan`` and return the results."""

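In short, after this revert ``register_table`` expects a ``datafusion.catalog.Table`` while ``register_table_provider`` expects an object exposing ``__datafusion_table_provider__``. A minimal sketch (the FFI provider line is illustrative and commented out):

    from datafusion import SessionContext

    ctx = SessionContext()
    ctx.from_pydict({"a": [1, 2, 3]}, name="source")

    # register_table takes the catalog Table wrapper, here looked up
    # from the default catalog and schema.
    table = ctx.catalog().database().table("source")
    ctx.register_table("alias", table)

    # register_table_provider takes an object whose
    # __datafusion_table_provider__ returns an FFI_TableProvider PyCapsule,
    # e.g. a deltalake DeltaTable or the MyTableProvider example.
    # ctx.register_table_provider("ffi_table", provider)

    ctx.table("alias").show()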