5 changes: 4 additions & 1 deletion pyproject.toml
@@ -43,9 +43,12 @@ classifiers = [
"Programming Language :: Python",
"Programming Language :: Rust",
]
dependencies = ["pyarrow>=11.0.0", "typing-extensions;python_version<'3.13'"]
dependencies = ["typing-extensions;python_version<'3.13'"]
dynamic = ["version"]

[project.optional-dependencies]
pyarrow = ["pyarrow>=11.0.0"]

[project.urls]
homepage = "https://datafusion.apache.org/python"
documentation = "https://datafusion.apache.org/python"
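With this change, `pip install datafusion` no longer pulls in pyarrow; Arrow interop now requires the extra declared above (`pip install "datafusion[pyarrow]"`). A minimal feature-detection sketch for downstream code, assuming nothing beyond the extra name defined in this hunk:

```python
# Sketch: detect whether the optional pyarrow extra is installed.
# Install it with: pip install "datafusion[pyarrow]"
import importlib.util

HAVE_PYARROW = importlib.util.find_spec("pyarrow") is not None
print("pyarrow available:", HAVE_PYARROW)
```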
45 changes: 30 additions & 15 deletions python/datafusion/context.py
@@ -46,6 +46,7 @@

     import pandas as pd
     import polars as pl  # type: ignore[import]
+    import pyarrow as pa  # Optional: only needed for type hints
 
Check failure on line 49 in python/datafusion/context.py (GitHub Actions / build):
Ruff (TC004): python/datafusion/context.py:49:23: Move import `pyarrow` out of type-checking block. Import is used for more than type hinting.

     from datafusion.catalog import CatalogProvider, Table
     from datafusion.expr import SortKey
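The TC004 failure above flags that `pyarrow` is imported under `if TYPE_CHECKING:` but is also needed at runtime. One conventional resolution, sketched here as an assumption rather than the fix this PR necessarily adopts, is a guarded runtime import:

```python
# Sketch: move the pyarrow import out of the type-checking block and guard it,
# since pyarrow is now optional. Assumed structure, not taken from this diff.
try:
    import pyarrow as pa
except ImportError:  # the pyarrow extra is not installed
    pa = None  # callers must check for None before touching Arrow interop
```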
@@ -58,6 +59,16 @@
 )
 
 
+class ArrowSchemaExportable(Protocol):
+    """Type hint for object exporting Arrow Schema via Arrow PyCapsule Interface.
+
+    https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+    """
+
+    def __arrow_c_schema__(self) -> object:  # noqa: D105
+        ...
+
+
 class ArrowStreamExportable(Protocol):
     """Type hint for object exporting Arrow C Stream via Arrow PyCapsule Interface.

@@ -66,7 +77,8 @@

     def __arrow_c_stream__(  # noqa: D105
         self, requested_schema: object | None = None
-    ) -> object: ...
+    ) -> object:
+        ...
 
 
 class ArrowArrayExportable(Protocol):
@@ -77,7 +89,8 @@

     def __arrow_c_array__(  # noqa: D105
         self, requested_schema: object | None = None
-    ) -> tuple[object, object]: ...
+    ) -> tuple[object, object]:
+        ...
 
 
 class TableProviderExportable(Protocol):
@@ -86,7 +99,8 @@
     https://datafusion.apache.org/python/user-guide/io/table_provider.html
     """
 
-    def __datafusion_table_provider__(self) -> object: ...  # noqa: D105
+    def __datafusion_table_provider__(self) -> object:  # noqa: D105
+        ...
 
 
 class CatalogProviderExportable(Protocol):
@@ -95,7 +109,8 @@
     https://docs.rs/datafusion/latest/datafusion/catalog/trait.CatalogProvider.html
     """
 
-    def __datafusion_catalog_provider__(self) -> object: ...  # noqa: D105
+    def __datafusion_catalog_provider__(self) -> object:  # noqa: D105
+        ...
 
 
 class SessionConfig:
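For illustration (not part of the diff): `ArrowSchemaExportable` is structural, so any object with a `__arrow_c_schema__` method matches, whichever Arrow implementation produced it. `pa.Schema` exposes that method in recent pyarrow releases (the PyCapsule interface landed around pyarrow 14):

```python
# Sketch: pa.Schema satisfies ArrowSchemaExportable structurally via
# __arrow_c_schema__ (Arrow PyCapsule interface; pyarrow >= 14 assumed).
import pyarrow as pa

schema = pa.schema([("id", pa.int64()), ("name", pa.string())])
capsule = schema.__arrow_c_schema__()  # a PyCapsule wrapping an ArrowSchema
print(type(capsule).__name__)  # "PyCapsule"
```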
@@ -561,7 +576,7 @@
         path: str | pathlib.Path,
         table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_extension: str = ".parquet",
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         file_sort_order: Sequence[Sequence[SortKey]] | None = None,
     ) -> None:
         """Register multiple files as a single table.
@@ -630,7 +645,7 @@
         self,
         partitions: list[list[pa.RecordBatch]],
         name: str | None = None,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
     ) -> DataFrame:
         """Create and return a dataframe using the provided partitions.
 
@@ -820,7 +835,7 @@
         parquet_pruning: bool = True,
         file_extension: str = ".parquet",
         skip_metadata: bool = True,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         file_sort_order: Sequence[Sequence[SortKey]] | None = None,
     ) -> None:
         """Register a Parquet file as a table.
@@ -862,7 +877,7 @@
         self,
         name: str,
         path: str | pathlib.Path | list[str | pathlib.Path],
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         has_header: bool = True,
         delimiter: str = ",",
         schema_infer_max_records: int = 1000,
@@ -905,7 +920,7 @@
         self,
         name: str,
         path: str | pathlib.Path,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         schema_infer_max_records: int = 1000,
         file_extension: str = ".json",
         table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
@@ -944,7 +959,7 @@
         self,
         name: str,
         path: str | pathlib.Path,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         file_extension: str = ".avro",
         table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
     ) -> None:
@@ -1019,7 +1034,7 @@
     def read_json(
         self,
         path: str | pathlib.Path,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         schema_infer_max_records: int = 1000,
         file_extension: str = ".json",
         table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
@@ -1057,7 +1072,7 @@
     def read_csv(
         self,
         path: str | pathlib.Path | list[str] | list[pathlib.Path],
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         has_header: bool = True,
         delimiter: str = ",",
         schema_infer_max_records: int = 1000,
@@ -1111,7 +1126,7 @@
         parquet_pruning: bool = True,
         file_extension: str = ".parquet",
         skip_metadata: bool = True,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         file_sort_order: Sequence[Sequence[SortKey]] | None = None,
     ) -> DataFrame:
         """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`.
@@ -1155,7 +1170,7 @@
     def read_avro(
         self,
         path: str | pathlib.Path,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         file_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_extension: str = ".avro",
     ) -> DataFrame:
@@ -1241,4 +1256,4 @@
         stacklevel=2,
     )
 
-    return converted_table_partition_cols
+    return converted_table_partition_cols
\ No newline at end of file
Check failure on line 1259 in python/datafusion/context.py (GitHub Actions / build):
Ruff (W292): python/datafusion/context.py:1259:46: No newline at end of file
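Taken together, every `schema=` parameter changed in this file now accepts any `ArrowSchemaExportable` rather than only `pa.Schema`. A usage sketch against the signatures above (the CSV path is a placeholder):

```python
# Sketch: passing a schema to read_csv after this change. A pa.Schema works
# as before; any object exposing __arrow_c_schema__ would too.
import pyarrow as pa

from datafusion import SessionContext

ctx = SessionContext()
schema = pa.schema([("a", pa.int64()), ("b", pa.string())])
df = ctx.read_csv("data.csv", schema=schema)  # "data.csv" is a placeholder
```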