pyproject.toml (5 changes: 4 additions & 1 deletion)
@@ -43,9 +43,12 @@ classifiers = [
     "Programming Language :: Python",
     "Programming Language :: Rust",
 ]
-dependencies = ["pyarrow>=11.0.0", "typing-extensions;python_version<'3.13'"]
+dependencies = ["typing-extensions;python_version<'3.13'"]
 dynamic = ["version"]
 
+[project.optional-dependencies]
+pyarrow = ["pyarrow>=11.0.0"]
+
 [project.urls]
 homepage = "https://datafusion.apache.org/python"
 documentation = "https://datafusion.apache.org/python"
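A note on the packaging change above: once pyarrow moves to the optional `pyarrow` extra, any runtime use of it inside the library has to be guarded. A minimal sketch of that pattern, assuming only the new `[project.optional-dependencies]` entry; the helper name `_require_pyarrow` is hypothetical and not part of this PR:

def _require_pyarrow():
    """Import pyarrow lazily, raising an actionable error when it is absent."""
    try:
        import pyarrow as pa  # available via: pip install datafusion[pyarrow]
    except ImportError as err:
        msg = "pyarrow is required here; install it with `pip install datafusion[pyarrow]`"
        raise ImportError(msg) from err
    return pa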
python/datafusion/context.py (47 changes: 30 additions & 17 deletions)
@@ -27,8 +27,6 @@
 except ImportError:
     from typing_extensions import deprecated  # Python 3.12
 
-import pyarrow as pa
-
 from datafusion.catalog import Catalog, CatalogProvider, Table
 from datafusion.dataframe import DataFrame
 from datafusion.expr import SortKey, sort_list_to_raw_sort_list
@@ -47,10 +45,21 @@
 
     import pandas as pd
     import polars as pl  # type: ignore[import]
+    import pyarrow as pa  # Optional: only needed for type hints

Check failure on line 48 in python/datafusion/context.py (GitHub Actions / build):
Ruff (TC004): python/datafusion/context.py:48:23: TC004 Move import `pyarrow` out of type-checking block. Import is used for more than type hinting.
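For context on this failure: TC004 fires because the import sits in an `if TYPE_CHECKING:` block while `pa` is also referenced at runtime. A sketch of the pattern and one conventional fix, assuming pyarrow should stay optional (the names here are illustrative, not from this PR):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import pyarrow as pa  # fine for annotations only; TC004 flags runtime use

def schema_of(obj) -> "pa.Schema":
    # Lazy runtime import keeps pyarrow an optional dependency.
    import pyarrow as pa
    return pa.schema(obj)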

     from datafusion.plan import ExecutionPlan, LogicalPlan
 
 
+class ArrowSchemaExportable(Protocol):
+    """Type hint for object exporting Arrow Schema via Arrow PyCapsule Interface.
+
+    https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+    """
+
+    def __arrow_c_schema__(self) -> object:  # noqa: D105
+        ...
+
+
 class ArrowStreamExportable(Protocol):
     """Type hint for object exporting Arrow C Stream via Arrow PyCapsule Interface.

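A usage note on the new `ArrowSchemaExportable` protocol: it is structural, so any object with an `__arrow_c_schema__` method satisfies it, and recent pyarrow versions implement the PyCapsule interface on `pa.Schema` directly. A minimal sketch of a conforming wrapper (the class is hypothetical):

import pyarrow as pa

class SchemaWrapper:
    """Holds a pyarrow.Schema and re-exports it via the PyCapsule interface."""

    def __init__(self, schema: pa.Schema) -> None:
        self._schema = schema

    def __arrow_c_schema__(self) -> object:
        # Delegate to pyarrow's own PyCapsule export (pyarrow >= 14).
        return self._schema.__arrow_c_schema__()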
@@ -59,7 +68,8 @@
 
     def __arrow_c_stream__(  # noqa: D105
         self, requested_schema: object | None = None
-    ) -> object: ...
+    ) -> object:
+        ...
 
 
 class ArrowArrayExportable(Protocol):
@@ -70,7 +80,8 @@
 
     def __arrow_c_array__(  # noqa: D105
         self, requested_schema: object | None = None
-    ) -> tuple[object, object]: ...
+    ) -> tuple[object, object]:
+        ...
 
 
 class TableProviderExportable(Protocol):
@@ -79,7 +90,8 @@
     https://datafusion.apache.org/python/user-guide/io/table_provider.html
     """
 
-    def __datafusion_table_provider__(self) -> object: ...  # noqa: D105
+    def __datafusion_table_provider__(self) -> object:  # noqa: D105
+        ...
 
 
 class CatalogProviderExportable(Protocol):
@@ -88,7 +100,8 @@
     https://docs.rs/datafusion/latest/datafusion/catalog/trait.CatalogProvider.html
     """
 
-    def __datafusion_catalog_provider__(self) -> object: ...  # noqa: D105
+    def __datafusion_catalog_provider__(self) -> object:  # noqa: D105
+        ...
 
 
 class SessionConfig:
@@ -554,7 +567,7 @@
         path: str | pathlib.Path,
         table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_extension: str = ".parquet",
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         file_sort_order: Sequence[Sequence[SortKey]] | None = None,
     ) -> None:
         """Register multiple files as a single table.
@@ -623,7 +636,7 @@
         self,
         partitions: list[list[pa.RecordBatch]],
         name: str | None = None,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
     ) -> DataFrame:
         """Create and return a dataframe using the provided partitions.
 
@@ -806,7 +819,7 @@
         parquet_pruning: bool = True,
         file_extension: str = ".parquet",
         skip_metadata: bool = True,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         file_sort_order: Sequence[Sequence[SortKey]] | None = None,
     ) -> None:
         """Register a Parquet file as a table.
@@ -848,7 +861,7 @@
         self,
         name: str,
         path: str | pathlib.Path | list[str | pathlib.Path],
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         has_header: bool = True,
         delimiter: str = ",",
         schema_infer_max_records: int = 1000,
@@ -891,7 +904,7 @@
         self,
         name: str,
         path: str | pathlib.Path,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         schema_infer_max_records: int = 1000,
         file_extension: str = ".json",
         table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
@@ -930,7 +943,7 @@
         self,
         name: str,
         path: str | pathlib.Path,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         file_extension: str = ".avro",
         table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
     ) -> None:
@@ -1005,7 +1018,7 @@
     def read_json(
         self,
         path: str | pathlib.Path,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         schema_infer_max_records: int = 1000,
         file_extension: str = ".json",
         table_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
@@ -1043,7 +1056,7 @@
     def read_csv(
         self,
         path: str | pathlib.Path | list[str] | list[pathlib.Path],
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         has_header: bool = True,
         delimiter: str = ",",
         schema_infer_max_records: int = 1000,
@@ -1097,7 +1110,7 @@
         parquet_pruning: bool = True,
         file_extension: str = ".parquet",
         skip_metadata: bool = True,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         file_sort_order: Sequence[Sequence[SortKey]] | None = None,
     ) -> DataFrame:
         """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`.
@@ -1141,7 +1154,7 @@
     def read_avro(
         self,
         path: str | pathlib.Path,
-        schema: pa.Schema | None = None,
+        schema: ArrowSchemaExportable | None = None,
         file_partition_cols: list[tuple[str, str | pa.DataType]] | None = None,
         file_extension: str = ".avro",
     ) -> DataFrame:
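Taken together, the `schema:` changes above widen every reader and register method from `pa.Schema` to any `ArrowSchemaExportable`; a plain `pyarrow.Schema` still works because it satisfies the protocol. A usage sketch (the file path is illustrative):

import pyarrow as pa
from datafusion import SessionContext

ctx = SessionContext()
schema = pa.schema([("id", pa.int64()), ("name", pa.string())])
# Any ArrowSchemaExportable is accepted; a pa.Schema is the common case.
df = ctx.read_csv("data.csv", schema=schema)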
@@ -1230,4 +1243,4 @@
             stacklevel=2,
         )
 
-    return converted_table_partition_cols
+    return converted_table_partition_cols
\ No newline at end of file
Check failure on line 1246 in python/datafusion/context.py (GitHub Actions / build):
Ruff (W292): python/datafusion/context.py:1246:46: W292 No newline at end of file