ToucanToco
diff --git a/Cargo.lock
Lines changed: 2 additions & 2 deletions b/Cargo.lock
Lines changed: 2 additions & 2 deletions
diff --git a/Cargo.toml
Lines changed: 4 additions & 5 deletions b/Cargo.toml
Lines changed: 4 additions & 5 deletions
diff --git a/README.md
Lines changed: 85 additions & 0 deletions b/README.md
Lines changed: 85 additions & 0 deletions
diff --git a/pyproject.toml
Lines changed: 6 additions & 5 deletions b/pyproject.toml
Lines changed: 6 additions & 5 deletions
diff --git a/python/fastexcel/__init__.py
Lines changed: 72 additions & 19 deletions b/python/fastexcel/__init__.py
Lines changed: 72 additions & 19 deletions
diff --git a/python/fastexcel/_fastexcel.pyi
Lines changed: 14 additions & 5 deletions b/python/fastexcel/_fastexcel.pyi
Lines changed: 14 additions & 5 deletions
@@ -26,9 +26,7 @@ crate-type = ["cdylib"]
 
 [dependencies]
 # There's a lot of stuff we don't want here, such as serde support
-arrow = { version = "^55.2.0", default-features = false, features = [
-    "pyarrow",
-] }
+arrow = { version = "^55.2.0", default-features = false, features = ["ffi"] }
 calamine = { version = "^0.30.0", features = ["dates"] }
 chrono = { version = "^0.4.41", default-features = false }
 log = "0.4.27"
@@ -42,9 +40,10 @@ rstest = { version = "^0.26.1", default-features = false }
 # NOTE: This is a hack to bypass pyo3 limitations when testing:
 # https://pyo3.rs/v0.22.3/faq.html#i-cant-run-cargo-test-or-i-cant-build-in-a-cargo-workspace-im-having-linker-issues-like-symbol-not-found-or-undefined-reference-to-_pyexc_systemerror
 [features]
+default = ["extension-module", "pyarrow"]
 extension-module = ["pyo3/extension-module"]
-default = ["extension-module"]
+pyarrow = ["arrow/pyarrow"]
 # feature for tests only. This makes Python::with_gil auto-initialize Python
 # interpreters, which allows us to instantiate Python objects in tests
 # (see https://pyo3.rs/v0.22.3/features#auto-initialize)
-tests = ["pyo3/auto-initialize"]
+tests = ["pyo3/auto-initialize", "pyarrow"]
@@ -6,6 +6,91 @@ Based on [`calamine`](https://github.com/tafia/calamine) and [Apache Arrow](http
 
 Docs available [here](https://fastexcel.toucantoco.dev/).
 
+## Installation
+
+```bash
+# Lightweight installation (no pyarrow dependency)
+pip install fastexcel
+
+# With Polars support only (no pyarrow needed)
+pip install 'fastexcel[polars]'
+
+# With pandas support (includes pyarrow)
+pip install 'fastexcel[pandas]'
+
+# With pyarrow support
+pip install 'fastexcel[pyarrow]'
+
+# With all integrations
+pip install 'fastexcel[pandas,polars]'
+```
+
+## Quick Start
+
+### Modern usage (recommended)
+
+`fastexcel` supports the [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) for zero-copy data exchange with libraries like Polars, without requiring pyarrow as a dependency.
+Any Arrow-compatible library that consumes this interface can read `fastexcel` sheets and tables directly.
+
+```python
+import fastexcel
+
+# Load an Excel file
+reader = fastexcel.read_excel("data.xlsx")
+sheet = reader.load_sheet(0)  # Load first sheet
+
+# Use with Polars (zero-copy, no pyarrow needed)
+import polars as pl
+df = pl.DataFrame(sheet)  # Direct PyCapsule interface
+print(df)
+
+# Or use the to_polars() method (also via PyCapsule)
+df = sheet.to_polars()
+print(df)
+
+# Or access the raw Arrow data via the PyCapsule interface
+schema = sheet.__arrow_c_schema__()
+array_data = sheet.__arrow_c_array__()  # tuple: (schema capsule, array capsule)
+```
+
+### Traditional usage (with pandas/pyarrow)
+
+```python
+import fastexcel
+
+reader = fastexcel.read_excel("data.xlsx")
+sheet = reader.load_sheet(0)
+
+# Convert to pandas (requires `pandas` extra)
+df = sheet.to_pandas()
+
+# Or get pyarrow RecordBatch directly
+record_batch = sheet.to_arrow()
+```
+
+### Working with tables
+
+```python
+reader = fastexcel.read_excel("data.xlsx")
+
+# List available tables
+tables = reader.table_names()
+print(f"Available tables: {tables}")
+
+# Load a specific table
+table = reader.load_table("MyTable")
+df = pl.DataFrame(table)  # Zero-copy via PyCapsule, no pyarrow needed
+```
+
+## Key Features
+
+- **Zero-copy data exchange** via [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html)
+- **Flexible dependencies** - use with Polars (no pyarrow needed) or pandas (includes pyarrow)
+- **Seamless Polars integration** - `pl.DataFrame(sheet)` and `sheet.to_polars()` work without pyarrow via the PyCapsule interface
+- **High performance** - written in Rust with [calamine](https://github.com/tafia/calamine) and [Apache Arrow](https://arrow.apache.org/)
+- **Memory efficient** - lazy loading and optional eager evaluation
+- **Type safety** - automatic type inference with manual override options
+
 ## Dev setup
 
 ### Prerequisites
 
@@ -23,16 +23,17 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
     "Programming Language :: Python :: Implementation :: CPython",
 ]
-dependencies = [
-    "pyarrow>=8.0.0",
-    "typing-extensions>=4.0.0; python_version<'3.10'",
-]
+dependencies = ["typing-extensions>=4.0.0; python_version<'3.10'"]
 dynamic = ["version"]
 
 [project.optional-dependencies]
-pandas = ["pandas>=1.4.4"]
+pyarrow = ["pyarrow>=8.0.0"]
+pandas = ["pandas>=1.4.4", "pyarrow>=8.0.0"]
 polars = ["polars>=0.16.14"]
 
+[dependency-groups]
+test = ["pytest>=7.0.0", "pyarrow>=8.0.0", "pandas>=1.4.4", "polars>=0.16.14"]
+
 [project.urls]
 "Source Code" = "https://github.com/ToucanToco/fastexcel"
 Issues = "https://github.com/ToucanToco/fastexcel"
 
@@ -12,11 +12,18 @@
 if TYPE_CHECKING:
     import pandas as pd
     import polars as pl
+    import pyarrow as pa
 
 from os.path import expanduser
 from pathlib import Path
 
-import pyarrow as pa
+try:
+    import pyarrow as pa
+
+    _PYARROW_AVAILABLE = True
+except ImportError:
+    pa = None
+    _PYARROW_AVAILABLE = False
 
 from ._fastexcel import (
     ArrowError,
@@ -46,14 +53,6 @@
 SheetVisible: TypeAlias = Literal["visible", "hidden", "veryhidden"]
 
 
-def _recordbatch_to_polars(rb: pa.RecordBatch) -> pl.DataFrame:
-    import polars as pl
-
-    df = pl.from_arrow(data=rb)
-    assert isinstance(df, pl.DataFrame)
-    return df
-
-
 class ExcelSheet:
     """A class representing a single sheet in an Excel File"""
 
@@ -99,16 +98,24 @@ def visible(self) -> SheetVisible:
         """The visibility of the sheet"""
         return self._sheet.visible
 
-    def to_arrow(self) -> pa.RecordBatch:
+    def to_arrow(self) -> "pa.RecordBatch":
         """Converts the sheet to a pyarrow `RecordBatch`"""
+        if not _PYARROW_AVAILABLE:
+            raise ImportError(
+                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
+            )
         return self._sheet.to_arrow()
 
-    def to_arrow_with_errors(self) -> tuple[pa.RecordBatch, CellErrors | None]:
+    def to_arrow_with_errors(self) -> "tuple[pa.RecordBatch, CellErrors | None]":
         """Converts the sheet to a pyarrow `RecordBatch` with error information.
 
         Stores the positions of any values that cannot be parsed as the specified type and were
         therefore converted to None.
         """
+        if not _PYARROW_AVAILABLE:
+            raise ImportError(
+                "pyarrow is required for to_arrow_with_errors(). Install with: pip install 'fastexcel[pyarrow]'"  # noqa: E501
+            )
         rb, cell_errors = self._sheet.to_arrow_with_errors()
         if not cell_errors.errors:
             return (rb, None)
@@ -119,15 +126,36 @@ def to_pandas(self) -> "pd.DataFrame":
 
         Requires the `pandas` extra to be installed.
         """
-        # We know for sure that the sheet will yield exactly one RecordBatch
+        # Note: loading into pandas directly requires `__dataframe__` (interchange protocol) or
+        # `__arrow_c_stream__` (PyCapsule stream), which we don't implement. Using pyarrow for now.
+        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
         return self.to_arrow().to_pandas()
 
     def to_polars(self) -> "pl.DataFrame":
         """Converts the sheet to a Polars `DataFrame`.
 
+        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
         Requires the `polars` extra to be installed.
         """
-        return _recordbatch_to_polars(self.to_arrow())
+        import polars as pl
+
+        return pl.DataFrame(self)
+
+    def __arrow_c_schema__(self) -> object:
+        """Arrow PyCapsule Interface: Export schema as a PyCapsule.
+
+        This method allows zero-copy data exchange with Arrow-compatible libraries
+        like Polars without requiring PyArrow as a dependency.
+        """
+        return self._sheet.__arrow_c_schema__()
+
+    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
+        """Arrow PyCapsule Interface: Export array and schema as PyCapsules.
+
+        Returns data as a tuple of (schema_capsule, array_capsule) for zero-copy
+        data exchange with Arrow-compatible libraries.
+        """
+        return self._sheet.__arrow_c_array__(requested_schema)
 
     def __repr__(self) -> str:
         return self._sheet.__repr__()
@@ -183,24 +211,49 @@ def specified_dtypes(self) -> DTypeMap | None:
         """The dtypes specified for the table"""
         return self._table.specified_dtypes
 
-    def to_arrow(self) -> pa.RecordBatch:
+    def to_arrow(self) -> "pa.RecordBatch":
         """Converts the table to a pyarrow `RecordBatch`"""
+        if not _PYARROW_AVAILABLE:
+            raise ImportError(
+                "pyarrow is required for to_arrow(). Install with: pip install 'fastexcel[pyarrow]'"
+            )
         return self._table.to_arrow()
 
     def to_pandas(self) -> "pd.DataFrame":
         """Converts the table to a Pandas `DataFrame`.
 
         Requires the `pandas` extra to be installed.
         """
-        # We know for sure that the table will yield exactly one RecordBatch
+        # Note: loading into pandas directly requires `__dataframe__` (interchange protocol) or
+        # `__arrow_c_stream__` (PyCapsule stream), which we don't implement. Using pyarrow for now.
+        # (see https://pandas.pydata.org/docs/reference/api/pandas.api.interchange.from_dataframe.html)
         return self.to_arrow().to_pandas()
 
     def to_polars(self) -> "pl.DataFrame":
         """Converts the table to a Polars `DataFrame`.
 
+        Uses the Arrow PyCapsule Interface for zero-copy data exchange.
         Requires the `polars` extra to be installed.
         """
-        return _recordbatch_to_polars(self.to_arrow())
+        import polars as pl
+
+        return pl.DataFrame(self)
+
+    def __arrow_c_schema__(self) -> object:
+        """Arrow PyCapsule Interface: Export schema as a PyCapsule.
+
+        This method allows zero-copy data exchange with Arrow-compatible libraries
+        like Polars without requiring PyArrow as a dependency.
+        """
+        return self._table.__arrow_c_schema__()
+
+    def __arrow_c_array__(self, requested_schema: object | None = None) -> tuple[object, object]:
+        """Arrow PyCapsule Interface: Export array and schema as PyCapsules.
+
+        Returns data as a tuple of (schema_capsule, array_capsule) for zero-copy
+        data exchange with Arrow-compatible libraries.
+        """
+        return self._table.__arrow_c_array__(requested_schema)
 
 
 class ExcelReader:
@@ -331,7 +384,7 @@ def load_table(
         | None = None,
         dtypes: DType | DTypeMap | None = None,
         eager: Literal[True] = ...,
-    ) -> pa.RecordBatch: ...
+    ) -> "pa.RecordBatch": ...
     def load_table(
         self,
         name: str,
@@ -349,7 +402,7 @@ def load_table(
         | None = None,
         dtypes: DType | DTypeMap | None = None,
         eager: bool = False,
-    ) -> ExcelTable | pa.RecordBatch:
+    ) -> "ExcelTable | pa.RecordBatch":
         """Loads a table by name.
 
         :param name: The name of the table to load.
@@ -413,7 +466,7 @@ def load_sheet_eager(
         dtype_coercion: Literal["coerce", "strict"] = "coerce",
         use_columns: list[str] | list[int] | str | None = None,
         dtypes: DType | DTypeMap | None = None,
-    ) -> pa.RecordBatch:
+    ) -> "pa.RecordBatch":
         """Loads a sheet eagerly by index or name.
 
         For xlsx files, this will be faster and more memory-efficient, as it will use
 
@@ -1,9 +1,10 @@
 from __future__ import annotations
 
 import typing
-from typing import Callable, Literal
+from typing import TYPE_CHECKING, Callable, Literal
 
-import pyarrow as pa
+if TYPE_CHECKING:
+    import pyarrow as pa
 
 DType = Literal["null", "int", "float", "string", "boolean", "datetime", "date", "duration"]
 DTypeMap = dict[str | int, DType]
@@ -82,14 +83,18 @@ class _ExcelSheet:
     @property
     def visible(self) -> SheetVisible:
         """The visibility of the sheet"""
-    def to_arrow(self) -> pa.RecordBatch:
+    def to_arrow(self) -> "pa.RecordBatch":
         """Converts the sheet to a pyarrow `RecordBatch`"""
-    def to_arrow_with_errors(self) -> tuple[pa.RecordBatch, CellErrors]:
+    def to_arrow_with_errors(self) -> "tuple[pa.RecordBatch, CellErrors]":
         """Converts the sheet to a pyarrow `RecordBatch` with error information.
 
         Stores the positions of any values that cannot be parsed as the specified type and were
         therefore converted to None.
         """
+    def __arrow_c_schema__(self) -> object:
+        """Arrow PyCapsule Interface: Export schema as a PyCapsule"""
+    def __arrow_c_array__(self, requested_schema: object = None) -> tuple[object, object]:
+        """Arrow PyCapsule Interface: Export array and schema as PyCapsules"""
 
 class _ExcelTable:
     @property
@@ -118,8 +123,12 @@ class _ExcelTable:
     @property
     def specified_dtypes(self) -> DTypeMap | None:
         """The dtypes specified for the table"""
-    def to_arrow(self) -> pa.RecordBatch:
+    def to_arrow(self) -> "pa.RecordBatch":
         """Converts the table to a pyarrow `RecordBatch`"""
+    def __arrow_c_schema__(self) -> object:
+        """Arrow PyCapsule Interface: Export schema as a PyCapsule"""
+    def __arrow_c_array__(self, requested_schema: object = None) -> tuple[object, object]:
+        """Arrow PyCapsule Interface: Export array and schema as PyCapsules"""
 
 class _ExcelReader:
     """A class representing an open Excel file and allowing to read its sheets"""