Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions docs/guides/architecture/arrow-integration.md
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,29 @@ result = await session.select_to_arrow("SELECT * FROM users")
df = result.to_pandas() # Zero-copy if native path
```

### Converting SQLResult to DataFrames

Both `SQLResult` and `ArrowResult` support convenient conversion methods for data science workflows:

```python
# Standard execute returns SQLResult
result = await session.execute("SELECT * FROM users")

# Convert to pandas DataFrame
df = result.to_pandas()

# Convert to Polars DataFrame
pl_df = result.to_polars()

# Convert to Arrow Table (for SQLResult only)
arrow_table = result.to_arrow()
```

**Key differences**:
- `SQLResult.to_arrow()` performs dict→Arrow conversion
- `ArrowResult` data is already in Arrow format
- Both provide a consistent API for pandas and Polars conversion

### From DataFrame-Centric to Arrow-First

**Before**:
Expand Down
71 changes: 71 additions & 0 deletions sqlspec/core/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,77 @@ def scalar_or_none(self) -> Any:

return next(iter(row.values()))

def to_arrow(self) -> "ArrowTable":
    """Build an Apache Arrow Table from the rows stored on this result.

    Only ``None`` is rejected; an empty row list is passed through to the
    converter unchanged.

    Returns:
        The table produced by ``convert_dict_to_arrow`` for ``self.data``.

    Raises:
        ValueError: If ``self.data`` is ``None``.

    Examples:
        >>> result = session.select("SELECT * FROM users")
        >>> table = result.to_arrow()
        >>> print(table.num_rows)
        3
    """
    rows = self.data
    if rows is None:
        error_message = "No data available"
        raise ValueError(error_message)

    # Imported lazily, only once there is data to convert.
    from sqlspec.utils.arrow_helpers import convert_dict_to_arrow

    arrow_table = convert_dict_to_arrow(rows, return_format="table")
    return arrow_table

def to_pandas(self) -> "PandasDataFrame":
    """Build a pandas DataFrame from the rows stored on this result.

    Only ``None`` is rejected; an empty row list is handed to
    ``pandas.DataFrame`` as-is.

    Returns:
        A pandas DataFrame constructed directly from ``self.data``.

    Raises:
        ValueError: If ``self.data`` is ``None``.

    Examples:
        >>> result = session.select("SELECT * FROM users")
        >>> df = result.to_pandas()
        >>> print(df.head())
    """
    rows = self.data
    if rows is None:
        error_message = "No data available"
        raise ValueError(error_message)

    # Runs before the import below; presumably verifies that pandas is
    # installed -- confirm against the definition of ensure_pandas.
    ensure_pandas()

    import pandas

    frame = pandas.DataFrame(rows)
    return frame

def to_polars(self) -> "PolarsDataFrame":
    """Build a Polars DataFrame from the rows stored on this result.

    Only ``None`` is rejected; an empty row list is handed to
    ``polars.DataFrame`` as-is.

    Returns:
        A Polars DataFrame constructed directly from ``self.data``.

    Raises:
        ValueError: If ``self.data`` is ``None``.

    Examples:
        >>> result = session.select("SELECT * FROM users")
        >>> df = result.to_polars()
        >>> print(df.head())
    """
    rows = self.data
    if rows is None:
        error_message = "No data available"
        raise ValueError(error_message)

    # Runs before the import below; presumably verifies that polars is
    # installed -- confirm against the definition of ensure_polars.
    ensure_polars()

    import polars

    frame = polars.DataFrame(rows)
    return frame

def write_to_storage_sync(
self,
destination: "StorageDestination",
Expand Down
159 changes: 159 additions & 0 deletions tests/unit/test_sql_result_conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
"""Tests for SQLResult DataFrame conversion methods."""

from typing import Any

import pytest

from sqlspec.core import SQL, SQLResult
from sqlspec.typing import PYARROW_INSTALLED


@pytest.fixture
def sample_data() -> list[dict[str, Any]]:
    """Provide three user rows (id, name, age) as a list of dicts."""
    columns = ("id", "name", "age")
    records = (
        (1, "Alice", 30),
        (2, "Bob", 25),
        (3, "Charlie", 35),
    )
    # zip() pairs each value with its column, preserving key order.
    return [dict(zip(columns, record)) for record in records]


@pytest.fixture
def sql_result(sample_data: list[dict[str, Any]]) -> SQLResult:
    """Provide a SQLResult wrapping the ``sample_data`` rows."""
    return SQLResult(
        statement=SQL("SELECT * FROM users"),
        data=sample_data,
        rows_affected=3,
    )


@pytest.mark.skipif(not PYARROW_INSTALLED, reason="pyarrow not installed")
def test_sql_result_to_arrow(sql_result: SQLResult) -> None:
    """Check that ``to_arrow()`` yields a pyarrow Table matching the fixture rows."""
    import pyarrow as pa

    arrow_table = sql_result.to_arrow()

    assert isinstance(arrow_table, pa.Table)
    assert arrow_table.num_rows == 3
    expected_columns = ["id", "name", "age"]
    assert arrow_table.column_names == expected_columns


@pytest.mark.skipif(not PYARROW_INSTALLED, reason="pyarrow not installed")
def test_sql_result_to_arrow_empty_data() -> None:
    """Check that ``to_arrow()`` returns a zero-row Table for an empty row list."""
    import pyarrow as pa

    empty_result = SQLResult(
        statement=SQL("SELECT * FROM users WHERE 1=0"),
        data=[],
    )

    arrow_table = empty_result.to_arrow()

    assert isinstance(arrow_table, pa.Table)
    assert arrow_table.num_rows == 0


def test_sql_result_to_pandas(sql_result: SQLResult) -> None:
    """Check that ``to_pandas()`` yields a DataFrame matching the fixture rows."""
    pd = pytest.importorskip("pandas")

    frame = sql_result.to_pandas()

    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 3
    expected_columns = ["id", "name", "age"]
    assert list(frame.columns) == expected_columns
    expected_names = ["Alice", "Bob", "Charlie"]
    assert frame["name"].tolist() == expected_names


def test_sql_result_to_pandas_empty_data() -> None:
    """Check that ``to_pandas()`` returns a zero-length DataFrame for an empty row list."""
    pd = pytest.importorskip("pandas")

    empty_result = SQLResult(
        statement=SQL("SELECT * FROM users WHERE 1=0"),
        data=[],
    )

    frame = empty_result.to_pandas()

    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 0


def test_sql_result_to_pandas_with_null_values() -> None:
    """Check that ``None`` cells come back as missing values in the pandas DataFrame."""
    pd = pytest.importorskip("pandas")

    rows_with_nulls: list[dict[str, Any]] = [
        {"id": 1, "name": "Alice", "email": "[email protected]"},
        {"id": 2, "name": "Bob", "email": None},
        {"id": 3, "name": None, "email": "[email protected]"},
    ]
    result = SQLResult(statement=SQL("SELECT * FROM users"), data=rows_with_nulls)

    frame = result.to_pandas()

    # Row label 1 has no email; row label 2 has no name.
    missing_email = frame.loc[1, "email"]
    assert pd.isna(missing_email)
    missing_name = frame.loc[2, "name"]
    assert pd.isna(missing_name)


def test_sql_result_to_polars(sql_result: SQLResult) -> None:
    """Check that ``to_polars()`` yields a DataFrame matching the fixture rows."""
    pl = pytest.importorskip("polars")

    frame = sql_result.to_polars()

    assert isinstance(frame, pl.DataFrame)
    assert len(frame) == 3
    expected_columns = ["id", "name", "age"]
    assert frame.columns == expected_columns
    expected_names = ["Alice", "Bob", "Charlie"]
    assert frame["name"].to_list() == expected_names


def test_sql_result_to_polars_empty_data() -> None:
    """Check that ``to_polars()`` returns a zero-length DataFrame for an empty row list."""
    pl = pytest.importorskip("polars")

    empty_result = SQLResult(
        statement=SQL("SELECT * FROM users WHERE 1=0"),
        data=[],
    )

    frame = empty_result.to_polars()

    assert isinstance(frame, pl.DataFrame)
    assert len(frame) == 0


def test_sql_result_to_polars_with_null_values() -> None:
    """Check that ``None`` cells come back as ``None`` in the Polars DataFrame."""
    pytest.importorskip("polars")

    rows_with_nulls: list[dict[str, Any]] = [
        {"id": 1, "name": "Alice", "email": "[email protected]"},
        {"id": 2, "name": "Bob", "email": None},
        {"id": 3, "name": None, "email": "[email protected]"},
    ]
    result = SQLResult(statement=SQL("SELECT * FROM users"), data=rows_with_nulls)

    frame = result.to_polars()

    # Index 1 has no email; index 2 has no name.
    missing_email = frame["email"][1]
    assert missing_email is None
    missing_name = frame["name"][2]
    assert missing_name is None


def test_sql_result_methods_with_none_data_raise() -> None:
    """Check that ``to_pandas()`` and ``to_polars()`` raise ValueError when data is None."""
    result = SQLResult(statement=SQL("SELECT * FROM users"), data=None)

    # Same order as before: pandas conversion first, then polars.
    for convert in (result.to_pandas, result.to_polars):
        with pytest.raises(ValueError, match="No data available"):
            convert()


@pytest.mark.skipif(not PYARROW_INSTALLED, reason="pyarrow not installed")
def test_sql_result_to_arrow_with_none_data_raises() -> None:
    """Check that ``to_arrow()`` raises ValueError when data is None."""
    result_without_data = SQLResult(
        statement=SQL("SELECT * FROM users"),
        data=None,
    )

    with pytest.raises(ValueError, match="No data available"):
        result_without_data.to_arrow()