Skip to content

Commit ee4ea74

Browse files
authored
feat(result): add conversion methods to Arrow, Pandas, and Polars (#269)
Add conversion methods to Arrow, Pandas, and Polars DataFrames
1 parent d774e7e commit ee4ea74

File tree

3 files changed

+253
-0
lines changed

3 files changed

+253
-0
lines changed

docs/guides/architecture/arrow-integration.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,29 @@ result = await session.select_to_arrow("SELECT * FROM users")
611611
df = result.to_pandas() # Zero-copy if native path
612612
```
613613

614+
### Converting SQLResult to DataFrames
615+
616+
Both `SQLResult` and `ArrowResult` support convenient conversion methods for data science workflows:
617+
618+
```python
619+
# Standard execute returns SQLResult
620+
result = await session.execute("SELECT * FROM users")
621+
622+
# Convert to pandas DataFrame
623+
df = result.to_pandas()
624+
625+
# Convert to Polars DataFrame
626+
pl_df = result.to_polars()
627+
628+
# Convert to Arrow Table (SQLResult only; ArrowResult data is already in Arrow format)
629+
arrow_table = result.to_arrow()
630+
```
631+
632+
**Key differences**:
633+
- `SQLResult.to_arrow()` performs a dict→Arrow conversion
634+
- `ArrowResult` data is already in Arrow format
635+
- Both provide a consistent API for pandas and Polars conversion
636+
614637
### From DataFrame-Centric to Arrow-First
615638

616639
**Before**:

sqlspec/core/result.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,77 @@ def scalar_or_none(self) -> Any:
574574

575575
return next(iter(row.values()))
576576

577+
def to_arrow(self) -> "ArrowTable":
    """Build an Apache Arrow Table from the rows held by this result.

    Returns:
        The value produced by ``convert_dict_to_arrow`` for ``self.data``
        when called with ``return_format="table"``.

    Raises:
        ValueError: If ``self.data`` is ``None``.

    Examples:
        >>> result = session.select("SELECT * FROM users")
        >>> table = result.to_arrow()
        >>> print(table.num_rows)
        3
    """
    rows = self.data
    if rows is None:
        msg = "No data available"
        raise ValueError(msg)

    # Imported at call time, after the guard, so the helper module is only
    # loaded when a conversion is actually requested.
    from sqlspec.utils.arrow_helpers import convert_dict_to_arrow

    arrow_table = convert_dict_to_arrow(rows, return_format="table")
    return arrow_table
600+
def to_pandas(self) -> "PandasDataFrame":
    """Build a pandas DataFrame from the rows held by this result.

    Returns:
        A pandas DataFrame constructed directly from ``self.data``.

    Raises:
        ValueError: If ``self.data`` is ``None``.

    Examples:
        >>> result = session.select("SELECT * FROM users")
        >>> df = result.to_pandas()
        >>> print(df.head())
    """
    rows = self.data
    if rows is None:
        msg = "No data available"
        raise ValueError(msg)

    # Run the dependency check before touching pandas itself.
    # NOTE(review): presumably raises when pandas is missing - confirm in ensure_pandas.
    ensure_pandas()

    from pandas import DataFrame

    return DataFrame(rows)
624+
def to_polars(self) -> "PolarsDataFrame":
    """Build a Polars DataFrame from the rows held by this result.

    Returns:
        A Polars DataFrame constructed directly from ``self.data``.

    Raises:
        ValueError: If ``self.data`` is ``None``.

    Examples:
        >>> result = session.select("SELECT * FROM users")
        >>> df = result.to_polars()
        >>> print(df.head())
    """
    rows = self.data
    if rows is None:
        msg = "No data available"
        raise ValueError(msg)

    # Run the dependency check before touching polars itself.
    # NOTE(review): presumably raises when polars is missing - confirm in ensure_polars.
    ensure_polars()

    from polars import DataFrame

    return DataFrame(rows)
577648
def write_to_storage_sync(
578649
self,
579650
destination: "StorageDestination",
Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
"""Tests for SQLResult DataFrame conversion methods."""
2+
3+
from typing import Any
4+
5+
import pytest
6+
7+
from sqlspec.core import SQL, SQLResult
8+
from sqlspec.typing import PYARROW_INSTALLED
9+
10+
11+
@pytest.fixture
def sample_data() -> list[dict[str, Any]]:
    """Provide three user rows as dicts keyed by ``id``, ``name`` and ``age``."""
    columns = ("id", "name", "age")
    raw_rows = (
        (1, "Alice", 30),
        (2, "Bob", 25),
        (3, "Charlie", 35),
    )
    # zip keeps the column order, so every dict has keys in id/name/age order.
    return [dict(zip(columns, raw_row)) for raw_row in raw_rows]
19+
20+
21+
@pytest.fixture
def sql_result(sample_data: list[dict[str, Any]]) -> SQLResult:
    """Wrap the ``sample_data`` rows in an SQLResult reporting three affected rows."""
    return SQLResult(
        statement=SQL("SELECT * FROM users"),
        data=sample_data,
        rows_affected=3,
    )
26+
27+
28+
@pytest.mark.skipif(not PYARROW_INSTALLED, reason="pyarrow not installed")
def test_sql_result_to_arrow(sql_result: SQLResult) -> None:
    """to_arrow() returns a pyarrow Table with the fixture's rows and columns."""
    import pyarrow as pa

    expected_columns = ["id", "name", "age"]

    arrow_table = sql_result.to_arrow()

    assert isinstance(arrow_table, pa.Table)
    assert arrow_table.num_rows == 3
    assert arrow_table.column_names == expected_columns
38+
39+
40+
@pytest.mark.skipif(not PYARROW_INSTALLED, reason="pyarrow not installed")
def test_sql_result_to_arrow_empty_data() -> None:
    """to_arrow() on an empty row list returns a pyarrow Table with zero rows."""
    import pyarrow as pa

    empty_result = SQLResult(statement=SQL("SELECT * FROM users WHERE 1=0"), data=[])

    arrow_table = empty_result.to_arrow()

    assert isinstance(arrow_table, pa.Table)
    assert arrow_table.num_rows == 0
52+
53+
54+
def test_sql_result_to_pandas(sql_result: SQLResult) -> None:
    """to_pandas() returns a DataFrame with the fixture's rows, columns and names."""
    pd = pytest.importorskip("pandas")

    expected_columns = ["id", "name", "age"]
    expected_names = ["Alice", "Bob", "Charlie"]

    frame = sql_result.to_pandas()

    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 3
    assert list(frame.columns) == expected_columns
    assert frame["name"].tolist() == expected_names
64+
65+
66+
def test_sql_result_to_pandas_empty_data() -> None:
    """to_pandas() on an empty row list returns a DataFrame of length zero."""
    pd = pytest.importorskip("pandas")

    empty_result = SQLResult(statement=SQL("SELECT * FROM users WHERE 1=0"), data=[])

    frame = empty_result.to_pandas()

    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 0
77+
78+
79+
def test_sql_result_to_pandas_with_null_values() -> None:
    """to_pandas() surfaces ``None`` cells as values that pandas reports as NA."""
    pd = pytest.importorskip("pandas")

    # Only the None entries are asserted on; the non-null literals are kept verbatim.
    rows_with_nulls: list[dict[str, Any]] = [
        {"id": 1, "name": "Alice", "email": "[email protected]"},
        {"id": 2, "name": "Bob", "email": None},
        {"id": 3, "name": None, "email": "[email protected]"},
    ]
    result = SQLResult(statement=SQL("SELECT * FROM users"), data=rows_with_nulls)

    frame = result.to_pandas()

    # Row label 1 has the missing email, row label 2 the missing name.
    assert pd.isna(frame.loc[1, "email"])
    assert pd.isna(frame.loc[2, "name"])
95+
96+
97+
def test_sql_result_to_polars(sql_result: SQLResult) -> None:
    """to_polars() returns a DataFrame with the fixture's rows, columns and names."""
    pl = pytest.importorskip("polars")

    expected_columns = ["id", "name", "age"]
    expected_names = ["Alice", "Bob", "Charlie"]

    frame = sql_result.to_polars()

    assert isinstance(frame, pl.DataFrame)
    assert len(frame) == 3
    assert frame.columns == expected_columns
    assert frame["name"].to_list() == expected_names
107+
108+
109+
def test_sql_result_to_polars_empty_data() -> None:
    """to_polars() on an empty row list returns a DataFrame of length zero."""
    pl = pytest.importorskip("polars")

    empty_result = SQLResult(statement=SQL("SELECT * FROM users WHERE 1=0"), data=[])

    frame = empty_result.to_polars()

    assert isinstance(frame, pl.DataFrame)
    assert len(frame) == 0
120+
121+
122+
def test_sql_result_to_polars_with_null_values() -> None:
    """to_polars() keeps ``None`` cells as ``None`` when read back from the frame."""
    pytest.importorskip("polars")

    # Only the None entries are asserted on; the non-null literals are kept verbatim.
    rows_with_nulls: list[dict[str, Any]] = [
        {"id": 1, "name": "Alice", "email": "[email protected]"},
        {"id": 2, "name": "Bob", "email": None},
        {"id": 3, "name": None, "email": "[email protected]"},
    ]
    result = SQLResult(statement=SQL("SELECT * FROM users"), data=rows_with_nulls)

    frame = result.to_polars()

    # Index 1 has the missing email, index 2 the missing name.
    assert frame["email"][1] is None
    assert frame["name"][2] is None
138+
139+
140+
def test_sql_result_methods_with_none_data_raise() -> None:
    """to_pandas() and to_polars() both raise ValueError when data is None."""
    result = SQLResult(statement=SQL("SELECT * FROM users"), data=None)

    # Same expectation for both converters, checked in the original order.
    for convert in (result.to_pandas, result.to_polars):
        with pytest.raises(ValueError, match="No data available"):
            convert()
150+
151+
152+
@pytest.mark.skipif(not PYARROW_INSTALLED, reason="pyarrow not installed")
def test_sql_result_to_arrow_with_none_data_raises() -> None:
    """to_arrow() raises ValueError when the result carries no data."""
    dataless_result = SQLResult(statement=SQL("SELECT * FROM users"), data=None)

    with pytest.raises(ValueError, match="No data available"):
        dataless_result.to_arrow()

0 commit comments

Comments
 (0)