From 50d3727bbc6f2e2d9713dffb17f35e2cb5254512 Mon Sep 17 00:00:00 2001 From: Cody Fincher Date: Wed, 26 Nov 2025 19:26:55 +0000 Subject: [PATCH 1/2] feat(result): add conversion methods to Arrow, Pandas, and Polars DataFrames --- sqlspec/core/result.py | 71 ++++++++++ tests/unit/test_sql_result_conversion.py | 159 +++++++++++++++++++++++ 2 files changed, 230 insertions(+) create mode 100644 tests/unit/test_sql_result_conversion.py diff --git a/sqlspec/core/result.py b/sqlspec/core/result.py index 349fc9b7a..0244bcf39 100644 --- a/sqlspec/core/result.py +++ b/sqlspec/core/result.py @@ -574,6 +574,77 @@ def scalar_or_none(self) -> Any: return next(iter(row.values())) + def to_arrow(self) -> "ArrowTable": + """Convert result data to Apache Arrow Table. + + Returns: + Arrow Table containing the result data. + + Raises: + ValueError: If no data available. + + Examples: + >>> result = session.select("SELECT * FROM users") + >>> table = result.to_arrow() + >>> print(table.num_rows) + 3 + """ + if self.data is None: + msg = "No data available" + raise ValueError(msg) + + from sqlspec.utils.arrow_helpers import convert_dict_to_arrow + + return convert_dict_to_arrow(self.data, return_format="table") + + def to_pandas(self) -> "PandasDataFrame": + """Convert result data to pandas DataFrame. + + Returns: + pandas DataFrame containing the result data. + + Raises: + ValueError: If no data available. + + Examples: + >>> result = session.select("SELECT * FROM users") + >>> df = result.to_pandas() + >>> print(df.head()) + """ + if self.data is None: + msg = "No data available" + raise ValueError(msg) + + ensure_pandas() + + import pandas as pd + + return pd.DataFrame(self.data) + + def to_polars(self) -> "PolarsDataFrame": + """Convert result data to Polars DataFrame. + + Returns: + Polars DataFrame containing the result data. + + Raises: + ValueError: If no data available. 
+ + Examples: + >>> result = session.select("SELECT * FROM users") + >>> df = result.to_polars() + >>> print(df.head()) + """ + if self.data is None: + msg = "No data available" + raise ValueError(msg) + + ensure_polars() + + import polars as pl + + return pl.DataFrame(self.data) + def write_to_storage_sync( self, destination: "StorageDestination", diff --git a/tests/unit/test_sql_result_conversion.py b/tests/unit/test_sql_result_conversion.py new file mode 100644 index 000000000..9b0485873 --- /dev/null +++ b/tests/unit/test_sql_result_conversion.py @@ -0,0 +1,159 @@ +"""Tests for SQLResult DataFrame conversion methods.""" + +from typing import Any + +import pytest + +from sqlspec.core import SQL, SQLResult +from sqlspec.typing import PYARROW_INSTALLED + + +@pytest.fixture +def sample_data() -> list[dict[str, Any]]: + """Create sample dict data for testing.""" + return [ + {"id": 1, "name": "Alice", "age": 30}, + {"id": 2, "name": "Bob", "age": 25}, + {"id": 3, "name": "Charlie", "age": 35}, + ] + + +@pytest.fixture +def sql_result(sample_data: list[dict[str, Any]]) -> SQLResult: + """Create an SQLResult with sample data.""" + stmt = SQL("SELECT * FROM users") + return SQLResult(statement=stmt, data=sample_data, rows_affected=3) + + +@pytest.mark.skipif(not PYARROW_INSTALLED, reason="pyarrow not installed") +def test_sql_result_to_arrow(sql_result: SQLResult) -> None: + """Test converting SQLResult to Arrow Table.""" + import pyarrow as pa + + table = sql_result.to_arrow() + + assert isinstance(table, pa.Table) + assert table.num_rows == 3 + assert table.column_names == ["id", "name", "age"] + + +@pytest.mark.skipif(not PYARROW_INSTALLED, reason="pyarrow not installed") +def test_sql_result_to_arrow_empty_data() -> None: + """Test to_arrow() with empty data list.""" + import pyarrow as pa + + stmt = SQL("SELECT * FROM users WHERE 1=0") + result = SQLResult(statement=stmt, data=[]) + + table = result.to_arrow() + + assert isinstance(table, pa.Table) + assert 
table.num_rows == 0 + + +def test_sql_result_to_pandas(sql_result: SQLResult) -> None: + """Test converting SQLResult to pandas DataFrame.""" + pandas = pytest.importorskip("pandas") + + df = sql_result.to_pandas() + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 3 + assert list(df.columns) == ["id", "name", "age"] + assert df["name"].tolist() == ["Alice", "Bob", "Charlie"] + + +def test_sql_result_to_pandas_empty_data() -> None: + """Test to_pandas() with empty data list.""" + pandas = pytest.importorskip("pandas") + + stmt = SQL("SELECT * FROM users WHERE 1=0") + result = SQLResult(statement=stmt, data=[]) + + df = result.to_pandas() + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 0 + + +def test_sql_result_to_pandas_with_null_values() -> None: + """Test to_pandas() correctly handles NULL values.""" + pandas = pytest.importorskip("pandas") + + data: list[dict[str, Any]] = [ + {"id": 1, "name": "Alice", "email": "alice@example.com"}, + {"id": 2, "name": "Bob", "email": None}, + {"id": 3, "name": None, "email": "charlie@example.com"}, + ] + stmt = SQL("SELECT * FROM users") + result = SQLResult(statement=stmt, data=data) + + df = result.to_pandas() + + assert pandas.isna(df.loc[1, "email"]) + assert pandas.isna(df.loc[2, "name"]) + + +def test_sql_result_to_polars(sql_result: SQLResult) -> None: + """Test converting SQLResult to Polars DataFrame.""" + polars = pytest.importorskip("polars") + + df = sql_result.to_polars() + + assert isinstance(df, polars.DataFrame) + assert len(df) == 3 + assert df.columns == ["id", "name", "age"] + assert df["name"].to_list() == ["Alice", "Bob", "Charlie"] + + +def test_sql_result_to_polars_empty_data() -> None: + """Test to_polars() with empty data list.""" + polars = pytest.importorskip("polars") + + stmt = SQL("SELECT * FROM users WHERE 1=0") + result = SQLResult(statement=stmt, data=[]) + + df = result.to_polars() + + assert isinstance(df, polars.DataFrame) + assert len(df) == 0 + + +def 
test_sql_result_to_polars_with_null_values() -> None: + """Test to_polars() correctly handles NULL values.""" + pytest.importorskip("polars") + + data: list[dict[str, Any]] = [ + {"id": 1, "name": "Alice", "email": "alice@example.com"}, + {"id": 2, "name": "Bob", "email": None}, + {"id": 3, "name": None, "email": "charlie@example.com"}, + ] + stmt = SQL("SELECT * FROM users") + result = SQLResult(statement=stmt, data=data) + + df = result.to_polars() + + assert df["email"][1] is None + assert df["name"][2] is None + + +def test_sql_result_methods_with_none_data_raise() -> None: + """Test that methods raise ValueError when data is None.""" + stmt = SQL("SELECT * FROM users") + result = SQLResult(statement=stmt, data=None) + + with pytest.raises(ValueError, match="No data available"): + result.to_pandas() + + with pytest.raises(ValueError, match="No data available"): + result.to_polars() + + +@pytest.mark.skipif(not PYARROW_INSTALLED, reason="pyarrow not installed") +def test_sql_result_to_arrow_with_none_data_raises() -> None: + """Test that to_arrow() raises ValueError when data is None.""" + stmt = SQL("SELECT * FROM users") + result = SQLResult(statement=stmt, data=None) + + with pytest.raises(ValueError, match="No data available"): + result.to_arrow() From a8ac53defb9ca8862abb917ab24b97bfe537f759 Mon Sep 17 00:00:00 2001 From: Cody Fincher Date: Wed, 26 Nov 2025 19:34:09 +0000 Subject: [PATCH 2/2] feat(arrow): add conversion methods for SQLResult to DataFrames --- docs/guides/architecture/arrow-integration.md | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/docs/guides/architecture/arrow-integration.md b/docs/guides/architecture/arrow-integration.md index 4ab46da47..16c37aead 100644 --- a/docs/guides/architecture/arrow-integration.md +++ b/docs/guides/architecture/arrow-integration.md @@ -611,6 +611,29 @@ result = await session.select_to_arrow("SELECT * FROM users") df = result.to_pandas() # Zero-copy if native path ``` +### Converting 
SQLResult to DataFrames + +Both `SQLResult` and `ArrowResult` support convenient conversion methods for data science workflows: + +```python +# Standard execute returns SQLResult +result = await session.execute("SELECT * FROM users") + +# Convert to pandas DataFrame +df = result.to_pandas() + +# Convert to Polars DataFrame +pl_df = result.to_polars() + +# Convert to Arrow Table (for SQLResult only) +arrow_table = result.to_arrow() +``` + +**Key differences**: +- `SQLResult.to_arrow()` performs dict→Arrow conversion +- `ArrowResult` data is already in Arrow format +- Both provide a consistent API for pandas and Polars conversion + ### From DataFrame-Centric to Arrow-First **Before**: