posit-dev
diff --git a/‎pkg-py/CHANGELOG.md‎
Lines changed: 6 additions & 0 deletions b/‎pkg-py/CHANGELOG.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎pkg-py/src/querychat/_datasource.py‎
Lines changed: 32 additions & 41 deletions b/‎pkg-py/src/querychat/_datasource.py‎
Lines changed: 32 additions & 41 deletions
diff --git a/‎pkg-py/src/querychat/_df_compat.py‎
Lines changed: 70 additions & 0 deletions b/‎pkg-py/src/querychat/_df_compat.py‎
Lines changed: 70 additions & 0 deletions
diff --git a/‎pkg-py/src/querychat/_querychat.py‎
Lines changed: 14 additions & 5 deletions b/‎pkg-py/src/querychat/_querychat.py‎
Lines changed: 14 additions & 5 deletions
diff --git a/‎pkg-py/src/querychat/_querychat_module.py‎
Lines changed: 4 additions & 4 deletions b/‎pkg-py/src/querychat/_querychat_module.py‎
Lines changed: 4 additions & 4 deletions
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [UNRELEASED]
 
+### Breaking Changes
+
+* Methods like `execute_query()`, `get_data()`, and `df()` now return a `narwhals.DataFrame` instead of a `pandas.DataFrame`. This allows querychat to drop its `pandas` dependency, and lets you use any `narwhals`-compatible dataframe of your choosing.
+  * If this breaks existing code, note you can call `.to_native()` on the new dataframe value to get your `pandas` dataframe back.
+  * Note that `polars` or `pandas` will be needed to realize a `sqlalchemy` connection query as a dataframe. Install with `pip install querychat[pandas]` or `pip install querychat[polars]`.
+
 ### New features
 
 * `QueryChat.sidebar()`, `QueryChat.ui()`, and `QueryChat.server()` now support an optional `id` parameter to create multiple chat instances from a single `QueryChat` object. (#172)
 
@@ -5,12 +5,12 @@
 
 import duckdb
 import narwhals.stable.v1 as nw
-import pandas as pd
 from sqlalchemy import inspect, text
 from sqlalchemy.sql import sqltypes
 
+from ._df_compat import duckdb_result_to_nw, read_sql
+
 if TYPE_CHECKING:
-    from narwhals.stable.v1.typing import IntoFrame
     from sqlalchemy.engine import Connection, Engine
 
 
@@ -53,7 +53,7 @@ def get_schema(self, *, categorical_threshold: int) -> str:
         ...
 
     @abstractmethod
-    def execute_query(self, query: str) -> pd.DataFrame:
+    def execute_query(self, query: str) -> nw.DataFrame:
         """
         Execute SQL query and return results as DataFrame.
 
@@ -65,20 +65,20 @@ def execute_query(self, query: str) -> pd.DataFrame:
         Returns
         -------
         :
-            Query results as a pandas DataFrame
+            Query results as a narwhals DataFrame
 
         """
         ...
 
     @abstractmethod
-    def get_data(self) -> pd.DataFrame:
+    def get_data(self) -> nw.DataFrame:
         """
         Return the unfiltered data as a DataFrame.
 
         Returns
         -------
         :
-            The complete dataset as a pandas DataFrame
+            The complete dataset as a narwhals DataFrame
 
         """
         ...
@@ -99,27 +99,26 @@ def cleanup(self) -> None:
 
 
 class DataFrameSource(DataSource):
-    """A DataSource implementation that wraps a pandas DataFrame using DuckDB."""
+    """A DataSource implementation that wraps a DataFrame using DuckDB."""
 
-    _df: nw.DataFrame | nw.LazyFrame
+    _df: nw.DataFrame
 
-    def __init__(self, df: IntoFrame, table_name: str):
+    def __init__(self, df: nw.DataFrame, table_name: str):
         """
-        Initialize with a pandas DataFrame.
+        Initialize with a DataFrame.
 
         Parameters
         ----------
         df
-            The DataFrame to wrap
+            The DataFrame to wrap (pandas, polars, or any narwhals-compatible frame)
         table_name
             Name of the table in SQL queries
 
         """
         self._conn = duckdb.connect(database=":memory:")
-        self._df = nw.from_native(df)
+        self._df = nw.from_native(df) if not isinstance(df, nw.DataFrame) else df
         self.table_name = table_name
-        # TODO(@gadenbuie): If the data frame is already SQL-backed, maybe we shouldn't be making a new copy here.
-        self._conn.register(table_name, self._df.lazy().collect().to_pandas())
+        self._conn.register(table_name, self._df.to_native())
 
     def get_db_type(self) -> str:
         """
@@ -151,16 +150,8 @@ def get_schema(self, *, categorical_threshold: int) -> str:
         """
         schema = [f"Table: {self.table_name}", "Columns:"]
 
-        # Ensure we're working with a DataFrame, not a LazyFrame
-        ndf = (
-            self._df.head(10).collect()
-            if isinstance(self._df, nw.LazyFrame)
-            else self._df
-        )
-
-        for column in ndf.columns:
-            # Map pandas dtypes to SQL-like types
-            dtype = ndf[column].dtype
+        for column in self._df.columns:
+            dtype = self._df[column].dtype
             if dtype.is_integer():
                 sql_type = "INTEGER"
             elif dtype.is_float():
@@ -176,17 +167,14 @@ def get_schema(self, *, categorical_threshold: int) -> str:
 
             column_info = [f"- {column} ({sql_type})"]
 
-            # For TEXT columns, check if they're categorical
             if sql_type == "TEXT":
-                unique_values = ndf[column].drop_nulls().unique()
+                unique_values = self._df[column].drop_nulls().unique()
                 if unique_values.len() <= categorical_threshold:
                     categories = unique_values.to_list()
                     categories_str = ", ".join([f"'{c}'" for c in categories])
                     column_info.append(f"  Categorical values: {categories_str}")
-
-            # For numeric columns, include range
             elif sql_type in ["INTEGER", "FLOAT", "DATE", "TIME"]:
-                rng = ndf[column].min(), ndf[column].max()
+                rng = self._df[column].min(), self._df[column].max()
                 if rng[0] is None and rng[1] is None:
                     column_info.append("  Range: NULL to NULL")
                 else:
@@ -196,10 +184,12 @@ def get_schema(self, *, categorical_threshold: int) -> str:
 
         return "\n".join(schema)
 
-    def execute_query(self, query: str) -> pd.DataFrame:
+    def execute_query(self, query: str) -> nw.DataFrame:
         """
         Execute query using DuckDB.
 
+        Uses polars if available, otherwise falls back to pandas.
+
         Parameters
         ----------
         query
@@ -208,23 +198,22 @@ def execute_query(self, query: str) -> pd.DataFrame:
         Returns
         -------
         :
-            Query results as pandas DataFrame
+            Query results as narwhals DataFrame
 
         """
-        return self._conn.execute(query).df()
+        return duckdb_result_to_nw(self._conn.execute(query))
 
-    def get_data(self) -> pd.DataFrame:
+    def get_data(self) -> nw.DataFrame:
         """
         Return the unfiltered data as a DataFrame.
 
         Returns
         -------
         :
-            The complete dataset as a pandas DataFrame
+            The complete dataset as a narwhals DataFrame
 
         """
-        # TODO(@gadenbuie): This should just return `self._df` and not a pandas DataFrame
-        return self._df.lazy().collect().to_pandas()
+        return self._df
 
     def cleanup(self) -> None:
         """
@@ -412,10 +401,12 @@ def get_schema(self, *, categorical_threshold: int) -> str:  # noqa: PLR0912
 
         return "\n".join(schema)
 
-    def execute_query(self, query: str) -> pd.DataFrame:
+    def execute_query(self, query: str) -> nw.DataFrame:
         """
         Execute SQL query and return results as DataFrame.
 
+        Uses polars if available, otherwise falls back to pandas.
+
         Parameters
         ----------
         query
@@ -424,20 +415,20 @@ def execute_query(self, query: str) -> pd.DataFrame:
         Returns
         -------
         :
-            Query results as pandas DataFrame
+            Query results as narwhals DataFrame
 
         """
         with self._get_connection() as conn:
-            return pd.read_sql_query(text(query), conn)
+            return read_sql(text(query), conn)
 
-    def get_data(self) -> pd.DataFrame:
+    def get_data(self) -> nw.DataFrame:
         """
         Return the unfiltered data as a DataFrame.
 
         Returns
         -------
         :
-            The complete dataset as a pandas DataFrame
+            The complete dataset as a narwhals DataFrame
 
         """
         return self.execute_query(f"SELECT * FROM {self.table_name}")
 
@@ -0,0 +1,70 @@
+"""
+DataFrame compatibility: try polars first, fall back to pandas.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import duckdb
+import narwhals.stable.v1 as nw
+
+if TYPE_CHECKING:
+    from sqlalchemy.engine import Connection
+    from sqlalchemy.sql.elements import TextClause
+
+_INSTALL_MSG = "Install one with: pip install polars  OR  pip install pandas"
+
+
+def read_sql(query: TextClause, conn: Connection) -> nw.DataFrame:
+    """
+    Run a SQL query on a SQLAlchemy connection and wrap the result in narwhals.
+
+    Backends are attempted in order: polars first, then pandas. An
+    ``ImportError`` raised while attempting a backend moves on to the next one.
+
+    Parameters
+    ----------
+    query
+        The SQL statement to execute
+    conn
+        The SQLAlchemy connection to run the statement on
+
+    Returns
+    -------
+    :
+        Query results as a narwhals DataFrame
+
+    Raises
+    ------
+    ImportError
+        If neither the polars nor the pandas attempt succeeded
+
+    """
+
+    def _via_polars() -> nw.DataFrame:
+        import polars as pl  # noqa: PLC0415  # pyright: ignore[reportMissingImports]
+
+        frame = pl.read_database(query, connection=conn)
+        return nw.from_native(frame)
+
+    def _via_pandas() -> nw.DataFrame:
+        import pandas as pd  # noqa: PLC0415  # pyright: ignore[reportMissingImports]
+
+        frame = pd.read_sql_query(query, conn)
+        return nw.from_native(frame)
+
+    # Order matters: polars is preferred, pandas is the fallback.
+    for load in (_via_polars, _via_pandas):
+        try:
+            return load()
+        except ImportError:
+            continue
+
+    raise ImportError(f"SQLAlchemySource requires 'polars' or 'pandas'. {_INSTALL_MSG}")
+
+
+def duckdb_result_to_nw(result: duckdb.DuckDBPyResult) -> nw.DataFrame:
+    """
+    Materialize a DuckDB result as a narwhals DataFrame.
+
+    Conversions are attempted in order: ``result.pl()`` when polars imports,
+    then ``result.df()`` when pandas imports. An ``ImportError`` raised while
+    attempting a conversion moves on to the next one.
+
+    Parameters
+    ----------
+    result
+        The DuckDB result to convert
+
+    Returns
+    -------
+    :
+        The result wrapped as a narwhals DataFrame
+
+    Raises
+    ------
+    ImportError
+        If neither the polars nor the pandas attempt succeeded
+
+    """
+
+    def _as_polars() -> nw.DataFrame:
+        import polars  # noqa: PLC0415  # pyright: ignore[reportMissingImports,reportUnusedImport]
+
+        native = result.pl()
+        return nw.from_native(native)
+
+    def _as_pandas() -> nw.DataFrame:
+        import pandas  # noqa: PLC0415  # pyright: ignore[reportMissingImports,reportUnusedImport]
+
+        native = result.df()
+        return nw.from_native(native)
+
+    # Order matters: polars is preferred, pandas is the fallback.
+    for convert in (_as_polars, _as_pandas):
+        try:
+            return convert()
+        except ImportError:
+            continue
+
+    raise ImportError(f"DataFrameSource requires 'polars' or 'pandas'. {_INSTALL_MSG}")
+
+
+def read_csv(path: str) -> nw.DataFrame:
+    """
+    Read a CSV file into a narwhals DataFrame.
+
+    polars is tried first; pandas is the fallback. The pandas fallback no
+    longer assumes the file is gzip-compressed: gzip is selected only when the
+    file starts with the gzip magic number, and otherwise pandas infers the
+    compression, so plain CSV files load under either backend.
+
+    Parameters
+    ----------
+    path
+        Path of the CSV file to read (optionally compressed)
+
+    Returns
+    -------
+    :
+        The file contents as a narwhals DataFrame
+
+    Raises
+    ------
+    ImportError
+        If neither the polars nor the pandas attempt succeeded
+
+    """
+    try:
+        import polars as pl  # noqa: PLC0415  # pyright: ignore[reportMissingImports]
+
+        return nw.from_native(pl.read_csv(path))
+    except ImportError:
+        pass
+
+    try:
+        import pandas as pd  # noqa: PLC0415  # pyright: ignore[reportMissingImports]
+
+        # Sniff the two-byte gzip magic number so gzip files are still read
+        # as gzip whatever their extension, while uncompressed files are no
+        # longer forced through the gzip decoder.
+        try:
+            with open(path, "rb") as fh:
+                is_gzip = fh.read(2) == b"\x1f\x8b"
+        except OSError:
+            # Not a readable local file (e.g. a URL); let pandas decide.
+            is_gzip = False
+
+        compression = "gzip" if is_gzip else "infer"
+        return nw.from_native(pd.read_csv(path, compression=compression))
+    except ImportError:
+        pass
+
+    raise ImportError(f"Loading data requires 'polars' or 'pandas'. {_INSTALL_MSG}")
@@ -8,6 +8,7 @@
 
 import chatlas
 import chevron
+import narwhals.stable.v1 as nw
 import sqlalchemy
 from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui
 from shiny.express._stub_session import ExpressStubSession
@@ -29,8 +30,7 @@
 if TYPE_CHECKING:
     from collections.abc import Callable
 
-    import pandas as pd
-    from narwhals.stable.v1.typing import IntoFrame
+    from narwhals.typing import IntoDataFrame, IntoFrame, IntoLazyFrame
 
 TOOL_GROUPS = Literal["update", "query"]
 
@@ -797,14 +797,14 @@ def __init__(
             enable_bookmarking=enable,
         )
 
-    def df(self) -> pd.DataFrame:
+    def df(self) -> nw.DataFrame:
         """
         Reactively read the current filtered data frame that is in effect.
 
         Returns
         -------
         :
-            The current filtered data frame as a pandas DataFrame. If no query
+            The current filtered data frame as a narwhals DataFrame. If no query
             has been set, this will return the unfiltered data frame from the
             data source.
 
@@ -883,7 +883,16 @@ def normalize_data_source(
         return data_source
     if isinstance(data_source, sqlalchemy.Engine):
         return SQLAlchemySource(data_source, table_name)
-    return DataFrameSource(data_source, table_name)
+    src = nw.from_native(data_source, pass_through=True)
+    if isinstance(src, nw.DataFrame):
+        return DataFrameSource(src, table_name)
+    if isinstance(src, nw.LazyFrame):
+        raise NotImplementedError("LazyFrame data sources are not yet supported (they will be soon).")
+    # Adjacent string literals are joined with no separator, so each piece
+    # carries its own trailing space to keep the message (and URL) readable.
+    raise TypeError(
+        f"Unsupported data source type: {type(data_source)}. "
+        "If you believe this type should be supported, please open an issue at "
+        "https://github.com/posit-dev/querychat/issues"
+    )
 
 
 def as_querychat_client(client: str | chatlas.Chat | None) -> chatlas.Chat:
 
@@ -15,7 +15,7 @@
 if TYPE_CHECKING:
     from collections.abc import Callable
 
-    import pandas as pd
+    import narwhals.stable.v1 as nw
     from shiny import Inputs, Outputs, Session
     from shiny.bookmark import BookmarkState, RestoreState
 
@@ -78,7 +78,7 @@ class ServerValues:
 
     """
 
-    df: Callable[[], pd.DataFrame]
+    df: Callable[[], nw.DataFrame]
     sql: ReactiveStringOrNone
     title: ReactiveStringOrNone
     client: chatlas.Chat
@@ -182,14 +182,14 @@ def _():
 
         @session.bookmark.on_bookmark
         def _on_bookmark(x: BookmarkState) -> None:
-            vals = x.values  # noqa: PD011
+            vals = x.values
             vals["querychat_sql"] = sql.get()
             vals["querychat_title"] = title.get()
             vals["querychat_has_greeted"] = has_greeted.get()
 
         @session.bookmark.on_restore
         def _on_restore(x: RestoreState) -> None:
-            vals = x.values  # noqa: PD011
+            vals = x.values
             if "querychat_sql" in vals:
                 sql.set(vals["querychat_sql"])
             if "querychat_title" in vals: