posit-dev
diff --git a/‎CLAUDE.md‎
Lines changed: 8 additions & 0 deletions b/‎CLAUDE.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎pkg-py/CHANGELOG.md‎
Lines changed: 6 additions & 0 deletions b/‎pkg-py/CHANGELOG.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎pkg-py/docs/build.qmd‎
Lines changed: 1 addition & 1 deletion b/‎pkg-py/docs/build.qmd‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg-py/docs/data-sources.qmd‎
Lines changed: 1 addition & 1 deletion b/‎pkg-py/docs/data-sources.qmd‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg-py/src/querychat/_datasource.py‎
Lines changed: 41 additions & 51 deletions b/‎pkg-py/src/querychat/_datasource.py‎
Lines changed: 41 additions & 51 deletions
@@ -69,6 +69,14 @@ make py-build
 make py-docs
 ```
 
+Before finishing your implementation or committing any code, you should run:
+
+```bash
+uv run ruff check --fix pkg-py --config pyproject.toml
+```
+
+To get help with making sure code adheres to project standards.
+
 ### R Package
 
 ```bash
 
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [UNRELEASED]
 
+### Breaking Changes
+
+* Methods like `execute_query()`, `get_data()`, and `df()` now return a `narwhals.DataFrame` instead of a `pandas.DataFrame`. This allows querychat to drop its `pandas` dependency, and for you to use any `narwhals`-compatible dataframe of your choosing.
+  * If this breaks existing code, note you can call `.to_native()` on the new dataframe value to get your `pandas` dataframe back.
+  * Note that `polars` or `pandas` will be needed to realize a `sqlalchemy` connection query as a dataframe. Install with `pip install querychat[pandas]` or `pip install querychat[polars]`
+
 ### New features
 
 * `QueryChat.sidebar()`, `QueryChat.ui()`, and `QueryChat.server()` now support an optional `id` parameter to create multiple chat instances from a single `QueryChat` object. (#172)
 
@@ -220,7 +220,7 @@ with ui.layout_columns():
 
         @render_plotly
         def survival_plot():
-            d = qc.df()
+            d = qc.df().to_native()  # Convert for pandas groupby()
             summary = d.groupby('pclass')['survived'].mean().reset_index()
             return px.bar(summary, x='pclass', y='survived')
 ```
 
@@ -63,7 +63,7 @@ app = qc.app()
 
 :::
 
-If you're [building an app](build.qmd), note you can read the queried data frame reactively using the `df()` method, which returns a `pandas.DataFrame` by default. 
+If you're [building an app](build.qmd), note you can read the queried data frame reactively using the `df()` method, which returns a `narwhals.DataFrame`. Call `.to_native()` on the result to get the underlying pandas or polars DataFrame. 
 
 ## Databases
 
 
@@ -5,14 +5,13 @@
 
 import duckdb
 import narwhals.stable.v1 as nw
-import pandas as pd
 from sqlalchemy import inspect, text
 from sqlalchemy.sql import sqltypes
 
+from ._df_compat import duckdb_result_to_nw, read_sql
 from ._utils import check_query
 
 if TYPE_CHECKING:
-    from narwhals.stable.v1.typing import IntoFrame
     from sqlalchemy.engine import Connection, Engine
 
 
@@ -59,7 +58,7 @@ def get_schema(self, *, categorical_threshold: int) -> str:
         ...
 
     @abstractmethod
-    def execute_query(self, query: str) -> pd.DataFrame:
+    def execute_query(self, query: str) -> nw.DataFrame:
         """
         Execute SQL query and return results as DataFrame.
 
@@ -71,15 +70,15 @@ def execute_query(self, query: str) -> pd.DataFrame:
         Returns
         -------
         :
-            Query results as a pandas DataFrame
+            Query results as a narwhals DataFrame
 
         """
         ...
 
     @abstractmethod
     def test_query(
         self, query: str, *, require_all_columns: bool = False
-    ) -> pd.DataFrame:
+    ) -> nw.DataFrame:
         """
         Test SQL query by fetching only one row.
 
@@ -94,7 +93,7 @@ def test_query(
         Returns
         -------
         :
-            Query results as a pandas DataFrame with at most one row
+            Query results as a narwhals DataFrame with at most one row
 
         Raises
         ------
@@ -105,14 +104,14 @@ def test_query(
         ...
 
     @abstractmethod
-    def get_data(self) -> pd.DataFrame:
+    def get_data(self) -> nw.DataFrame:
         """
         Return the unfiltered data as a DataFrame.
 
         Returns
         -------
         :
-            The complete dataset as a pandas DataFrame
+            The complete dataset as a narwhals DataFrame
 
         """
         ...
@@ -133,28 +132,27 @@ def cleanup(self) -> None:
 
 
 class DataFrameSource(DataSource):
-    """A DataSource implementation that wraps a pandas DataFrame using DuckDB."""
+    """A DataSource implementation that wraps a DataFrame using DuckDB."""
 
-    _df: nw.DataFrame | nw.LazyFrame
+    _df: nw.DataFrame
 
-    def __init__(self, df: IntoFrame, table_name: str):
+    def __init__(self, df: nw.DataFrame, table_name: str):
         """
-        Initialize with a pandas DataFrame.
+        Initialize with a DataFrame.
 
         Parameters
         ----------
         df
-            The DataFrame to wrap
+            The DataFrame to wrap (pandas, polars, or any narwhals-compatible frame)
         table_name
             Name of the table in SQL queries
 
         """
-        self._df = nw.from_native(df)
+        self._df = nw.from_native(df) if not isinstance(df, nw.DataFrame) else df
         self.table_name = table_name
 
         self._conn = duckdb.connect(database=":memory:")
-        # TODO(@gadenbuie): What if the data frame is already SQL-backed?
-        self._conn.register(table_name, self._df.lazy().collect().to_pandas())
+        self._conn.register(table_name, self._df.to_native())
         self._conn.execute("""
 -- extensions: lock down supply chain + auto behaviors
 SET allow_community_extensions = false;
@@ -203,16 +201,8 @@ def get_schema(self, *, categorical_threshold: int) -> str:
         """
         schema = [f"Table: {self.table_name}", "Columns:"]
 
-        # Ensure we're working with a DataFrame, not a LazyFrame
-        ndf = (
-            self._df.head(10).collect()
-            if isinstance(self._df, nw.LazyFrame)
-            else self._df
-        )
-
-        for column in ndf.columns:
-            # Map pandas dtypes to SQL-like types
-            dtype = ndf[column].dtype
+        for column in self._df.columns:
+            dtype = self._df[column].dtype
             if dtype.is_integer():
                 sql_type = "INTEGER"
             elif dtype.is_float():
@@ -228,17 +218,14 @@ def get_schema(self, *, categorical_threshold: int) -> str:
 
             column_info = [f"- {column} ({sql_type})"]
 
-            # For TEXT columns, check if they're categorical
             if sql_type == "TEXT":
-                unique_values = ndf[column].drop_nulls().unique()
+                unique_values = self._df[column].drop_nulls().unique()
                 if unique_values.len() <= categorical_threshold:
                     categories = unique_values.to_list()
                     categories_str = ", ".join([f"'{c}'" for c in categories])
                     column_info.append(f"  Categorical values: {categories_str}")
-
-            # For numeric columns, include range
             elif sql_type in ["INTEGER", "FLOAT", "DATE", "TIME"]:
-                rng = ndf[column].min(), ndf[column].max()
+                rng = self._df[column].min(), self._df[column].max()
                 if rng[0] is None and rng[1] is None:
                     column_info.append("  Range: NULL to NULL")
                 else:
@@ -248,10 +235,12 @@ def get_schema(self, *, categorical_threshold: int) -> str:
 
         return "\n".join(schema)
 
-    def execute_query(self, query: str) -> pd.DataFrame:
+    def execute_query(self, query: str) -> nw.DataFrame:
         """
         Execute query using DuckDB.
 
+        Uses polars if available, otherwise falls back to pandas.
+
         Parameters
         ----------
         query
@@ -260,7 +249,7 @@ def execute_query(self, query: str) -> pd.DataFrame:
         Returns
         -------
         :
-            Query results as pandas DataFrame
+            Query results as narwhals DataFrame
 
         Raises
         ------
@@ -269,11 +258,11 @@ def execute_query(self, query: str) -> pd.DataFrame:
 
         """
         check_query(query)
-        return self._conn.execute(query).df()
+        return duckdb_result_to_nw(self._conn.execute(query))
 
     def test_query(
         self, query: str, *, require_all_columns: bool = False
-    ) -> pd.DataFrame:
+    ) -> nw.DataFrame:
         """
         Test query by fetching only one row.
 
@@ -298,7 +287,7 @@ def test_query(
 
         """
         check_query(query)
-        result = self._conn.execute(f"{query} LIMIT 1").df()
+        result = duckdb_result_to_nw(self._conn.execute(f"{query} LIMIT 1"))
 
         if require_all_columns:
             result_columns = set(result.columns)
@@ -316,18 +305,17 @@ def test_query(
 
         return result
 
-    def get_data(self) -> pd.DataFrame:
+    def get_data(self) -> nw.DataFrame:
         """
         Return the unfiltered data as a DataFrame.
 
         Returns
         -------
         :
-            The complete dataset as a pandas DataFrame
+            The complete dataset as a narwhals DataFrame
 
         """
-        # TODO(@gadenbuie): This should just return `self._df` and not a pandas DataFrame
-        return self._df.lazy().collect().to_pandas()
+        return self._df
 
     def cleanup(self) -> None:
         """
@@ -519,10 +507,12 @@ def get_schema(self, *, categorical_threshold: int) -> str:  # noqa: PLR0912
 
         return "\n".join(schema)
 
-    def execute_query(self, query: str) -> pd.DataFrame:
+    def execute_query(self, query: str) -> nw.DataFrame:
         """
         Execute SQL query and return results as DataFrame.
 
+        Uses polars if available, otherwise falls back to pandas.
+
         Parameters
         ----------
         query
@@ -531,7 +521,7 @@ def execute_query(self, query: str) -> pd.DataFrame:
         Returns
         -------
         :
-            Query results as pandas DataFrame
+            Query results as narwhals DataFrame
 
         Raises
         ------
@@ -541,11 +531,11 @@ def execute_query(self, query: str) -> pd.DataFrame:
         """
         check_query(query)
         with self._get_connection() as conn:
-            return pd.read_sql_query(text(query), conn)
+            return read_sql(text(query), conn)
 
     def test_query(
         self, query: str, *, require_all_columns: bool = False
-    ) -> pd.DataFrame:
+    ) -> nw.DataFrame:
         """
         Test query by fetching only one row.
 
@@ -571,16 +561,16 @@ def test_query(
         """
         check_query(query)
         with self._get_connection() as conn:
-            # Use pandas read_sql_query with limit to get at most one row
+            # Use read_sql with limit to get at most one row
             limit_query = f"SELECT * FROM ({query}) AS subquery LIMIT 1"
             try:
-                df = pd.read_sql_query(text(limit_query), conn)
+                result = read_sql(text(limit_query), conn)
             except Exception:
                 # If LIMIT syntax doesn't work, fall back to regular read and take first row
-                df = pd.read_sql_query(text(query), conn).head(1)
+                result = read_sql(text(query), conn).head(1)
 
             if require_all_columns:
-                result_columns = set(df.columns)
+                result_columns = set(result.columns)
                 original_columns_set = set(self._colnames)
                 missing_columns = original_columns_set - result_columns
 
@@ -595,16 +585,16 @@ def test_query(
                         f"Original columns: {original_list}"
                     )
 
-            return df
+            return result
 
-    def get_data(self) -> pd.DataFrame:
+    def get_data(self) -> nw.DataFrame:
         """
         Return the unfiltered data as a DataFrame.
 
         Returns
         -------
         :
-            The complete dataset as a pandas DataFrame
+            The complete dataset as a narwhals DataFrame
 
         """
         return self.execute_query(f"SELECT * FROM {self.table_name}")