posit-dev · cpsievert · Dec 1, 2025 · Dec 1, 2025 · Dec 1, 2025 · Dec 1, 2025
diff --git a/pkg-py/CHANGELOG.md b/pkg-py/CHANGELOG.md
@@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 * The current SQL query and title can now be programmatically set through the `.sql()` and `.title()` methods of `QueryChat()`. (#98, #101)
 
+* New `querychat.data` module provides sample datasets (`titanic()` and `tips()`) to make it easier to get started without external dependencies. (#118)
+
 * Added a `.generate_greeting()` method to help you create a greeting message for your querychat bot. (#87)
 
 * Added `querychat_reset_dashboard()` tool for easily resetting the dashboard filters when asked by the user. (#81)

diff --git a/pkg-py/docs/build.qmd b/pkg-py/docs/build.qmd
@@ -181,13 +181,13 @@ Thanks to Shiny's support for [Jupyter Widgets](https://shiny.posit.co/py/docs/j
 ```python
 import plotly.express as px
 
-from seaborn import load_dataset
 from shiny.express import render, ui
 from shinywidgets import render_plotly
 
 from querychat.express import QueryChat
+from querychat.data import titanic
 
-titanic = load_dataset("titanic")
+titanic = titanic()
 qc = QueryChat(titanic, "titanic")
 qc.sidebar()
 
@@ -223,11 +223,11 @@ A more useful, but slightly more involved example like the one below might incor
 from shiny.express import render, ui
 from shinywidgets import render_plotly
 from querychat.express import QueryChat
-from seaborn import load_dataset
+from querychat.data import titanic
 from faicons import icon_svg
 import plotly.express as px
 
-titanic = load_dataset("titanic")
+titanic = titanic()
 qc = QueryChat(titanic, "titanic")
 qc.sidebar()
 
@@ -356,8 +356,9 @@ You can use multiple QueryChat instances in a single app to explore different da
 from seaborn import load_dataset
 from shiny.express import render, ui
 from querychat.express import QueryChat
+from querychat.data import titanic
 
-titanic = load_dataset("titanic")
+titanic = titanic()
 penguins = load_dataset("penguins")
 
 qc_titanic = QueryChat(titanic, "titanic")
@@ -396,11 +397,11 @@ Here's a complete example bringing together multiple concepts - a Titanic surviv
 ```python
 from shiny.express import render, ui
 from querychat.express import QueryChat
-from seaborn import load_dataset
+from querychat.data import titanic
 import plotly.express as px
 
 # Load data
-titanic = load_dataset("titanic")
+titanic = titanic()
 
 # Create QueryChat
 qc = QueryChat(

diff --git a/pkg-py/docs/context.qmd b/pkg-py/docs/context.qmd
@@ -12,9 +12,9 @@ For full visibility into the full system prompt that Querychat generates for the
 
 ```python
 from querychat import QueryChat
-from seaborn import load_dataset
+from querychat.data import titanic
 
-titanic = load_dataset("titanic")
+titanic = titanic()
 
 qc = QueryChat(titanic, "titanic")
 print(qc.system_prompt)

diff --git a/pkg-py/docs/data-sources.qmd b/pkg-py/docs/data-sources.qmd
@@ -160,9 +160,9 @@ Or, if you have a pandas DataFrame, you can create the DuckDB database like so:
 ```{.python filename="create-duckdb-from-pandas.py"}
 import duckdb
 import pandas as pd
+from querychat.data import titanic
 
-from seaborn import load_dataset
-titanic = load_dataset("titanic")
+titanic = titanic()
 
 conn = duckdb.connect("my_database.duckdb")
 conn.register('titanic_df', titanic)

diff --git a/pkg-py/docs/index.qmd b/pkg-py/docs/index.qmd
@@ -40,10 +40,10 @@ The quickest way to start chatting is to call the `.app()` method, which returns
 
 
 ```{.python filename="titanic-app.py"}
-from seaborn import load_dataset
 from querychat import QueryChat
+from querychat.data import titanic
 
-titanic = load_dataset("titanic")
+titanic = titanic()
 qc = QueryChat(titanic, "titanic", client="openai/gpt-4.1")
 app = qc.app()
 ```

diff --git a/pkg-py/docs/models.qmd b/pkg-py/docs/models.qmd
@@ -10,8 +10,9 @@ To use a particular model, pass a `"{provider}/{model}"` string to the `client`
 
 ```python
 from querychat import QueryChat
-from seaborn import load_dataset
-titanic = load_dataset("titanic")
+from querychat.data import titanic
+
+titanic = titanic()
 
 qc = QueryChat(
     titanic,

diff --git a/pkg-py/docs/tools.qmd b/pkg-py/docs/tools.qmd
@@ -23,9 +23,9 @@ Here's a basic example of this tool in action with the `.app()` method. Notice h
 
 ```{.python filename="titanic-app.py"}
 from querychat import QueryChat
-from seaborn import load_dataset
+from querychat.data import titanic
 
-titanic = load_dataset("titanic")
+titanic = titanic()
 qc = QueryChat(titanic, "titanic")
 app = qc.app()
 ```
@@ -46,9 +46,9 @@ Here's an example of it in action:
 
 ```{.python filename="titanic-app.py"}
 from querychat import QueryChat
-from seaborn import load_dataset
+from querychat.data import titanic
 
-titanic = load_dataset("titanic")
+titanic = titanic()
 qc = QueryChat(titanic, "titanic")
 app = qc.app()
 ```

diff --git a/pkg-py/examples/01-hello-app.py b/pkg-py/examples/01-hello-app.py
@@ -1,6 +1,6 @@
-from seaborn import load_dataset
 from querychat import QueryChat
+from querychat.data import titanic
 
-titanic = load_dataset("titanic")
+titanic = titanic()
 qc = QueryChat(titanic, "titanic")
 app = qc.app()
diff --git a/pkg-py/examples/02-prompt-app.py b/pkg-py/examples/02-prompt-app.py
@@ -1,9 +1,9 @@
 
 from pathlib import Path
-from seaborn import load_dataset
 from querychat import QueryChat
+from querychat.data import titanic
 
-titanic = load_dataset("titanic")
+titanic = titanic()
 
 greeting = Path(__file__).parent / "greeting.md"
 data_desc = Path(__file__).parent / "data_description.md"

diff --git a/pkg-py/examples/03-sidebar-core-app.py b/pkg-py/examples/03-sidebar-core-app.py
@@ -1,8 +1,8 @@
-from seaborn import load_dataset
 from shiny import App, render, ui
 from querychat import QueryChat
+from querychat.data import titanic
 
-titanic = load_dataset("titanic")
+titanic = titanic()
 
 # 1. Provide data source to QueryChat
 qc = QueryChat(titanic, "titanic")

diff --git a/pkg-py/examples/03-sidebar-express-app.py b/pkg-py/examples/03-sidebar-express-app.py
@@ -1,8 +1,8 @@
-from seaborn import load_dataset
 from shiny.express import render, ui
 from querychat.express import QueryChat
+from querychat.data import titanic
 
-titanic = load_dataset("titanic")
+titanic = titanic()
 
 # 1. Provide data source to QueryChat
 qc = QueryChat(titanic, "titanic")

diff --git a/pkg-py/src/querychat/data/__init__.py b/pkg-py/src/querychat/data/__init__.py
@@ -0,0 +1,68 @@
+"""
+Sample datasets for getting started with querychat.
+
+This module provides easy access to sample datasets that can be used with QueryChat
+to quickly get started without needing to install additional dependencies.
+"""
+
+from __future__ import annotations
+
+from importlib.resources import files
+
+import pandas as pd
+
+
+def titanic() -> pd.DataFrame:
+    """
+    Load the Titanic dataset.
+
+    Reads the gzipped CSV bundled with the ``querychat.data`` package and parses
+    it into a new DataFrame on every call (nothing is cached between calls).
+
+    Returns
+    -------
+    pandas.DataFrame
+        A DataFrame with 891 rows and 15 columns containing Titanic passenger data.
+
+    Examples
+    --------
+    >>> from querychat.data import titanic
+    >>> from querychat import QueryChat
+    >>> df = titanic()
+    >>> qc = QueryChat(df, "titanic")
+    >>> app = qc.app()
+
+    """
+    # Read through the Traversable API: str(path) breaks for zip-imported packages
+    with (files("querychat.data") / "titanic.csv.gz").open("rb") as f:
+        return pd.read_csv(f, compression="gzip")
+
+
+def tips() -> pd.DataFrame:
+    """
+    Load the tips dataset.
+
+    Reads the gzipped CSV bundled with the ``querychat.data`` package and parses
+    it into a new DataFrame on every call. Columns are total_bill, tip, sex,
+    smoker, day, time, and size.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A DataFrame with 244 rows and 7 columns containing restaurant tip data.
+
+    Examples
+    --------
+    >>> from querychat.data import tips
+    >>> from querychat import QueryChat
+    >>> df = tips()
+    >>> qc = QueryChat(df, "tips")
+    >>> app = qc.app()
+
+    """
+    # Read through the Traversable API: str(path) breaks for zip-imported packages
+    with (files("querychat.data") / "tips.csv.gz").open("rb") as f:
+        return pd.read_csv(f, compression="gzip")
+
+
+__all__ = ["tips", "titanic"]
diff --git a/pkg-py/src/querychat/data/tips.csv.gz b/pkg-py/src/querychat/data/tips.csv.gz
diff --git a/pkg-py/src/querychat/data/titanic.csv.gz b/pkg-py/src/querychat/data/titanic.csv.gz
diff --git a/pkg-py/tests/test_data.py b/pkg-py/tests/test_data.py
@@ -0,0 +1,127 @@
+"""Tests for the querychat.data module."""
+
+import pandas as pd
+from querychat.data import tips, titanic
+
+
+def test_titanic_returns_dataframe():
+    """The titanic() loader should hand back a pandas DataFrame."""
+    loaded = titanic()
+    assert isinstance(loaded, pd.DataFrame)
+
+
+def test_titanic_has_expected_shape():
+    """The Titanic dataset should have 891 rows and 15 columns."""
+    shape = titanic().shape
+    assert shape == (891, 15), f"Expected (891, 15) but got {shape}"
+
+
+def test_titanic_has_expected_columns():
+    """The Titanic dataset should expose exactly these columns, in this order."""
+    expected = [
+        "survived",
+        "pclass",
+        "sex",
+        "age",
+        "sibsp",
+        "parch",
+        "fare",
+        "embarked",
+        "class",
+        "who",
+        "adult_male",
+        "deck",
+        "embark_town",
+        "alive",
+        "alone",
+    ]
+    actual = list(titanic().columns)
+    assert actual == expected
+
+
+def test_titanic_data_integrity():
+    """Sanity-check the allowed values of key Titanic columns."""
+    passengers = titanic()
+
+    # survived may only contain 0 or 1
+    assert set(passengers["survived"].dropna().unique()).issubset({0, 1})
+
+    # pclass may only contain 1, 2 or 3
+    assert set(passengers["pclass"].dropna().unique()).issubset({1, 2, 3})
+
+    # sex may only contain 'male' or 'female'
+    assert set(passengers["sex"].dropna().unique()).issubset({"male", "female"})
+
+    # fare must never be negative
+    assert passengers["fare"].dropna().ge(0).all()
+
+
+def test_titanic_creates_new_copy():
+    """Each titanic() call should yield a distinct but equal DataFrame."""
+    first = titanic()
+    second = titanic()
+
+    # Two calls must produce two separate objects
+    assert first is not second
+
+    # The contents of both objects must match
+    assert first.equals(second)
+
+
+def test_tips_returns_dataframe():
+    """The tips() loader should hand back a pandas DataFrame."""
+    loaded = tips()
+    assert isinstance(loaded, pd.DataFrame)
+
+
+def test_tips_has_expected_shape():
+    """The tips dataset should have 244 rows and 7 columns."""
+    shape = tips().shape
+    assert shape == (244, 7), f"Expected (244, 7) but got {shape}"
+
+
+def test_tips_has_expected_columns():
+    """The tips dataset should expose exactly these columns, in this order."""
+    expected = [
+        "total_bill",
+        "tip",
+        "sex",
+        "smoker",
+        "day",
+        "time",
+        "size",
+    ]
+    actual = list(tips().columns)
+    assert actual == expected
+
+
+def test_tips_data_integrity():
+    """Sanity-check the allowed values of key tips columns."""
+    bills = tips()
+
+    # total_bill must be strictly positive
+    assert bills["total_bill"].gt(0).all()
+
+    # tip must never be negative
+    assert bills["tip"].ge(0).all()
+
+    # sex may only contain 'Male' or 'Female'
+    assert set(bills["sex"].dropna().unique()).issubset({"Male", "Female"})
+
+    # smoker may only contain 'Yes' or 'No'
+    assert set(bills["smoker"].dropna().unique()).issubset({"Yes", "No"})
+
+    # size must be strictly positive
+    assert bills["size"].gt(0).all()
+
+
+def test_tips_creates_new_copy():
+    """Each tips() call should yield a distinct but equal DataFrame."""
+    first = tips()
+    second = tips()
+
+    # Two calls must produce two separate objects
+    assert first is not second
+
+    # The contents of both objects must match
+    assert first.equals(second)
diff --git a/pkg-py/tests/test_querychat.py b/pkg-py/tests/test_querychat.py
@@ -60,4 +60,3 @@ def test_querychat_custom_id(sample_df):
     )
 
     assert qc.id == "custom_id"
-
Original file line number	Diff line number	Diff line change
Expand Up		@@ -60,4 +60,3 @@ def test_querychat_custom_id(sample_df):
		)

		assert qc.id == "custom_id"