Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg-py/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

* The current SQL query and title can now be programmatically set through the `.sql()` and `.title()` methods of `QueryChat()`. (#98, #101)

* New `querychat.data` module provides sample datasets (`titanic()` and `tips()`) to make it easier to get started without external dependencies. (#118)

* Added a `.generate_greeting()` method to help you create a greeting message for your querychat bot. (#87)

* Added `querychat_reset_dashboard()` tool for easily resetting the dashboard filters when asked by the user. (#81)
Expand Down
15 changes: 8 additions & 7 deletions pkg-py/docs/build.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,13 @@ Thanks to Shiny's support for [Jupyter Widgets](https://shiny.posit.co/py/docs/j
```python
import plotly.express as px

from seaborn import load_dataset
from shiny.express import render, ui
from shinywidgets import render_plotly

from querychat.express import QueryChat
from querychat.data import titanic

titanic = load_dataset("titanic")
titanic = titanic()
qc = QueryChat(titanic, "titanic")
qc.sidebar()

Expand Down Expand Up @@ -223,11 +223,11 @@ A more useful, but slightly more involved example like the one below might incor
from shiny.express import render, ui
from shinywidgets import render_plotly
from querychat.express import QueryChat
from seaborn import load_dataset
from querychat.data import titanic
from faicons import icon_svg
import plotly.express as px

titanic = load_dataset("titanic")
titanic = titanic()
qc = QueryChat(titanic, "titanic")
qc.sidebar()

Expand Down Expand Up @@ -356,8 +356,9 @@ You can use multiple QueryChat instances in a single app to explore different da
from seaborn import load_dataset
from shiny.express import render, ui
from querychat.express import QueryChat
from querychat.data import titanic

titanic = load_dataset("titanic")
titanic = titanic()
penguins = load_dataset("penguins")

qc_titanic = QueryChat(titanic, "titanic")
Expand Down Expand Up @@ -396,11 +397,11 @@ Here's a complete example bringing together multiple concepts - a Titanic surviv
```python
from shiny.express import render, ui
from querychat.express import QueryChat
from seaborn import load_dataset
from querychat.data import titanic
import plotly.express as px

# Load data
titanic = load_dataset("titanic")
titanic = titanic()

# Create QueryChat
qc = QueryChat(
Expand Down
4 changes: 2 additions & 2 deletions pkg-py/docs/context.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ For full visibility into the full system prompt that Querychat generates for the

```python
from querychat import QueryChat
from seaborn import load_dataset
from querychat.data import titanic

titanic = load_dataset("titanic")
titanic = titanic()

qc = QueryChat(titanic, "titanic")
print(qc.system_prompt)
Expand Down
4 changes: 2 additions & 2 deletions pkg-py/docs/data-sources.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,9 @@ Or, if you have a pandas DataFrame, you can create the DuckDB database like so:
```{.python filename="create-duckdb-from-pandas.py"}
import duckdb
import pandas as pd
from querychat.data import titanic

from seaborn import load_dataset
titanic = load_dataset("titanic")
titanic = titanic()

conn = duckdb.connect("my_database.duckdb")
conn.register('titanic_df', titanic)
Expand Down
4 changes: 2 additions & 2 deletions pkg-py/docs/index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ The quickest way to start chatting is to call the `.app()` method, which returns


```{.python filename="titanic-app.py"}
from seaborn import load_dataset
from querychat import QueryChat
from querychat.data import titanic

titanic = load_dataset("titanic")
titanic = titanic()
qc = QueryChat(titanic, "titanic", client="openai/gpt-4.1")
app = qc.app()
```
Expand Down
5 changes: 3 additions & 2 deletions pkg-py/docs/models.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ To use a particular model, pass a `"{provider}/{model}"` string to the `client`

```python
from querychat import QueryChat
from seaborn import load_dataset
titanic = load_dataset("titanic")
from querychat.data import titanic

titanic = titanic()

qc = QueryChat(
titanic,
Expand Down
8 changes: 4 additions & 4 deletions pkg-py/docs/tools.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ Here's a basic example of this tool in action with the `.app()` method. Notice h

```{.python filename="titanic-app.py"}
from querychat import QueryChat
from seaborn import load_dataset
from querychat.data import titanic

titanic = load_dataset("titanic")
titanic = titanic()
qc = QueryChat(titanic, "titanic")
app = qc.app()
```
Expand All @@ -46,9 +46,9 @@ Here's an example of it in action:

```{.python filename="titanic-app.py"}
from querychat import QueryChat
from seaborn import load_dataset
from querychat.data import titanic

titanic = load_dataset("titanic")
titanic = titanic()
qc = QueryChat(titanic, "titanic")
app = qc.app()
```
Expand Down
4 changes: 2 additions & 2 deletions pkg-py/examples/01-hello-app.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from seaborn import load_dataset
from querychat import QueryChat
from querychat.data import titanic

titanic = load_dataset("titanic")
titanic = titanic()
qc = QueryChat(titanic, "titanic")
app = qc.app()
4 changes: 2 additions & 2 deletions pkg-py/examples/02-prompt-app.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@

from pathlib import Path
from seaborn import load_dataset
from querychat import QueryChat
from querychat.data import titanic

titanic = load_dataset("titanic")
titanic = titanic()

greeting = Path(__file__).parent / "greeting.md"
data_desc = Path(__file__).parent / "data_description.md"
Expand Down
4 changes: 2 additions & 2 deletions pkg-py/examples/03-sidebar-core-app.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from seaborn import load_dataset
from shiny import App, render, ui
from querychat import QueryChat
from querychat.data import titanic

titanic = load_dataset("titanic")
titanic = titanic()

# 1. Provide data source to QueryChat
qc = QueryChat(titanic, "titanic")
Expand Down
4 changes: 2 additions & 2 deletions pkg-py/examples/03-sidebar-express-app.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from seaborn import load_dataset
from shiny.express import render, ui
from querychat.express import QueryChat
from querychat.data import titanic

titanic = load_dataset("titanic")
titanic = titanic()

# 1. Provide data source to QueryChat
qc = QueryChat(titanic, "titanic")
Expand Down
68 changes: 68 additions & 0 deletions pkg-py/src/querychat/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
Sample datasets for getting started with querychat.

This module provides easy access to sample datasets that can be used with QueryChat
to quickly get started without needing to install additional dependencies.
"""

from __future__ import annotations

from importlib.resources import files

import pandas as pd


def titanic() -> pd.DataFrame:
    """
    Load the Titanic dataset.

    This dataset contains information about passengers on the Titanic, including
    whether they survived, their class, age, sex, and other demographic information.

    The data is read from the gzipped CSV bundled with the ``querychat.data``
    package on every call, so each call returns a new DataFrame.

    Returns
    -------
    pandas.DataFrame
        A DataFrame with 891 rows and 15 columns containing Titanic passenger data.

    Examples
    --------
    >>> from querychat.data import titanic
    >>> from querychat import QueryChat
    >>> df = titanic()
    >>> qc = QueryChat(df, "titanic")
    >>> app = qc.app()

    """
    # Read through the Traversable API instead of str(resource): the string form
    # is only a usable filesystem path when the package is installed as loose
    # files, and fails when the package is imported from a zip archive.
    resource = files("querychat.data") / "titanic.csv.gz"
    with resource.open("rb") as stream:
        # compression must stay explicit; it cannot be inferred from a stream.
        return pd.read_csv(stream, compression="gzip")


def tips() -> pd.DataFrame:
    """
    Load the tips dataset.

    This dataset contains information about restaurant tips, including the total
    bill, tip amount, and information about the party (sex, smoker status, day,
    time, and party size).

    The data is read from the gzipped CSV bundled with the ``querychat.data``
    package on every call, so each call returns a new DataFrame.

    Returns
    -------
    pandas.DataFrame
        A DataFrame with 244 rows and 7 columns containing restaurant tip data.

    Examples
    --------
    >>> from querychat.data import tips
    >>> from querychat import QueryChat
    >>> df = tips()
    >>> qc = QueryChat(df, "tips")
    >>> app = qc.app()

    """
    # Read through the Traversable API instead of str(resource): the string form
    # is only a usable filesystem path when the package is installed as loose
    # files, and fails when the package is imported from a zip archive.
    resource = files("querychat.data") / "tips.csv.gz"
    with resource.open("rb") as stream:
        # compression must stay explicit; it cannot be inferred from a stream.
        return pd.read_csv(stream, compression="gzip")


__all__ = ["tips", "titanic"]
Binary file added pkg-py/src/querychat/data/tips.csv.gz
Binary file not shown.
Binary file added pkg-py/src/querychat/data/titanic.csv.gz
Binary file not shown.
127 changes: 127 additions & 0 deletions pkg-py/tests/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""Tests for the querychat.data module."""

import pandas as pd
from querychat.data import tips, titanic


def test_titanic_returns_dataframe():
    """Check that the titanic() loader hands back a pandas DataFrame."""
    assert isinstance(titanic(), pd.DataFrame)


def test_titanic_has_expected_shape():
    """Check the row and column counts of the Titanic dataset."""
    df = titanic()
    n_rows, n_cols = df.shape
    assert (n_rows, n_cols) == (891, 15), f"Expected (891, 15) but got {df.shape}"


def test_titanic_has_expected_columns():
    """Check that the Titanic columns match the expected names, in order."""
    wanted = [
        "survived",
        "pclass",
        "sex",
        "age",
        "sibsp",
        "parch",
        "fare",
        "embarked",
        "class",
        "who",
        "adult_male",
        "deck",
        "embark_town",
        "alive",
        "alone",
    ]
    actual = list(titanic().columns)
    assert actual == wanted


def test_titanic_data_integrity():
    """Sanity-check the value domains of key Titanic columns."""
    passengers = titanic()

    # Categorical columns may only contain values from a known, closed set
    # (missing values are ignored).
    allowed_values = {
        "survived": {0, 1},
        "pclass": {1, 2, 3},
        "sex": {"male", "female"},
    }
    for column, allowed in allowed_values.items():
        observed = set(passengers[column].dropna().unique())
        assert observed.issubset(allowed)

    # Fares can be zero but never negative.
    known_fares = passengers["fare"].dropna()
    assert (known_fares >= 0).all()


def test_titanic_creates_new_copy():
    """Check that every titanic() call yields a distinct but equal DataFrame."""
    first, second = titanic(), titanic()

    # Distinct objects, so mutating one result cannot affect another...
    assert first is not second

    # ...yet the contents are identical.
    assert first.equals(second)


def test_tips_returns_dataframe():
    """Check that the tips() loader hands back a pandas DataFrame."""
    assert isinstance(tips(), pd.DataFrame)


def test_tips_has_expected_shape():
    """Check the row and column counts of the tips dataset."""
    df = tips()
    n_rows, n_cols = df.shape
    assert (n_rows, n_cols) == (244, 7), f"Expected (244, 7) but got {df.shape}"


def test_tips_has_expected_columns():
    """Check that the tips columns match the expected names, in order."""
    wanted = [
        "total_bill",
        "tip",
        "sex",
        "smoker",
        "day",
        "time",
        "size",
    ]
    actual = list(tips().columns)
    assert actual == wanted


def test_tips_data_integrity():
    """Sanity-check the value domains of the tips dataset columns."""
    bills = tips()

    # Every bill is strictly positive; a tip may be zero but not negative.
    total_bill = bills["total_bill"]
    assert (total_bill > 0).all()

    tip = bills["tip"]
    assert (tip >= 0).all()

    # Categorical columns only contain their known labels (missing values ignored).
    sexes = set(bills["sex"].dropna().unique())
    assert sexes.issubset({"Male", "Female"})

    smokers = set(bills["smoker"].dropna().unique())
    assert smokers.issubset({"Yes", "No"})

    # A party always has at least one person.
    party_size = bills["size"]
    assert (party_size > 0).all()


def test_tips_creates_new_copy():
    """Check that every tips() call yields a distinct but equal DataFrame."""
    first, second = tips(), tips()

    # Distinct objects, so mutating one result cannot affect another...
    assert first is not second

    # ...yet the contents are identical.
    assert first.equals(second)
1 change: 0 additions & 1 deletion pkg-py/tests/test_querychat.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,3 @@ def test_querychat_custom_id(sample_df):
)

assert qc.id == "custom_id"