diff --git a/pkg-py/CHANGELOG.md b/pkg-py/CHANGELOG.md index 901f47ac..9b600116 100644 --- a/pkg-py/CHANGELOG.md +++ b/pkg-py/CHANGELOG.md @@ -7,7 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [UNRELEASED] +### New features + +* `QueryChat.client()` can now create standalone querychat-enabled chat clients with configurable tools and callbacks, enabling use outside of Shiny applications. (#168) + +* `QueryChat.console()` was added to launch interactive console-based chat sessions with your data source, with persistent conversation state across invocations. (#168) +* The tools used in a `QueryChat` chatbot are now configurable. Use the new `tools` parameter of `QueryChat()` to select either or both of the `"query"` and `"update"` tools. Choose `tools="update"` if you only want QueryChat to be able to update the dashboard (useful when you want to be 100% certain that the LLM will not see _any_ raw data). (#168) ## [0.3.0] - 2025-12-10 diff --git a/pkg-py/src/querychat/_querychat.py b/pkg-py/src/querychat/_querychat.py index 1ab03f53..8a7469e4 100644 --- a/pkg-py/src/querychat/_querychat.py +++ b/pkg-py/src/querychat/_querychat.py @@ -17,11 +17,23 @@ from ._datasource import DataFrameSource, DataSource, SQLAlchemySource from ._icons import bs_icon from ._querychat_module import GREETING_PROMPT, ServerValues, mod_server, mod_ui +from ._system_prompt import QueryChatSystemPrompt +from ._utils import MISSING, MISSING_TYPE +from .tools import ( + UpdateDashboardData, + tool_query, + tool_reset_dashboard, + tool_update_dashboard, +) if TYPE_CHECKING: + from collections.abc import Callable + import pandas as pd from narwhals.stable.v1.typing import IntoFrame +TOOL_GROUPS = Literal["update", "query"] + class QueryChatBase: def __init__( @@ -32,6 +44,7 @@ def __init__( id: Optional[str] = None, greeting: Optional[str | Path] = None, client: Optional[str | chatlas.Chat] = None, + tools: TOOL_GROUPS | tuple[TOOL_GROUPS, ...] 
| None = ("update", "query"), data_description: Optional[str | Path] = None, categorical_threshold: int = 20, extra_instructions: Optional[str | Path] = None, @@ -47,21 +60,28 @@ def __init__( self.id = id or table_name + self.tools = normalize_tools(tools, default=("update", "query")) self.greeting = greeting.read_text() if isinstance(greeting, Path) else greeting - prompt = assemble_system_prompt( - self._data_source, + # Store prompt components for lazy assembly + if prompt_template is None: + prompt_template = Path(__file__).parent / "prompts" / "prompt.md" + + self._system_prompt = QueryChatSystemPrompt( + prompt_template=prompt_template, + data_source=self._data_source, data_description=data_description, extra_instructions=extra_instructions, categorical_threshold=categorical_threshold, - prompt_template=prompt_template, ) # Fork and empty chat now so the per-session forks are fast client = as_querychat_client(client) self._client = copy.deepcopy(client) self._client.set_turns([]) - self._client.system_prompt = prompt + + # Storage for console client + self._client_console = None def app( self, *, bookmark_store: Literal["url", "server", "disable"] = "url" @@ -241,6 +261,158 @@ def generate_greeting(self, *, echo: Literal["none", "output"] = "none"): client.set_turns([]) return str(client.chat(GREETING_PROMPT, echo=echo)) + def client( + self, + *, + tools: TOOL_GROUPS | tuple[TOOL_GROUPS, ...] | None | MISSING_TYPE = MISSING, + update_dashboard: Callable[[UpdateDashboardData], None] | None = None, + reset_dashboard: Callable[[], None] | None = None, + ) -> chatlas.Chat: + """ + Create a chat client with registered tools. + + This method creates a standalone chat client configured with the + specified tools and callbacks. Each call returns an independent client + instance with its own conversation state. + + Parameters + ---------- + tools + Which tools to include: `"update"`, `"query"`, or both. 
Can be: + - A single tool string: `"update"` or `"query"` + - A tuple of tools: `("update", "query")` + - `None` or `()` to skip adding any tools + - If not provided (default), uses the tools specified during initialization + update_dashboard + Optional callback function to call when the update_dashboard tool + succeeds. Takes a dict with `"query"` and `"title"` keys. Only used + if `"update"` is in tools. + reset_dashboard + Optional callback function to call when the `tool_reset_dashboard` + is invoked. Takes no arguments. Only used if `"update"` is in tools. + + Returns + ------- + chatlas.Chat + A configured chat client with tools registered based on the tools parameter. + + Examples + -------- + ```python + from querychat import QueryChat + import pandas as pd + + df = pd.DataFrame({"a": [1, 2, 3]}) + qc = QueryChat(df, "my_data") + + # Create client with all tools (default) + client = qc.client() + response = client.chat("What's the average of column a?") + + # Create client with only query tool (single string) + client = qc.client(tools="query") + + # Create client with only query tool (tuple) + client = qc.client(tools=("query",)) + + # Create client with custom callbacks + from querychat import UpdateDashboardData + + + def my_update(data: UpdateDashboardData): + print(f"Query: {data['query']}, Title: {data['title']}") + + + client = qc.client(update_dashboard=my_update) + ``` + + """ + tools = normalize_tools(tools, default=self.tools) + + chat = copy.deepcopy(self._client) + chat.set_turns([]) + + chat.system_prompt = self._system_prompt.render(tools) + + if tools is None: + return chat + + if "update" in tools: + # Default callbacks that do nothing + update_fn = update_dashboard or (lambda _: None) + reset_fn = reset_dashboard or (lambda: None) + + chat.register_tool(tool_update_dashboard(self._data_source, update_fn)) + chat.register_tool(tool_reset_dashboard(reset_fn)) + + if "query" in tools: + chat.register_tool(tool_query(self._data_source)) + + 
return chat + + def console( + self, + *, + new: bool = False, + tools: TOOL_GROUPS | tuple[TOOL_GROUPS, ...] | None = "query", + **kwargs, + ) -> None: + """ + Launch an interactive console chat with the data. + + This method provides a REPL (Read-Eval-Print Loop) interface for + chatting with your data from the command line. The console session + persists by default, so you can exit and return to continue your + conversation. + + Parameters + ---------- + new + If True, creates a new chat client and starts a fresh conversation. + If False (default), continues the conversation from the previous + console session. + tools + Which tools to include: "update", "query", or both. Can be: + - A single tool string: `"update"` or `"query"` + - A tuple of tools: `("update", "query")` + - `None` or `()` to skip adding any tools + - If not provided (default), defaults to `("query",)` only. Note that + the `"query"` tool lets the LLM see data values; `"update"` does not. + Ignored if `new=False` and a console session already exists. + **kwargs + Additional arguments passed to the `client()` method when creating a + new client. + + Examples + -------- + ```python + from querychat import QueryChat + import pandas as pd + + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + qc = QueryChat(df, "my_data") + + # Start console (query tool only by default) + qc.console() + + # Start fresh console with all tools (using tuple) + qc.console(new=True, tools=("update", "query")) + + # Start fresh console with only the query tool (using a single string) + qc.console(new=True, tools="query") + + # Continue previous console session + qc.console() # picks up where you left off + ``` + + """ + tools = normalize_tools(tools, default=("query",)) + + if new or self._client_console is None: + self._client_console = self.client(tools=tools, **kwargs) + + self._client_console.console() + @property def system_prompt(self) -> str: """ @@ -252,7 +424,7 @@ def system_prompt(self) -> str: The system prompt string. 
""" - return self._client.system_prompt or "" + return self._system_prompt.render(self.tools) @property def data_source(self): @@ -286,8 +458,13 @@ class QueryChat(QueryChatBase): """ Create a QueryChat instance. + QueryChat enables natural language interaction with your data through an + LLM-powered chat interface. It can be used in Shiny applications, as a + standalone chat client, or in an interactive console. + Examples -------- + **Basic Shiny app:** ```python from querychat import QueryChat @@ -295,6 +472,29 @@ class QueryChat(QueryChatBase): qc.app() ``` + **Standalone chat client:** + ```python + from querychat import QueryChat + import pandas as pd + + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + qc = QueryChat(df, "my_data") + + # Get a chat client with all tools + client = qc.client() + response = client.chat("What's the average of column a?") + + # Start an interactive console chat + qc.console() + ``` + + **Privacy-focused mode:** Only allow dashboard filtering, ensuring the LLM + can't see any raw data. + ```python + qc = QueryChat(df, "my_data", tools="update") + qc.app() + ``` + Parameters ---------- data_source @@ -324,6 +524,19 @@ class QueryChat(QueryChatBase): If `client` is not provided, querychat consults the `QUERYCHAT_CLIENT` environment variable. If that is not set, it defaults to `"openai"`. + tools + Which querychat tools to include in the chat client by default. Can be: + - A single tool string: `"update"` or `"query"` + - A tuple of tools: `("update", "query")` + - `None` or `()` to disable all tools + + Default is `("update", "query")` (both tools enabled). + + Set to `"update"` to prevent the LLM from accessing data values, only + allowing dashboard filtering without answering questions. + + The tools can be overridden per-client by passing a different `tools` + parameter to the `.client()` method. data_description Description of the data in plain text or Markdown. 
If a pathlib.Path object is passed, querychat will read the contents of the path into a @@ -419,7 +632,7 @@ def title(): self.id, data_source=self._data_source, greeting=self.greeting, - client=self._client, + client=self.client, enable_bookmarking=enable_bookmarking, ) @@ -648,19 +861,6 @@ def title(self, value: Optional[str] = None) -> str | None | bool: else: return self._vals.title.set(value) - @property - def client(self): - """ - Get the (session-specific) chat client. - - Returns - ------- - : - The current chat client. - - """ - return self._vals.client - def normalize_data_source( data_source: IntoFrame | sqlalchemy.Engine | DataSource, @@ -731,3 +931,20 @@ def assemble_system_prompt( "extra_instructions": extra_instructions_str, }, ) + + +def normalize_tools( + tools: TOOL_GROUPS | tuple[TOOL_GROUPS, ...] | None | MISSING_TYPE, + default: tuple[TOOL_GROUPS, ...] | None, +) -> tuple[TOOL_GROUPS, ...] | None: + if tools is None or tools == (): + return None + elif isinstance(tools, MISSING_TYPE): + return default + elif isinstance(tools, str): + return (tools,) + elif isinstance(tools, tuple): + return tools + else: + # Convert any other sequence to tuple + return tuple(tools) diff --git a/pkg-py/src/querychat/_querychat_module.py b/pkg-py/src/querychat/_querychat_module.py index 26402584..f2bed066 100644 --- a/pkg-py/src/querychat/_querychat_module.py +++ b/pkg-py/src/querychat/_querychat_module.py @@ -6,6 +6,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Union +import chatlas import shinychat from shiny import module, reactive, ui @@ -14,12 +15,12 @@ if TYPE_CHECKING: from collections.abc import Callable - import chatlas import pandas as pd from shiny import Inputs, Outputs, Session from shiny.bookmark import BookmarkState, RestoreState from ._datasource import DataSource + from .types import UpdateDashboardData ReactiveString = reactive.Value[str] """A reactive string value.""" @@ -91,7 +92,7 @@ def mod_server( *, data_source: 
DataSource, greeting: str | None, - client: chatlas.Chat, + client: chatlas.Chat | Callable, enable_bookmarking: bool, ): # Reactive values to store state @@ -99,18 +100,27 @@ def mod_server( title = ReactiveStringOrNone(None) has_greeted = reactive.value[bool](False) # noqa: FBT003 - # Set up the chat object for this session - chat = copy.deepcopy(client) + def update_dashboard(data: UpdateDashboardData): + sql.set(data["query"]) + title.set(data["title"]) - # Create the tool functions - update_dashboard_tool = tool_update_dashboard(data_source, sql, title) - reset_dashboard_tool = tool_reset_dashboard(sql, title) - query_tool = tool_query(data_source) + def reset_dashboard(): + sql.set(None) + title.set(None) - # Register tools with annotations for the UI - chat.register_tool(update_dashboard_tool) - chat.register_tool(query_tool) - chat.register_tool(reset_dashboard_tool) + # Set up the chat object for this session + # Support both a callable that creates a client and legacy instance pattern + if callable(client) and not isinstance(client, chatlas.Chat): + chat = client( + update_dashboard=update_dashboard, reset_dashboard=reset_dashboard + ) + else: + # Legacy pattern: client is Chat instance + chat = copy.deepcopy(client) + + chat.register_tool(tool_update_dashboard(data_source, update_dashboard)) + chat.register_tool(tool_query(data_source)) + chat.register_tool(tool_reset_dashboard(reset_dashboard)) # Execute query when SQL changes @reactive.calc @@ -168,7 +178,7 @@ def _(): title.set(new_title) if enable_bookmarking: - chat_ui.enable_bookmarking(client) + chat_ui.enable_bookmarking(chat) @session.bookmark.on_bookmark def _on_bookmark(x: BookmarkState) -> None: diff --git a/pkg-py/src/querychat/_system_prompt.py b/pkg-py/src/querychat/_system_prompt.py new file mode 100644 index 00000000..81936fb9 --- /dev/null +++ b/pkg-py/src/querychat/_system_prompt.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +from pathlib import Path +from typing import 
TYPE_CHECKING + +import chevron + +if TYPE_CHECKING: + from ._datasource import DataSource + from ._querychat import TOOL_GROUPS + + +class QueryChatSystemPrompt: + """Manages system prompt template and component assembly.""" + + def __init__( + self, + prompt_template: str | Path, + data_source: DataSource, + data_description: str | Path | None = None, + extra_instructions: str | Path | None = None, + categorical_threshold: int = 10, + ): + """ + Initialize with prompt components. + + Args: + prompt_template: Mustache template string or path to template file + data_source: DataSource instance for schema generation + data_description: Optional data context (string or path) + extra_instructions: Optional custom LLM instructions (string or path) + categorical_threshold: Threshold for categorical column detection + + """ + if isinstance(prompt_template, Path): + self.template = prompt_template.read_text() + else: + self.template = prompt_template + + if isinstance(data_description, Path): + self.data_description = data_description.read_text() + else: + self.data_description = data_description + + if isinstance(extra_instructions, Path): + self.extra_instructions = extra_instructions.read_text() + else: + self.extra_instructions = extra_instructions + + self.schema = data_source.get_schema( + categorical_threshold=categorical_threshold + ) + + self.categorical_threshold = categorical_threshold + self.data_source = data_source + + def render(self, tools: tuple[TOOL_GROUPS, ...] | None) -> str: + """ + Render system prompt with tool configuration. 
+ + Args: + tools: Normalized tuple of tool groups to enable (already normalized by caller) + + Returns: + Fully rendered system prompt string + + """ + is_duck_db = self.data_source.get_db_type().lower() == "duckdb" + + context = { + "db_type": self.data_source.get_db_type(), + "is_duck_db": is_duck_db, + "schema": self.schema, + "data_description": self.data_description, + "extra_instructions": self.extra_instructions, + "has_tool_update": "update" in tools if tools else False, + "has_tool_query": "query" in tools if tools else False, + } + + return chevron.render(self.template, context) diff --git a/pkg-py/src/querychat/_utils.py b/pkg-py/src/querychat/_utils.py index f24b3832..1d90dfd9 100644 --- a/pkg-py/src/querychat/_utils.py +++ b/pkg-py/src/querychat/_utils.py @@ -11,6 +11,15 @@ from narwhals.stable.v1.typing import IntoFrame +class MISSING_TYPE: # noqa: N801 + """ + A singleton representing a missing value. + """ + + +MISSING = MISSING_TYPE() + + @contextmanager def temp_env_vars(env_vars: dict[str, Optional[str]]): """ diff --git a/pkg-py/src/querychat/prompts/prompt.md b/pkg-py/src/querychat/prompts/prompt.md index 8dbb348c..be276d64 100644 --- a/pkg-py/src/querychat/prompts/prompt.md +++ b/pkg-py/src/querychat/prompts/prompt.md @@ -35,9 +35,10 @@ quantile_cont(salary, 0.5) {{/is_duck_db}} ## Your Capabilities -You can handle three types of requests: +You can handle these types of requests: -### 1. Filtering and Sorting Data +{{#has_tool_update}} +### Filtering and Sorting Data When the user asks you to filter or sort the dashboard, e.g. "Show me..." or "Which ____ have the highest ____?" or "Filter to only include ____": @@ -51,7 +52,16 @@ When the user asks you to filter or sort the dashboard, e.g. "Show me..." or "Wh The user may ask to "reset" or "start over"; that means clearing the filter and title. Do this by calling `querychat_reset_dashboard()`. -### 2. 
Answering Questions About Data +**Filtering Example:** +User: "Show only rows where sales are above average" +Tool Call: `querychat_update_dashboard({query: "SELECT * FROM table WHERE sales > (SELECT AVG(sales) FROM table)", title: "Above average sales"})` +Response: "" + +No further response needed, the user will see the updated dashboard. + +{{/has_tool_update}} +{{#has_tool_query}} +### Answering Questions About Data When the user asks you a question about the data, e.g. "What is the average ____?" or "How many ____ are there?" or "Which ____ has the highest ____?": @@ -61,7 +71,15 @@ When the user asks you a question about the data, e.g. "What is the average ____ - Users can see your SQL queries and will ask you to explain the code if needed - If you cannot complete the request using SQL, politely decline and explain why -### 3. Providing Suggestions for Next Steps +**Question Example:** +User: "What's the average revenue?" +Tool Call: `querychat_query({query: "SELECT AVG(revenue) AS avg_revenue FROM table"})` +Response: "The average revenue is $X." + +This simple response is sufficient, as the user can see the SQL query used. + +{{/has_tool_query}} +### Providing Suggestions for Next Steps #### Suggestion Syntax @@ -125,22 +143,6 @@ You might want to explore the advanced features - **Never pretend** you have access to data you don't actually have - **Use Markdown tables** for any tabular or structured data in your responses -## Examples - -**Filtering Example:** -User: "Show only rows where sales are above average" -Tool Call: `querychat_update_dashboard({query: "SELECT * FROM table WHERE sales > (SELECT AVG(sales) FROM table)", title: "Above average sales"})` -Response: "" - -No response needed, the user will see the updated dashboard. - -**Question Example:** -User: "What's the average revenue?" -Tool Call: `querychat_query({query: "SELECT AVG(revenue) AS avg_revenue FROM table"})` -Response: "The average revenue is $X." 
- -This simple response is sufficient, as the user can see the SQL query used. - {{#extra_instructions}} ## Additional Instructions diff --git a/pkg-py/src/querychat/tools.py b/pkg-py/src/querychat/tools.py index 72128713..e0001de1 100644 --- a/pkg-py/src/querychat/tools.py +++ b/pkg-py/src/querychat/tools.py @@ -1,7 +1,7 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, TypedDict import chevron from chatlas import ContentToolResult, Tool @@ -14,7 +14,45 @@ from collections.abc import Callable from ._datasource import DataSource - from ._querychat_module import ReactiveStringOrNone + + +class UpdateDashboardData(TypedDict): + """ + Data passed to update_dashboard callback. + + This TypedDict defines the structure of data passed to the + `tool_update_dashboard` callback function when the LLM requests an update to + the dashboard's data based on a SQL query. + + Attributes + ---------- + query + The SQL query string to execute for filtering/sorting the dashboard. + title + A descriptive title for the query, typically displayed in the UI. 
+ + Examples + -------- + ```python + import pandas as pd + from querychat import QueryChat + from querychat.types import UpdateDashboardData + + + def log_update(data: UpdateDashboardData): + print(f"Executing: {data['query']}") + print(f"Title: {data['title']}") + + + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + qc = QueryChat(df, "my_data") + client = qc.client(update_dashboard=log_update) + ``` + + """ + + query: str + title: str def _read_prompt_template(filename: str, **kwargs) -> str: @@ -26,8 +64,7 @@ def _read_prompt_template(filename: str, **kwargs) -> str: def _update_dashboard_impl( data_source: DataSource, - current_query: ReactiveStringOrNone, - current_title: ReactiveStringOrNone, + update_fn: Callable[[UpdateDashboardData], None], ) -> Callable[[str, str], ContentToolResult]: """Create the implementation function for updating the dashboard.""" @@ -48,11 +85,8 @@ def update_dashboard(query: str, title: str) -> ContentToolResult: Apply Filter """ - # Update state on success - if query is not None: - current_query.set(query) - if title is not None: - current_title.set(title) + # Call the callback with TypedDict data on success + update_fn({"query": query, "title": title}) except Exception as e: error = str(e) @@ -78,8 +112,7 @@ def update_dashboard(query: str, title: str) -> ContentToolResult: def tool_update_dashboard( data_source: DataSource, - current_query: ReactiveStringOrNone, - current_title: ReactiveStringOrNone, + update_fn: Callable[[UpdateDashboardData], None], ) -> Tool: """ Create a tool that modifies the data presented in the dashboard based on the SQL query. 
@@ -88,10 +121,8 @@ def tool_update_dashboard( ---------- data_source The data source to query against - current_query - Reactive value for storing the current SQL query - current_title - Reactive value for storing the current title + update_fn + Callback function to call with UpdateDashboardData when update succeeds Returns ------- @@ -99,7 +130,7 @@ def tool_update_dashboard( A tool that can be registered with chatlas """ - impl = _update_dashboard_impl(data_source, current_query, current_title) + impl = _update_dashboard_impl(data_source, update_fn) description = _read_prompt_template( "tool-update-dashboard.md", @@ -115,15 +146,13 @@ def tool_update_dashboard( def _reset_dashboard_impl( - current_query: ReactiveStringOrNone, - current_title: ReactiveStringOrNone, + reset_fn: Callable[[], None], ) -> Callable[[], ContentToolResult]: """Create the implementation function for resetting the dashboard.""" def reset_dashboard() -> ContentToolResult: - # Reset current query and title - current_query.set(None) - current_title.set(None) + # Call the callback to reset + reset_fn() # Add Reset Filter button button_html = """