From e3f2754e3f14b8f73ce2d05ea02e8fcdc925f850 Mon Sep 17 00:00:00 2001 From: Omer Acar Date: Wed, 18 Jun 2025 13:57:16 -0400 Subject: [PATCH 01/13] Added prompt_path argument to querychat_init and querychat_system_prompt to allow user to provide completely custom system prompt. --- pkg-r/R/prompt.R | 27 ++++++++++++++++----------- pkg-r/R/querychat.R | 30 ++++++++++++++++++------------ 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/pkg-r/R/prompt.R b/pkg-r/R/prompt.R index 75ac68b6..19e12c8c 100644 --- a/pkg-r/R/prompt.R +++ b/pkg-r/R/prompt.R @@ -8,17 +8,18 @@ #' @param data_description Optional description of the data, in plain text or Markdown format. #' @param extra_instructions Optional additional instructions for the chat model, in plain text or Markdown format. #' @param categorical_threshold The maximum number of unique values for a text column to be considered categorical. +#' @param prompt_path Optional path to a custom prompt file. If NULL, the default prompt file in the package will be used. #' #' @return A string containing the system prompt for the chat model. 
#' #' @export querychat_system_prompt <- function( - df, - name, - data_description = NULL, - extra_instructions = NULL, - categorical_threshold = 10 -) { + df, + name, + data_description = NULL, + extra_instructions = NULL, + categorical_threshold = 10, + prompt_path = NULL) { schema <- df_to_schema(df, name, categorical_threshold) if (!is.null(data_description)) { @@ -29,7 +30,12 @@ querychat_system_prompt <- function( } # Read the prompt file - prompt_path <- system.file("prompt", "prompt.md", package = "querychat") + if (is.null(prompt_path)) { + prompt_path <- system.file("prompt", "prompt.md", package = "querychat") + } + if (!file.exists(prompt_path)) { + stop("Prompt file not found at: ", prompt_path) + } prompt_content <- readLines(prompt_path, warn = FALSE) prompt_text <- paste(prompt_content, collapse = "\n") @@ -44,10 +50,9 @@ querychat_system_prompt <- function( } df_to_schema <- function( - df, - name = deparse(substitute(df)), - categorical_threshold -) { + df, + name = deparse(substitute(df)), + categorical_threshold) { schema <- c(paste("Table:", name), "Columns:") column_info <- lapply(names(df), function(column) { diff --git a/pkg-r/R/querychat.R b/pkg-r/R/querychat.R index f97a867c..3889731b 100644 --- a/pkg-r/R/querychat.R +++ b/pkg-r/R/querychat.R @@ -20,6 +20,11 @@ #' any additional instructions for the chat model. These will be appended at #' the end of the system prompt. If a `system_prompt` argument is provided, #' the `extra_instructions` argument will be ignored. +#' @param prompt_path A string containing the path to a custom prompt file. If +#' NULL, the default prompt file in the package will be used. This file should +#' contain a template for the system prompt, with placeholders for the schema, +#' data description, and extra instructions. The default prompt file is +#' located in the `inst/prompt/` directory of the package. #' @param create_chat_func A function that takes a system prompt and returns a #' chat object. 
The default uses `ellmer::chat_openai()`. #' @param system_prompt A string containing the system prompt for the chat model. @@ -33,19 +38,20 @@ #' #' @export querychat_init <- function( - df, - tbl_name = deparse(substitute(df)), - greeting = NULL, - data_description = NULL, - extra_instructions = NULL, - create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o"), - system_prompt = querychat_system_prompt( df, - tbl_name, - data_description = data_description, - extra_instructions = extra_instructions - ) -) { + tbl_name = deparse(substitute(df)), + greeting = NULL, + data_description = NULL, + extra_instructions = NULL, + prompt_path = NULL, + create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o"), + system_prompt = querychat_system_prompt( + df, + tbl_name, + data_description = data_description, + extra_instructions = extra_instructions, + prompt_path = prompt_path + )) { is_tbl_name_ok <- is.character(tbl_name) && length(tbl_name) == 1 && grepl("^[a-zA-Z][a-zA-Z0-9_]*$", tbl_name, perl = TRUE) From bcec3158268510b87b72dea7ebb6a96f2d25d5c1 Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Fri, 27 Jun 2025 15:46:57 -0400 Subject: [PATCH 02/13] Restructure querychat_init params --- pkg-r/R/prompt.R | 16 +++++++++++---- pkg-r/R/querychat.R | 48 +++++++++++++++++---------------------------- 2 files changed, 30 insertions(+), 34 deletions(-) diff --git a/pkg-r/R/prompt.R b/pkg-r/R/prompt.R index 19e12c8c..ae70d2f9 100644 --- a/pkg-r/R/prompt.R +++ b/pkg-r/R/prompt.R @@ -4,11 +4,19 @@ #' schema and optional additional context and instructions. #' #' @param df A data frame to generate schema information from. -#' @param name A string containing the name of the table in SQL queries. -#' @param data_description Optional description of the data, in plain text or Markdown format. -#' @param extra_instructions Optional additional instructions for the chat model, in plain text or Markdown format. 
+#' @param tbl_name A string containing the name of the table in SQL queries. +#' @param data_description Optional string in plain text or Markdown format, containing +#' a description of the data frame or any additional context that might be +#' helpful in understanding the data. This will be included in the system +#' prompt for the chat model. +#' @param extra_instructions Optional string in plain text or Markdown format, containing +#' any additional instructions for the chat model. These will be appended at +#' the end of the system prompt. #' @param categorical_threshold The maximum number of unique values for a text column to be considered categorical. -#' @param prompt_path Optional path to a custom prompt file. If NULL, the default prompt file in the package will be used. +#' @param prompt_path Optional string containing the path to a custom prompt file. If +#' `NULL`, the default prompt file in the package will be used. This file should +#' contain a whisker template for the system prompt, with placeholders for `{{schema}}`, +#' `{{data_description}}` (optional), and `{{extra_instructions}}` (optional). #' #' @return A string containing the system prompt for the chat model. #' diff --git a/pkg-r/R/querychat.R b/pkg-r/R/querychat.R index 3889731b..6fd402fa 100644 --- a/pkg-r/R/querychat.R +++ b/pkg-r/R/querychat.R @@ -11,26 +11,16 @@ #' @param greeting A string in Markdown format, containing the initial message #' to display to the user upon first loading the chatbot. If not provided, the #' LLM will be invoked at the start of the conversation to generate one. -#' @param data_description A string in plain text or Markdown format, containing -#' a description of the data frame or any additional context that might be -#' helpful in understanding the data. This will be included in the system -#' prompt for the chat model. If a `system_prompt` argument is provided, the -#' `data_description` argument will be ignored.
-#' @param extra_instructions A string in plain text or Markdown format, containing -#' any additional instructions for the chat model. These will be appended at -#' the end of the system prompt. If a `system_prompt` argument is provided, -#' the `extra_instructions` argument will be ignored. -#' @param prompt_path A string containing the path to a custom prompt file. If -#' NULL, the default prompt file in the package will be used. This file should -#' contain a template for the system prompt, with placeholders for the schema, -#' data description, and extra instructions. The default prompt file is -#' located in the `inst/prompt/` directory of the package. -#' @param create_chat_func A function that takes a system prompt and returns a -#' chat object. The default uses `ellmer::chat_openai()`. +#' @param ... Additional arguments passed to the `querychat_system_prompt()` +#' function, such as `data_description`, `extra_instructions`, and +#' `prompt_path`. If a `system_prompt` argument is provided, the +#' `...` arguments will be silently ignored. #' @param system_prompt A string containing the system prompt for the chat model. #' The default uses `querychat_system_prompt()` to generate a generic prompt, #' which you can enhance via the `data_description` and `extra_instructions` #' arguments. +#' @param create_chat_func A function that takes a system prompt and returns a +#' chat object. The default uses `ellmer::chat_openai()`. #' #' @returns An object that can be passed to `querychat_server()` as the #' `querychat_config` argument. 
By convention, this object should be named @@ -38,20 +28,18 @@ #' #' @export querychat_init <- function( + df, + ..., + tbl_name = deparse(substitute(df)), + greeting = NULL, + system_prompt = querychat_system_prompt( df, - tbl_name = deparse(substitute(df)), - greeting = NULL, - data_description = NULL, - extra_instructions = NULL, - prompt_path = NULL, - create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o"), - system_prompt = querychat_system_prompt( - df, - tbl_name, - data_description = data_description, - extra_instructions = extra_instructions, - prompt_path = prompt_path - )) { + tbl_name, + # By default, pass through any params supplied to querychat_init() + ... + ), + create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o") +) { is_tbl_name_ok <- is.character(tbl_name) && length(tbl_name) == 1 && grepl("^[a-zA-Z][a-zA-Z0-9_]*$", tbl_name, perl = TRUE) @@ -68,7 +56,7 @@ querychat_init <- function( } force(df) - force(system_prompt) + force(system_prompt) # Have default `...` params evaluated force(create_chat_func) # TODO: Provide nicer looking errors here From 98e4bfe6b9e7bab131820a3ef10c9533dc904c74 Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Fri, 27 Jun 2025 15:48:03 -0400 Subject: [PATCH 03/13] Assert that `tbl_name`s match in init method and system prompt (when possible) --- pkg-r/R/prompt.R | 43 +++++++++++++++++++++++++------------------ pkg-r/R/querychat.R | 9 +++++++++ 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/pkg-r/R/prompt.R b/pkg-r/R/prompt.R index ae70d2f9..9bfca6eb 100644 --- a/pkg-r/R/prompt.R +++ b/pkg-r/R/prompt.R @@ -22,13 +22,14 @@ #' #' @export querychat_system_prompt <- function( - df, - name, - data_description = NULL, - extra_instructions = NULL, - categorical_threshold = 10, - prompt_path = NULL) { - schema <- df_to_schema(df, name, categorical_threshold) + df, + tbl_name, + data_description = NULL, + extra_instructions = NULL, + categorical_threshold = 10, + 
prompt_path = system.file("prompt", "prompt.md", package = "querychat") +) { + schema <- df_to_schema(df, tbl_name, categorical_threshold) if (!is.null(data_description)) { data_description <- paste(data_description, collapse = "\n") @@ -47,21 +48,27 @@ querychat_system_prompt <- function( prompt_content <- readLines(prompt_path, warn = FALSE) prompt_text <- paste(prompt_content, collapse = "\n") - whisker::whisker.render( - prompt_text, - list( - schema = schema, - data_description = data_description, - extra_instructions = extra_instructions + processed_template <- + whisker::whisker.render( + prompt_text, + list( + schema = schema, + data_description = data_description, + extra_instructions = extra_instructions + ) ) - ) + + attr(processed_template, "tbl_name") <- tbl_name + + processed_template } df_to_schema <- function( - df, - name = deparse(substitute(df)), - categorical_threshold) { - schema <- c(paste("Table:", name), "Columns:") + df, + tbl_name = deparse(substitute(df)), + categorical_threshold = 10 +) { + schema <- c(paste("Table:", tbl_name), "Columns:") column_info <- lapply(names(df), function(column) { # Map R classes to SQL-like types diff --git a/pkg-r/R/querychat.R b/pkg-r/R/querychat.R index 6fd402fa..d54ded92 100644 --- a/pkg-r/R/querychat.R +++ b/pkg-r/R/querychat.R @@ -67,6 +67,15 @@ querychat_init <- function( "create_chat_func must be a function" = is.function(create_chat_func) ) + if ("tbl_name" %in% names(attributes(system_prompt))) { + # If available, be sure to use the `tbl_name` argument to `querychat_init()` + # matches the one supplied to the system prompt + if (tbl_name != attr(system_prompt, "tbl_name")) { + rlang::abort( + "`querychat_init(tbl_name=)` must match system prompt `tbl_name` supplied to `querychat_system_prompt()`." 
+ ) + } + } if (!is.null(greeting)) { greeting <- paste(collapse = "\n", greeting) } else { From dd5ec6846e2f1eeb05e50ea18c38a7e273efeffc Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Fri, 27 Jun 2025 15:48:26 -0400 Subject: [PATCH 04/13] Export `df_to_schema` --- pkg-r/NAMESPACE | 1 + pkg-r/R/prompt.R | 14 +++++++++++++ pkg-r/barret/app.R | 28 ++++++++++++++++++++++++++ pkg-r/man/df_to_schema.Rd | 29 +++++++++++++++++++++++++++ pkg-r/man/querychat_init.Rd | 30 ++++++++++------------------ pkg-r/man/querychat_system_prompt.Rd | 21 ++++++++++++++----- 6 files changed, 99 insertions(+), 24 deletions(-) create mode 100644 pkg-r/barret/app.R create mode 100644 pkg-r/man/df_to_schema.Rd diff --git a/pkg-r/NAMESPACE b/pkg-r/NAMESPACE index d1e39fd8..077a6ed0 100644 --- a/pkg-r/NAMESPACE +++ b/pkg-r/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(df_to_schema) export(querychat_init) export(querychat_server) export(querychat_sidebar) diff --git a/pkg-r/R/prompt.R b/pkg-r/R/prompt.R index 9bfca6eb..ebb121bc 100644 --- a/pkg-r/R/prompt.R +++ b/pkg-r/R/prompt.R @@ -63,6 +63,20 @@ querychat_system_prompt <- function( processed_template } +#' Generate a schema description from a data frame +#' +#' This function generates a schema description for a data frame, including +#' the column names, their types, and additional information such as ranges for +#' numeric columns and unique values for text columns. +#' +#' @param df A data frame to generate schema information from. +#' @param tbl_name A string containing the name of the table in SQL queries. +#' @param categorical_threshold The maximum number of unique values for a text column to be considered categorical. +#' +#' @return A string containing the schema description for the data frame. +#' The schema includes the table name, column names, their types, and additional +#' information such as ranges for numeric columns and unique values for text columns. 
+#' @export df_to_schema <- function( df, tbl_name = deparse(substitute(df)), diff --git a/pkg-r/barret/app.R b/pkg-r/barret/app.R new file mode 100644 index 00000000..ede637b5 --- /dev/null +++ b/pkg-r/barret/app.R @@ -0,0 +1,28 @@ +library(shiny) +library(bslib) +library(querychat) + +# 1. Configure querychat. This is where you specify the dataset and can also +# override options like the greeting message, system prompt, model, etc. +querychat_config <- querychat_init(mtcars) + +ui <- page_sidebar( + # 2. Use querychat_sidebar(id) in a bslib::page_sidebar. + # Alternatively, use querychat_ui(id) elsewhere if you don't want your + # chat interface to live in a sidebar. + sidebar = querychat_sidebar("chat"), + DT::DTOutput("dt") +) + +server <- function(input, output, session) { + # 3. Create a querychat object using the config from step 1. + querychat <- querychat_server("chat", querychat_config) + + output$dt <- DT::renderDT({ + # 4. Use the filtered/sorted data frame anywhere you wish, via the + # querychat$df() reactive. + DT::datatable(querychat$df()) + }) +} + +shinyApp(ui, server) diff --git a/pkg-r/man/df_to_schema.Rd b/pkg-r/man/df_to_schema.Rd new file mode 100644 index 00000000..e453fec8 --- /dev/null +++ b/pkg-r/man/df_to_schema.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prompt.R +\name{df_to_schema} +\alias{df_to_schema} +\title{Generate a schema description from a data frame} +\usage{ +df_to_schema( + df, + tbl_name = deparse(substitute(df)), + categorical_threshold = 10 +) +} +\arguments{ +\item{df}{A data frame to generate schema information from.} + +\item{tbl_name}{A string containing the name of the table in SQL queries.} + +\item{categorical_threshold}{The maximum number of unique values for a text column to be considered categorical.} +} +\value{ +A string containing the schema description for the data frame. 
+The schema includes the table name, column names, their types, and additional +information such as ranges for numeric columns and unique values for text columns. +} +\description{ +This function generates a schema description for a data frame, including +the column names, their types, and additional information such as ranges for +numeric columns and unique values for text columns. +} diff --git a/pkg-r/man/querychat_init.Rd b/pkg-r/man/querychat_init.Rd index 260261ae..9026a159 100644 --- a/pkg-r/man/querychat_init.Rd +++ b/pkg-r/man/querychat_init.Rd @@ -6,18 +6,21 @@ \usage{ querychat_init( df, + ..., tbl_name = deparse(substitute(df)), greeting = NULL, - data_description = NULL, - extra_instructions = NULL, - create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o"), - system_prompt = querychat_system_prompt(df, tbl_name, data_description = - data_description, extra_instructions = extra_instructions) + system_prompt = querychat_system_prompt(df, tbl_name, ...), + create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o") ) } \arguments{ \item{df}{A data frame.} +\item{...}{Additional arguments passed to the \code{querychat_system_prompt()} +function, such as \code{data_description}, \code{extra_instructions}, and +\code{prompt_path}. If a \code{system_prompt} argument is provided, the +\code{...} arguments will be silently ignored.} + \item{tbl_name}{A string containing a valid table name for the data frame, that will appear in SQL queries. Ensure that it begins with a letter, and contains only letters, numbers, and underscores. By default, querychat will @@ -27,24 +30,13 @@ try to infer a table name using the name of the \code{df} argument.} to display to the user upon first loading the chatbot. 
If not provided, the LLM will be invoked at the start of the conversation to generate one.} -\item{data_description}{A string in plain text or Markdown format, containing -a description of the data frame or any additional context that might be -helpful in understanding the data. This will be included in the system -prompt for the chat model. If a \code{system_prompt} argument is provided, the -\code{data_description} argument will be ignored.} - -\item{extra_instructions}{A string in plain text or Markdown format, containing -any additional instructions for the chat model. These will be appended at -the end of the system prompt. If a \code{system_prompt} argument is provided, -the \code{extra_instructions} argument will be ignored.} - -\item{create_chat_func}{A function that takes a system prompt and returns a -chat object. The default uses \code{ellmer::chat_openai()}.} - \item{system_prompt}{A string containing the system prompt for the chat model. The default uses \code{querychat_system_prompt()} to generate a generic prompt, which you can enhance via the \code{data_description} and \code{extra_instructions} arguments.} + +\item{create_chat_func}{A function that takes a system prompt and returns a +chat object. 
The default uses \code{ellmer::chat_openai()}.} } \value{ An object that can be passed to \code{querychat_server()} as the diff --git a/pkg-r/man/querychat_system_prompt.Rd b/pkg-r/man/querychat_system_prompt.Rd index 31dae21f..e4245ae6 100644 --- a/pkg-r/man/querychat_system_prompt.Rd +++ b/pkg-r/man/querychat_system_prompt.Rd @@ -6,22 +6,33 @@ \usage{ querychat_system_prompt( df, - name, + tbl_name, data_description = NULL, extra_instructions = NULL, - categorical_threshold = 10 + categorical_threshold = 10, + prompt_path = system.file("prompt", "prompt.md", package = "querychat") ) } \arguments{ \item{df}{A data frame to generate schema information from.} -\item{name}{A string containing the name of the table in SQL queries.} +\item{tbl_name}{A string containing the name of the table in SQL queries.} -\item{data_description}{Optional description of the data, in plain text or Markdown format.} +\item{data_description}{Optional string in plain text or Markdown format, containing +a description of the data frame or any additional context that might be +helpful in understanding the data. This will be included in the system +prompt for the chat model.} -\item{extra_instructions}{Optional additional instructions for the chat model, in plain text or Markdown format.} +\item{extra_instructions}{Optional string in plain text or Markdown format, containing +any additional instructions for the chat model. These will be appended at +the end of the system prompt.} \item{categorical_threshold}{The maximum number of unique values for a text column to be considered categorical.} + +\item{prompt_path}{Optional string containing the path to a custom prompt file. If +\code{NULL}, the default prompt file in the package will be used. 
This file should +contain a whisker template for the system prompt, with placeholders for \code{{{schema}}}, +\code{{{data_description}}} (optional), and \code{{{extra_instructions}}} (optional).} } \value{ A string containing the system prompt for the chat model. From 541d90c38d7bbe33e91f9178b0e7aaaf228f9716 Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Fri, 27 Jun 2025 15:49:49 -0400 Subject: [PATCH 05/13] Delete debug app --- pkg-r/barret/app.R | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 pkg-r/barret/app.R diff --git a/pkg-r/barret/app.R b/pkg-r/barret/app.R deleted file mode 100644 index ede637b5..00000000 --- a/pkg-r/barret/app.R +++ /dev/null @@ -1,28 +0,0 @@ -library(shiny) -library(bslib) -library(querychat) - -# 1. Configure querychat. This is where you specify the dataset and can also -# override options like the greeting message, system prompt, model, etc. -querychat_config <- querychat_init(mtcars) - -ui <- page_sidebar( - # 2. Use querychat_sidebar(id) in a bslib::page_sidebar. - # Alternatively, use querychat_ui(id) elsewhere if you don't want your - # chat interface to live in a sidebar. - sidebar = querychat_sidebar("chat"), - DT::DTOutput("dt") -) - -server <- function(input, output, session) { - # 3. Create a querychat object using the config from step 1. - querychat <- querychat_server("chat", querychat_config) - - output$dt <- DT::renderDT({ - # 4. Use the filtered/sorted data frame anywhere you wish, via the - # querychat$df() reactive.
- DT::datatable(querychat$df()) - }) -} - -shinyApp(ui, server) From 61c3ab79feb63979f8ef5e01af3d087995273092 Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Fri, 27 Jun 2025 15:54:06 -0400 Subject: [PATCH 06/13] `tbl_name` -> `table_name` --- pkg-r/R/prompt.R | 14 ++++++------ pkg-r/R/querychat.R | 32 ++++++++++++++-------------- pkg-r/man/df_to_schema.Rd | 4 ++-- pkg-r/man/querychat_init.Rd | 6 +++--- pkg-r/man/querychat_system_prompt.Rd | 4 ++-- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/pkg-r/R/prompt.R b/pkg-r/R/prompt.R index ebb121bc..746d309b 100644 --- a/pkg-r/R/prompt.R +++ b/pkg-r/R/prompt.R @@ -4,7 +4,7 @@ #' schema and optional additional context and instructions. #' #' @param df A data frame to generate schema information from. -#' @param tbl_name A string containing the name of the table in SQL queries. +#' @param table_name A string containing the name of the table in SQL queries. #' @param data_description Optional string in plain text or Markdown format, containing #' a description of the data frame or any additional context that might be #' helpful in understanding the data. This will be included in the system @@ -23,13 +23,13 @@ #' @export querychat_system_prompt <- function( df, - tbl_name, + table_name, data_description = NULL, extra_instructions = NULL, categorical_threshold = 10, prompt_path = system.file("prompt", "prompt.md", package = "querychat") ) { - schema <- df_to_schema(df, tbl_name, categorical_threshold) + schema <- df_to_schema(df, table_name, categorical_threshold) if (!is.null(data_description)) { data_description <- paste(data_description, collapse = "\n") @@ -58,7 +58,7 @@ querychat_system_prompt <- function( ) ) - attr(processed_template, "tbl_name") <- tbl_name + attr(processed_template, "table_name") <- table_name processed_template } @@ -70,7 +70,7 @@ querychat_system_prompt <- function( #' numeric columns and unique values for text columns. 
#' #' @param df A data frame to generate schema information from. -#' @param tbl_name A string containing the name of the table in SQL queries. +#' @param table_name A string containing the name of the table in SQL queries. #' @param categorical_threshold The maximum number of unique values for a text column to be considered categorical. #' #' @return A string containing the schema description for the data frame. @@ -79,10 +79,10 @@ querychat_system_prompt <- function( #' @export df_to_schema <- function( df, - tbl_name = deparse(substitute(df)), + table_name = deparse(substitute(df)), categorical_threshold = 10 ) { - schema <- c(paste("Table:", tbl_name), "Columns:") + schema <- c(paste("Table:", table_name), "Columns:") column_info <- lapply(names(df), function(column) { # Map R classes to SQL-like types diff --git a/pkg-r/R/querychat.R b/pkg-r/R/querychat.R index d54ded92..432c5cc5 100644 --- a/pkg-r/R/querychat.R +++ b/pkg-r/R/querychat.R @@ -4,7 +4,7 @@ #' Shiny sessions in the R process. #' #' @param df A data frame. -#' @param tbl_name A string containing a valid table name for the data frame, +#' @param table_name A string containing a valid table name for the data frame, #' that will appear in SQL queries. Ensure that it begins with a letter, and #' contains only letters, numbers, and underscores. By default, querychat will #' try to infer a table name using the name of the `df` argument. @@ -30,27 +30,27 @@ querychat_init <- function( df, ..., - tbl_name = deparse(substitute(df)), + table_name = deparse(substitute(df)), greeting = NULL, system_prompt = querychat_system_prompt( df, - tbl_name, + table_name, # By default, pass through any params supplied to querychat_init() ... 
), create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o") ) { - is_tbl_name_ok <- is.character(tbl_name) && - length(tbl_name) == 1 && - grepl("^[a-zA-Z][a-zA-Z0-9_]*$", tbl_name, perl = TRUE) - if (!is_tbl_name_ok) { - if (missing(tbl_name)) { + is_table_name_ok <- is.character(table_name) && + length(table_name) == 1 && + grepl("^[a-zA-Z][a-zA-Z0-9_]*$", table_name, perl = TRUE) + if (!is_table_name_ok) { + if (missing(table_name)) { rlang::abort( - "Unable to infer table name from `df` argument. Please specify `tbl_name` argument explicitly." + "Unable to infer table name from `df` argument. Please specify `table_name` argument explicitly." ) } else { rlang::abort( - "`tbl_name` argument must be a string containing a valid table name." + "`table_name` argument must be a string containing a valid table name." ) } } @@ -62,17 +62,17 @@ querychat_init <- function( # TODO: Provide nicer looking errors here stopifnot( "df must be a data frame" = is.data.frame(df), - "tbl_name must be a string" = is.character(tbl_name), + "table_name must be a string" = is.character(table_name), "system_prompt must be a string" = is.character(system_prompt), "create_chat_func must be a function" = is.function(create_chat_func) ) - if ("tbl_name" %in% names(attributes(system_prompt))) { - # If available, be sure to use the `tbl_name` argument to `querychat_init()` + if ("table_name" %in% names(attributes(system_prompt))) { + # If available, be sure to use the `table_name` argument to `querychat_init()` # matches the one supplied to the system prompt - if (tbl_name != attr(system_prompt, "tbl_name")) { + if (table_name != attr(system_prompt, "table_name")) { rlang::abort( - "`querychat_init(tbl_name=)` must match system prompt `tbl_name` supplied to `querychat_system_prompt()`." + "`querychat_init(table_name=)` must match system prompt `table_name` supplied to `querychat_system_prompt()`." 
) } } @@ -86,7 +86,7 @@ querychat_init <- function( } conn <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:") - duckdb::duckdb_register(conn, tbl_name, df, experimental = FALSE) + duckdb::duckdb_register(conn, table_name, df, experimental = FALSE) shiny::onStop(function() DBI::dbDisconnect(conn)) structure( diff --git a/pkg-r/man/df_to_schema.Rd b/pkg-r/man/df_to_schema.Rd index e453fec8..d6060c4c 100644 --- a/pkg-r/man/df_to_schema.Rd +++ b/pkg-r/man/df_to_schema.Rd @@ -6,14 +6,14 @@ \usage{ df_to_schema( df, - tbl_name = deparse(substitute(df)), + table_name = deparse(substitute(df)), categorical_threshold = 10 ) } \arguments{ \item{df}{A data frame to generate schema information from.} -\item{tbl_name}{A string containing the name of the table in SQL queries.} +\item{table_name}{A string containing the name of the table in SQL queries.} \item{categorical_threshold}{The maximum number of unique values for a text column to be considered categorical.} } diff --git a/pkg-r/man/querychat_init.Rd b/pkg-r/man/querychat_init.Rd index 9026a159..26a678ec 100644 --- a/pkg-r/man/querychat_init.Rd +++ b/pkg-r/man/querychat_init.Rd @@ -7,9 +7,9 @@ querychat_init( df, ..., - tbl_name = deparse(substitute(df)), + table_name = deparse(substitute(df)), greeting = NULL, - system_prompt = querychat_system_prompt(df, tbl_name, ...), + system_prompt = querychat_system_prompt(df, table_name, ...), create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o") ) } @@ -21,7 +21,7 @@ function, such as \code{data_description}, \code{extra_instructions}, and \code{prompt_path}. If a \code{system_prompt} argument is provided, the \code{...} arguments will be silently ignored.} -\item{tbl_name}{A string containing a valid table name for the data frame, +\item{table_name}{A string containing a valid table name for the data frame, that will appear in SQL queries. Ensure that it begins with a letter, and contains only letters, numbers, and underscores. 
By default, querychat will try to infer a table name using the name of the \code{df} argument.} diff --git a/pkg-r/man/querychat_system_prompt.Rd b/pkg-r/man/querychat_system_prompt.Rd index e4245ae6..7e782dd1 100644 --- a/pkg-r/man/querychat_system_prompt.Rd +++ b/pkg-r/man/querychat_system_prompt.Rd @@ -6,7 +6,7 @@ \usage{ querychat_system_prompt( df, - tbl_name, + table_name, data_description = NULL, extra_instructions = NULL, categorical_threshold = 10, @@ -16,7 +16,7 @@ querychat_system_prompt( \arguments{ \item{df}{A data frame to generate schema information from.} -\item{tbl_name}{A string containing the name of the table in SQL queries.} +\item{table_name}{A string containing the name of the table in SQL queries.} \item{data_description}{Optional string in plain text or Markdown format, containing a description of the data frame or any additional context that might be From 721278e4543d23655d864488fd7a263d623365c0 Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Fri, 27 Jun 2025 16:01:52 -0400 Subject: [PATCH 07/13] Add prompt_path to python code --- pkg-py/src/querychat/querychat.py | 32 +++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/pkg-py/src/querychat/querychat.py b/pkg-py/src/querychat/querychat.py index 9eba2c47..3c7eae7a 100644 --- a/pkg-py/src/querychat/querychat.py +++ b/pkg-py/src/querychat/querychat.py @@ -145,6 +145,7 @@ def system_prompt( data_description: Optional[str] = None, extra_instructions: Optional[str] = None, categorical_threshold: int = 10, + prompt_path: Optional[Path] = None, ) -> str: """ Create a system prompt for the chat model based on a data source's schema @@ -162,6 +163,9 @@ def system_prompt( categorical_threshold : int, default=10 Threshold for determining if a column is categorical based on number of unique values + prompt_path + Optional `Path` to a custom prompt file. If not provided, the default + querychat template will be used. 
Returns ------- @@ -170,7 +174,11 @@ def system_prompt( """ # Read the prompt file - prompt_path = Path(__file__).parent / "prompt" / "prompt.md" + if prompt_path is None: + # Default to the prompt file in the same directory as this module + # This allows for easy customization by placing a different prompt.md file there + prompt_path = Path(__file__).parent / "prompt" / "prompt.md" + prompt_text = prompt_path.read_text() return chevron.render( @@ -226,11 +234,14 @@ def df_to_html(df: IntoFrame, maxrows: int = 5) -> str: def init( data_source: IntoFrame | sqlalchemy.Engine, table_name: str, + /, + *, greeting: Optional[str] = None, data_description: Optional[str] = None, extra_instructions: Optional[str] = None, - create_chat_callback: Optional[CreateChatCallback] = None, + prompt_path: Optional[Path] = None, system_prompt_override: Optional[str] = None, + create_chat_callback: Optional[CreateChatCallback] = None, ) -> QueryChatConfig: """ Initialize querychat with any compliant data source. @@ -251,10 +262,22 @@ def init( Description of the data in plain text or Markdown extra_instructions : str, optional Additional instructions for the chat model + prompt_path : Path, optional + Path to a custom prompt file. If not provided, the default querychat + template will be used. This should be a Markdown file that contains the + system prompt template. The mustache template can use the following + variables: + - `{{db_engine}}`: The database engine used (e.g., "DuckDB") + - `{{schema}}`: The schema of the data source, generated by + `data_source.get_schema()` + - `{{data_description}}`: The optional data description provided + - `{{extra_instructions}}`: Any additional instructions provided + system_prompt_override : str, optional + A custom system prompt to use instead of the default. If provided, + `data_description`, `extra_instructions`, and `prompt_path` will be + silently ignored. 
create_chat_callback : CreateChatCallback, optional A function that creates a chat object - system_prompt_override : str, optional - A custom system prompt to use instead of the default Returns ------- @@ -289,6 +312,7 @@ def init( data_source_obj, data_description, extra_instructions, + prompt_path=prompt_path, ) # Default chat function if none provided From 0981b4574bb8d35a001c91bbbec3fc4c0ab22092 Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Fri, 27 Jun 2025 16:11:04 -0400 Subject: [PATCH 08/13] Fix python type checks --- pkg-py/src/querychat/querychat.py | 2 -- pyproject.toml | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg-py/src/querychat/querychat.py b/pkg-py/src/querychat/querychat.py index 3c7eae7a..f4099f5b 100644 --- a/pkg-py/src/querychat/querychat.py +++ b/pkg-py/src/querychat/querychat.py @@ -9,9 +9,7 @@ import chatlas import chevron import narwhals as nw -import pandas as pd import sqlalchemy -from narwhals.typing import IntoFrame from shiny import Inputs, Outputs, Session, module, reactive, ui if TYPE_CHECKING: diff --git a/pyproject.toml b/pyproject.toml index 3ce33dc4..38ceb234 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,6 +108,7 @@ extend-ignore = [ "D104", # Missing docstring in public package "D107", # Missing docstring in __init__ "D205", # 1 blank line required between summary line and description + "UP045", # Use `X | None` for type annotations, not `Optional[X]` ] extend-select = [ # "C90", # C90; mccabe: https://docs.astral.sh/ruff/rules/complex-structure/ From bb3fed250797d67dc5aa12ac5a9d498ea150ab6f Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Fri, 27 Jun 2025 16:12:43 -0400 Subject: [PATCH 09/13] Create NEWS.md --- pkg-r/NEWS.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 pkg-r/NEWS.md diff --git a/pkg-r/NEWS.md b/pkg-r/NEWS.md new file mode 100644 index 00000000..c3b37413 --- /dev/null +++ b/pkg-r/NEWS.md @@ -0,0 +1,3 @@ +# querychat (development version) + +* Initial
CRAN submission. From 7ef7ba076a4441f0b9b52c99a14ab8fd15f20e58 Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Fri, 27 Jun 2025 16:12:48 -0400 Subject: [PATCH 10/13] Add news entry --- pkg-r/NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg-r/NEWS.md b/pkg-r/NEWS.md index c3b37413..811fe69b 100644 --- a/pkg-r/NEWS.md +++ b/pkg-r/NEWS.md @@ -1,3 +1,5 @@ # querychat (development version) * Initial CRAN submission. + +* Added `prompt_path` support for `querychat_system_prompt()`. (Thank you, @oacar! #37) From 0b45c6685ff0e94da2478037ba9bb7e198792ca7 Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Fri, 27 Jun 2025 16:15:06 -0400 Subject: [PATCH 11/13] Fix R doc link --- pkg-r/R/prompt.R | 2 +- pkg-r/man/querychat_system_prompt.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg-r/R/prompt.R b/pkg-r/R/prompt.R index 746d309b..10bfc977 100644 --- a/pkg-r/R/prompt.R +++ b/pkg-r/R/prompt.R @@ -16,7 +16,7 @@ #' @param prompt_path Optional string containing the path to a custom prompt file. If #' `NULL`, the default prompt file in the package will be used. This file should #' contain a whisker template for the system prompt, with placeholders for `{{schema}}`, -#' `{{data_description}}` [optional], and `{{extra_instructions}}` [optional]. +#' `{{data_description}}`, and `{{extra_instructions}}`. #' #' @return A string containing the system prompt for the chat model. #' diff --git a/pkg-r/man/querychat_system_prompt.Rd b/pkg-r/man/querychat_system_prompt.Rd index 7e782dd1..a62b0ac3 100644 --- a/pkg-r/man/querychat_system_prompt.Rd +++ b/pkg-r/man/querychat_system_prompt.Rd @@ -32,7 +32,7 @@ the end of the system prompt.} \item{prompt_path}{Optional string containing the path to a custom prompt file. If \code{NULL}, the default prompt file in the package will be used. 
This file should contain a whisker template for the system prompt, with placeholders for \code{{{schema}}}, -\code{{{data_description}}} \link{optional}, and \code{{{extra_instructions}}} \link{optional}.} +\code{{{data_description}}}, and \code{{{extra_instructions}}}.} } \value{ A string containing the system prompt for the chat model. From e8fe2a25d5868807f070ffa253c48decb948678f Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Fri, 27 Jun 2025 16:19:58 -0400 Subject: [PATCH 12/13] lints post merge --- pkg-py/src/querychat/__init__.py | 6 ++++-- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pkg-py/src/querychat/__init__.py b/pkg-py/src/querychat/__init__.py index 985d24f5..660a202f 100644 --- a/pkg-py/src/querychat/__init__.py +++ b/pkg-py/src/querychat/__init__.py @@ -1,3 +1,5 @@ -from querychat.querychat import init, mod_server as server, sidebar, system_prompt, mod_ui as ui +from querychat.querychat import init, sidebar, system_prompt +from querychat.querychat import mod_server as server +from querychat.querychat import mod_ui as ui -__all__ = ["init", "server", "sidebar", "ui", "system_prompt"] +__all__ = ["init", "server", "sidebar", "system_prompt", "ui"] diff --git a/pyproject.toml b/pyproject.toml index e59cb398..c5a1787e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ exclude = [ "node_modules", "site-packages", "venv", - "app-*.py", # ignore example apps for now + "examples", # ignore example apps for now ] line-length = 88 From 0ef38bd875136604d42ceef2c074435e8b2c7149 Mon Sep 17 00:00:00 2001 From: Barret Schloerke Date: Tue, 1 Jul 2025 11:49:36 -0400 Subject: [PATCH 13/13] Add back `data_description` and `extra_instructions` to `querychat_init` for better autocomplete --- pkg-r/R/querychat.R | 14 +++++++++----- pkg-r/man/querychat_init.Rd | 20 ++++++++++++++++---- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/pkg-r/R/querychat.R b/pkg-r/R/querychat.R index 
432c5cc5..eca6a0b8 100644 --- a/pkg-r/R/querychat.R +++ b/pkg-r/R/querychat.R @@ -12,16 +12,16 @@ #' to display to the user upon first loading the chatbot. If not provided, the #' LLM will be invoked at the start of the conversation to generate one. #' @param ... Additional arguments passed to the `querychat_system_prompt()` -#' function, such as `data_description`, `extra_instructions`, and -#' `prompt_path`. If a `system_prompt` argument is provided, the -#' `...` arguments will be silently ignored. +#' function, such as `categorical_threshold`, and `prompt_path`. If a +#' `system_prompt` argument is provided, the `...` arguments will be silently +#' ignored. +#' @inheritParams querychat_system_prompt #' @param system_prompt A string containing the system prompt for the chat model. #' The default uses `querychat_system_prompt()` to generate a generic prompt, #' which you can enhance via the `data_description` and `extra_instructions` #' arguments. #' @param create_chat_func A function that takes a system prompt and returns a #' chat object. The default uses `ellmer::chat_openai()`. -#' #' @returns An object that can be passed to `querychat_server()` as the #' `querychat_config` argument. By convention, this object should be named #' `querychat_config`. @@ -32,11 +32,15 @@ querychat_init <- function( ..., table_name = deparse(substitute(df)), greeting = NULL, + data_description = NULL, + extra_instructions = NULL, system_prompt = querychat_system_prompt( df, table_name, # By default, pass through any params supplied to querychat_init() - ... 
+ ..., + data_description = data_description, + extra_instructions = extra_instructions ), create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o") ) { diff --git a/pkg-r/man/querychat_init.Rd b/pkg-r/man/querychat_init.Rd index 26a678ec..5a0b0c84 100644 --- a/pkg-r/man/querychat_init.Rd +++ b/pkg-r/man/querychat_init.Rd @@ -9,7 +9,10 @@ querychat_init( ..., table_name = deparse(substitute(df)), greeting = NULL, - system_prompt = querychat_system_prompt(df, table_name, ...), + data_description = NULL, + extra_instructions = NULL, + system_prompt = querychat_system_prompt(df, table_name, ..., data_description = + data_description, extra_instructions = extra_instructions), create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o") ) } @@ -17,9 +20,9 @@ querychat_init( \item{df}{A data frame.} \item{...}{Additional arguments passed to the \code{querychat_system_prompt()} -function, such as \code{data_description}, \code{extra_instructions}, and -\code{prompt_path}. If a \code{system_prompt} argument is provided, the -\code{...} arguments will be silently ignored.} +function, such as \code{categorical_threshold}, and \code{prompt_path}. If a +\code{system_prompt} argument is provided, the \code{...} arguments will be silently +ignored.} \item{table_name}{A string containing a valid table name for the data frame, that will appear in SQL queries. Ensure that it begins with a letter, and @@ -30,6 +33,15 @@ try to infer a table name using the name of the \code{df} argument.} to display to the user upon first loading the chatbot. If not provided, the LLM will be invoked at the start of the conversation to generate one.} +\item{data_description}{Optional string in plain text or Markdown format, containing +a description of the data frame or any additional context that might be +helpful in understanding the data. 
This will be included in the system +prompt for the chat model.} + +\item{extra_instructions}{Optional string in plain text or Markdown format, containing +any additional instructions for the chat model. These will be appended at +the end of the system prompt.} + \item{system_prompt}{A string containing the system prompt for the chat model. The default uses \code{querychat_system_prompt()} to generate a generic prompt, which you can enhance via the \code{data_description} and \code{extra_instructions}