diff --git a/pkg-py/src/querychat/querychat.py b/pkg-py/src/querychat/querychat.py
index 865facca9..a999f0c7c 100644
--- a/pkg-py/src/querychat/querychat.py
+++ b/pkg-py/src/querychat/querychat.py
@@ -139,7 +139,7 @@ def system_prompt(
     data_description: Optional[str | Path] = None,
     extra_instructions: Optional[str | Path] = None,
     categorical_threshold: int = 10,
-    prompt_path: Optional[Path] = None,
+    prompt_template: Optional[str | Path] = None,
 ) -> str:
     """
     Create a system prompt for the chat model based on a data source's schema
@@ -157,8 +157,8 @@
     categorical_threshold : int, default=10
         Threshold for determining if a column is categorical based on number
         of unique values
-    prompt_path
-        Optional `Path` to a custom prompt file. If not provided, the default
+    prompt_template
+        Optional `Path` to or string of a custom prompt template. If not provided, the default
         querychat template will be used.

     Returns
     -------
@@ -168,27 +168,30 @@
     """
     # Read the prompt file
-    if prompt_path is None:
+    if prompt_template is None:
         # Default to the prompt file in the same directory as this module
         # This allows for easy customization by placing a different prompt.md file there
-        prompt_path = Path(__file__).parent / "prompt" / "prompt.md"
-
-    prompt_text = prompt_path.read_text()
+        prompt_template = Path(__file__).parent / "prompt" / "prompt.md"
+    prompt_str = (
+        prompt_template.read_text()
+        if isinstance(prompt_template, Path)
+        else prompt_template
+    )

-    data_description_str: str | None = (
+    data_description_str = (
         data_description.read_text()
         if isinstance(data_description, Path)
         else data_description
     )
-    extra_instructions_str: str | None = (
+    extra_instructions_str = (
         extra_instructions.read_text()
         if isinstance(extra_instructions, Path)
         else extra_instructions
     )

     return chevron.render(
-        prompt_text,
+        prompt_str,
         {
             "db_engine": data_source.db_engine,
             "schema": data_source.get_schema(
@@ -244,7 +247,7 @@ def init(
     greeting: Optional[str | Path] = None,
     data_description: Optional[str | Path] = None,
     extra_instructions: Optional[str | Path] = None,
-    prompt_path: Optional[Path] = None,
+    prompt_template: Optional[str | Path] = None,
     system_prompt_override: Optional[str] = None,
     create_chat_callback: Optional[CreateChatCallback] = None,
 ) -> QueryChatConfig:
@@ -273,8 +276,8 @@
         Additional instructions for the chat model. If a pathlib.Path object
         is passed, querychat will read the contents of the path into a string
         with `.read_text()`.
-    prompt_path : Path, optional
-        Path to a custom prompt file. If not provided, the default querychat
+    prompt_template : Path, optional
+        Path to or a string of a custom prompt file. If not provided, the default querychat
         template will be used. This should be a Markdown file that contains
         the system prompt template. The mustache template can use the following
         variables:
@@ -285,7 +288,7 @@
         - `{{extra_instructions}}`: Any additional instructions provided
     system_prompt_override : str, optional
         A custom system prompt to use instead of the default. If provided,
-        `data_description`, `extra_instructions`, and `prompt_path` will be
+        `data_description`, `extra_instructions`, and `prompt_template` will be
         silently ignored.
     create_chat_callback : CreateChatCallback, optional
         A function that creates a chat object
@@ -331,7 +334,7 @@ def init(
         data_source_obj,
         data_description=data_description,
         extra_instructions=extra_instructions,
-        prompt_path=prompt_path,
+        prompt_template=prompt_template,
     )

     # Default chat function if none provided
diff --git a/pkg-r/NEWS.md b/pkg-r/NEWS.md
index 811fe69bb..81a3e390b 100644
--- a/pkg-r/NEWS.md
+++ b/pkg-r/NEWS.md
@@ -2,4 +2,4 @@

 * Initial CRAN submission.

-* Added `prompt_path` support for `querychat_system_prompt()`. (Thank you, @oacar! #37)
+* Added `prompt_template` support for `querychat_system_prompt()`. (Thank you, @oacar! #37, #45)
diff --git a/pkg-r/R/prompt.R b/pkg-r/R/prompt.R
index 10bfc977e..a8e9deb96 100644
--- a/pkg-r/R/prompt.R
+++ b/pkg-r/R/prompt.R
@@ -5,18 +5,21 @@
 #'
 #' @param df A data frame to generate schema information from.
 #' @param table_name A string containing the name of the table in SQL queries.
-#' @param data_description Optional string in plain text or Markdown format, containing
-#' a description of the data frame or any additional context that might be
-#' helpful in understanding the data. This will be included in the system
-#' prompt for the chat model.
-#' @param extra_instructions Optional string in plain text or Markdown format, containing
-#' any additional instructions for the chat model. These will be appended at
-#' the end of the system prompt.
-#' @param categorical_threshold The maximum number of unique values for a text column to be considered categorical.
-#' @param prompt_path Optional string containing the path to a custom prompt file. If
-#' `NULL`, the default prompt file in the package will be used. This file should
-#' contain a whisker template for the system prompt, with placeholders for `{{schema}}`,
-#' `{{data_description}}`, and `{{extra_instructions}}`.
+#' @param data_description Optional string or existing file path. The contents
+#' should be in plain text or Markdown format, containing a description of the
+#' data frame or any additional context that might be helpful in understanding
+#' the data. This will be included in the system prompt for the chat model.
+#' @param extra_instructions Optional string or existing file path. The contents
+#' should be in plain text or Markdown format, containing any additional
+#' instructions for the chat model. These will be appended at the end of the
+#' system prompt.
+#' @param prompt_template Optional string or existing file path. If `NULL`, the
+#' default prompt file in the package will be used. The contents should
+#' contain a whisker template for the system prompt, with placeholders for
+#' `{{schema}}`, `{{data_description}}`, and `{{extra_instructions}}`.
+#' @param categorical_threshold The maximum number of unique values for a text
+#' column to be considered categorical.
+#' @param ... Ignored. Used to allow for future parameters.
 #'
 #' @return A string containing the system prompt for the chat model.
 #'
@@ -24,29 +27,25 @@
 querychat_system_prompt <- function(
   df,
   table_name,
+  ...,
   data_description = NULL,
   extra_instructions = NULL,
-  categorical_threshold = 10,
-  prompt_path = system.file("prompt", "prompt.md", package = "querychat")
+  prompt_template = NULL,
+  categorical_threshold = 10
 ) {
-  schema <- df_to_schema(df, table_name, categorical_threshold)
+  rlang::check_dots_empty()

-  if (!is.null(data_description)) {
-    data_description <- paste(data_description, collapse = "\n")
-  }
-  if (!is.null(extra_instructions)) {
-    extra_instructions <- paste(extra_instructions, collapse = "\n")
-  }
+  schema <- df_to_schema(df, table_name, categorical_threshold)

-  # Read the prompt file
-  if (is.null(prompt_path)) {
-    prompt_path <- system.file("prompt", "prompt.md", package = "querychat")
-  }
-  if (!file.exists(prompt_path)) {
-    stop("Prompt file not found at: ", prompt_path)
+  data_description <- read_path_or_string(data_description, "data_description")
+  extra_instructions <- read_path_or_string(
+    extra_instructions,
+    "extra_instructions"
+  )
+  if (is.null(prompt_template)) {
+    prompt_template <- system.file("prompt", "prompt.md", package = "querychat")
   }
-  prompt_content <- readLines(prompt_path, warn = FALSE)
-  prompt_text <- paste(prompt_content, collapse = "\n")
+  prompt_text <- read_path_or_string(prompt_template, "prompt_template")

   processed_template <- whisker::whisker.render(
@@ -63,6 +62,20 @@ querychat_system_prompt <- function(
   processed_template
 }

+read_path_or_string <- function(x, name) {
+  if (is.null(x)) {
+    return(NULL)
+  }
+  if (!is.character(x)) {
+    stop(sprintf("`%s=` must be a string or a path to a file.", name))
+  }
+  if (file.exists(x)) {
+    x <- readLines(x, warn = FALSE)
+  }
+  return(paste(x, collapse = "\n"))
+}
+
+
 #' Generate a schema description from a data frame
 #'
 #' This function generates a schema description for a data frame, including
diff --git a/pkg-r/R/querychat.R b/pkg-r/R/querychat.R
index eca6a0b88..27601e94a 100644
--- a/pkg-r/R/querychat.R
+++ b/pkg-r/R/querychat.R
@@ -12,7 +12,7 @@
 #' to display to the user upon first loading the chatbot. If not provided, the
 #' LLM will be invoked at the start of the conversation to generate one.
 #' @param ... Additional arguments passed to the `querychat_system_prompt()`
-#' function, such as `categorical_threshold`, and `prompt_path`. If a
+#' function, such as `categorical_threshold`. If a
 #' `system_prompt` argument is provided, the `...` arguments will be silently
 #' ignored.
 #' @inheritParams querychat_system_prompt
@@ -34,13 +34,15 @@ querychat_init <- function(
   greeting = NULL,
   data_description = NULL,
   extra_instructions = NULL,
+  prompt_template = NULL,
   system_prompt = querychat_system_prompt(
     df,
     table_name,
     # By default, pass through any params supplied to querychat_init()
     ...,
     data_description = data_description,
-    extra_instructions = extra_instructions
+    extra_instructions = extra_instructions,
+    prompt_template = prompt_template
   ),
   create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o")
 ) {
diff --git a/pkg-r/man/querychat_init.Rd b/pkg-r/man/querychat_init.Rd
index 5a0b0c842..b2e355df7 100644
--- a/pkg-r/man/querychat_init.Rd
+++ b/pkg-r/man/querychat_init.Rd
@@ -11,8 +11,10 @@ querychat_init(
   greeting = NULL,
   data_description = NULL,
   extra_instructions = NULL,
+  prompt_template = NULL,
   system_prompt = querychat_system_prompt(df, table_name, ..., data_description =
-    data_description, extra_instructions = extra_instructions),
+    data_description, extra_instructions = extra_instructions, prompt_template =
+    prompt_template),
   create_chat_func = purrr::partial(ellmer::chat_openai, model = "gpt-4o")
 )
 }
@@ -20,7 +22,7 @@ querychat_init(
 \item{df}{A data frame.}

 \item{...}{Additional arguments passed to the \code{querychat_system_prompt()}
-function, such as \code{categorical_threshold}, and \code{prompt_path}. If a
+function, such as \code{categorical_threshold}. If a
 \code{system_prompt} argument is provided, the \code{...} arguments will be silently
 ignored.}

@@ -33,14 +35,20 @@
 try to infer a table name using the name of the \code{df} argument.}

 to display to the user upon first loading the chatbot. If not provided, the
 LLM will be invoked at the start of the conversation to generate one.}

-\item{data_description}{Optional string in plain text or Markdown format, containing
-a description of the data frame or any additional context that might be
-helpful in understanding the data. This will be included in the system
-prompt for the chat model.}
+\item{data_description}{Optional string or existing file path. The contents
+should be in plain text or Markdown format, containing a description of the
+data frame or any additional context that might be helpful in understanding
+the data. This will be included in the system prompt for the chat model.}

-\item{extra_instructions}{Optional string in plain text or Markdown format, containing
-any additional instructions for the chat model. These will be appended at
-the end of the system prompt.}
+\item{extra_instructions}{Optional string or existing file path. The contents
+should be in plain text or Markdown format, containing any additional
+instructions for the chat model. These will be appended at the end of the
+system prompt.}
+
+\item{prompt_template}{Optional string or existing file path. If \code{NULL}, the
+default prompt file in the package will be used. The contents should
+contain a whisker template for the system prompt, with placeholders for
+\code{{{schema}}}, \code{{{data_description}}}, and \code{{{extra_instructions}}}.}

 \item{system_prompt}{A string containing the system prompt for the chat model.
 The default uses \code{querychat_system_prompt()} to generate a generic prompt,
diff --git a/pkg-r/man/querychat_system_prompt.Rd b/pkg-r/man/querychat_system_prompt.Rd
index a62b0ac3b..9c5a0e955 100644
--- a/pkg-r/man/querychat_system_prompt.Rd
+++ b/pkg-r/man/querychat_system_prompt.Rd
@@ -7,10 +7,11 @@ querychat_system_prompt(
   df,
   table_name,
+  ...,
   data_description = NULL,
   extra_instructions = NULL,
-  categorical_threshold = 10,
-  prompt_path = system.file("prompt", "prompt.md", package = "querychat")
+  prompt_template = NULL,
+  categorical_threshold = 10
 )
 }

 \arguments{
@@ -18,21 +19,25 @@ querychat_system_prompt(
 \item{df}{A data frame to generate schema information from.}

 \item{table_name}{A string containing the name of the table in SQL queries.}

-\item{data_description}{Optional string in plain text or Markdown format, containing
-a description of the data frame or any additional context that might be
-helpful in understanding the data. This will be included in the system
-prompt for the chat model.}
+\item{...}{Ignored. Used to allow for future parameters.}

-\item{extra_instructions}{Optional string in plain text or Markdown format, containing
-any additional instructions for the chat model. These will be appended at
-the end of the system prompt.}
+\item{data_description}{Optional string or existing file path. The contents
+should be in plain text or Markdown format, containing a description of the
+data frame or any additional context that might be helpful in understanding
+the data. This will be included in the system prompt for the chat model.}

-\item{categorical_threshold}{The maximum number of unique values for a text column to be considered categorical.}
+\item{extra_instructions}{Optional string or existing file path. The contents
+should be in plain text or Markdown format, containing any additional
+instructions for the chat model. These will be appended at the end of the
+system prompt.}

-\item{prompt_path}{Optional string containing the path to a custom prompt file. If
-\code{NULL}, the default prompt file in the package will be used. This file should
-contain a whisker template for the system prompt, with placeholders for \code{{{schema}}},
-\code{{{data_description}}}, and \code{{{extra_instructions}}}.}
+\item{prompt_template}{Optional string or existing file path. If \code{NULL}, the
+default prompt file in the package will be used. The contents should
+contain a whisker template for the system prompt, with placeholders for
+\code{{{schema}}}, \code{{{data_description}}}, and \code{{{extra_instructions}}}.}
+
+\item{categorical_threshold}{The maximum number of unique values for a text
+column to be considered categorical.}
 }
 \value{
 A string containing the system prompt for the chat model.
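
Not part of the patch, but for reviewers trying out the renamed argument: a minimal R sketch of `querychat_system_prompt()` with `prompt_template` passed as an in-memory string. The template text, table name, and `mtcars` data are invented for illustration; per the new `read_path_or_string()` helper, a character value that is not an existing file path is used verbatim as the whisker template, while a path to an existing file is read with `readLines()`.

```r
library(querychat)

# Illustrative whisker template kept as a string; the same text could instead
# live in a Markdown file whose path is passed to prompt_template.
template <- paste(
  "You are a careful SQL assistant.",
  "{{schema}}",
  "{{data_description}}",
  "{{extra_instructions}}",
  sep = "\n\n"
)

prompt <- querychat_system_prompt(
  mtcars,
  table_name = "mtcars",
  data_description = "Motor Trend road tests of 32 cars.",
  prompt_template = template  # or a path such as "prompts/my_prompt.md" (hypothetical)
)
cat(prompt)
```

`querychat_init()` forwards the same argument, so a call like `querychat_init(mtcars, prompt_template = template)` builds its default `system_prompt` from the same template.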