code cleanup, better tests, and dropping glue dependency

npelikan · npelikan · commit d45820fbe447 · 2025-07-09T16:53:28.000-06:00
diff --git a/pkg-r/DESCRIPTION b/pkg-r/DESCRIPTION
@@ -21,7 +21,6 @@ Imports:
     dplyr,
     duckdb,
     ellmer,
-    glue,
     htmltools,
     purrr,
     rlang,
diff --git a/pkg-r/R/data_source.R b/pkg-r/R/data_source.R
@@ -67,8 +67,10 @@ querychat_data_source.DBIConnection <- function(
   }
 
   if (!DBI::dbExistsTable(x, table_name)) {
-    rlang::abort(glue::glue(
-      "Table '{table_name}' not found in database. If you're using databricks, try setting the 'Catalog' and 'Schema' arguments to DBI::dbConnect"
+    rlang::abort(paste0(
+      "Table '",
+      table_name,
+      "' not found in database. If you're using databricks, try setting the 'Catalog' and 'Schema' arguments to DBI::dbConnect"
     ))
   }
 
@@ -262,7 +264,7 @@ get_schema.dbi_source <- function(source, ...) {
   columns <- DBI::dbListFields(conn, table_name)
 
   schema_lines <- c(
-    glue::glue("Table: {table_name}"),
+    paste("Table:", table_name),
     "Columns:"
   )
 
@@ -272,9 +274,10 @@ get_schema.dbi_source <- function(source, ...) {
   text_columns <- character(0)
 
   # Get sample of data to determine types
-  sample_query <- glue::glue_sql(
-    "SELECT * FROM {`table_name`} LIMIT 1",
-    .con = conn
+  sample_query <- paste0(
+    "SELECT * FROM ",
+    DBI::dbQuoteIdentifier(conn, table_name),
+    " LIMIT 1"
   )
   sample_data <- DBI::dbGetQuery(conn, sample_query)
 
@@ -288,16 +291,28 @@ get_schema.dbi_source <- function(source, ...) {
       numeric_columns <- c(numeric_columns, col)
       select_parts <- c(
         select_parts,
-        glue::glue_sql("MIN({`col`}) as {`col`}__min", .con = conn),
-        glue::glue_sql("MAX({`col`}) as {`col`}__max", .con = conn)
+        paste0(
+          "MIN(",
+          DBI::dbQuoteIdentifier(conn, col),
+          ") as ",
+          DBI::dbQuoteIdentifier(conn, paste0(col, '__min'))
+        ),
+        paste0(
+          "MAX(",
+          DBI::dbQuoteIdentifier(conn, col),
+          ") as ",
+          DBI::dbQuoteIdentifier(conn, paste0(col, '__max'))
+        )
       )
     } else if (col_class %in% c("character", "factor")) {
       text_columns <- c(text_columns, col)
       select_parts <- c(
         select_parts,
-        glue::glue_sql(
-          "COUNT(DISTINCT {`col`}) as {`col`}__distinct_count",
-          .con = conn
+        paste0(
+          "COUNT(DISTINCT ",
+          DBI::dbQuoteIdentifier(conn, col),
+          ") as ",
+          DBI::dbQuoteIdentifier(conn, paste0(col, '__distinct_count'))
         )
       )
     }
@@ -308,9 +323,11 @@ get_schema.dbi_source <- function(source, ...) {
   if (length(select_parts) > 0) {
     tryCatch(
       {
-        stats_query <- glue::glue_sql(
-          "SELECT {select_parts*} FROM {`table_name`}",
-          .con = conn
+        stats_query <- paste0(
+          "SELECT ",
+          paste0(select_parts, collapse = ", "),
+          " FROM ",
+          DBI::dbQuoteIdentifier(conn, table_name)
         )
         result <- DBI::dbGetQuery(conn, stats_query)
         if (nrow(result) > 0) {
@@ -327,11 +344,6 @@ get_schema.dbi_source <- function(source, ...) {
   categorical_values <- list()
   text_cols_to_query <- character(0)
 
-  # Always include the 'name' field from test_df for test case in tests/testthat/test-data-source.R
-  if ("name" %in% text_columns) {
-    text_cols_to_query <- c(text_cols_to_query, "name")
-  }
-
   for (col_name in text_columns) {
     distinct_count_key <- paste0(col_name, "__distinct_count")
     if (
@@ -352,9 +364,15 @@ get_schema.dbi_source <- function(source, ...) {
     for (col_name in text_cols_to_query) {
       tryCatch(
         {
-          cat_query <- glue::glue_sql(
-            "SELECT DISTINCT {`col_name`} FROM {`table_name`} WHERE {`col_name`} IS NOT NULL ORDER BY {`col_name`}",
-            .con = conn
+          cat_query <- paste0(
+            "SELECT DISTINCT ",
+            DBI::dbQuoteIdentifier(conn, col_name),
+            " FROM ",
+            DBI::dbQuoteIdentifier(conn, table_name),
+            " WHERE ",
+            DBI::dbQuoteIdentifier(conn, col_name),
+            " IS NOT NULL ORDER BY ",
+            DBI::dbQuoteIdentifier(conn, col_name)
           )
           result <- DBI::dbGetQuery(conn, cat_query)
           if (nrow(result) > 0) {
@@ -373,7 +391,7 @@ get_schema.dbi_source <- function(source, ...) {
     col_class <- class(sample_data[[col]])[1]
     sql_type <- r_class_to_sql_type(col_class)
 
-    column_info <- glue::glue("- {col} ({sql_type})")
+    column_info <- paste0("- ", col, " (", sql_type, ")")
 
     # Add range info for numeric columns
     if (col %in% numeric_columns) {
@@ -386,8 +404,11 @@ get_schema.dbi_source <- function(source, ...) {
           !is.na(column_stats[[min_key]]) &&
           !is.na(column_stats[[max_key]])
       ) {
-        range_info <- glue::glue(
-          "  Range: {column_stats[[min_key]]} to {column_stats[[max_key]]}"
+        range_info <- paste0(
+          "  Range: ",
+          column_stats[[min_key]],
+          " to ",
+          column_stats[[max_key]]
         )
         column_info <- paste(column_info, range_info, sep = "\n")
       }
@@ -398,7 +419,7 @@ get_schema.dbi_source <- function(source, ...) {
       values <- categorical_values[[col]]
       if (length(values) > 0) {
         values_str <- paste0("'", values, "'", collapse = ", ")
-        cat_info <- glue::glue("  Categorical values: {values_str}")
+        cat_info <- paste0("  Categorical values: ", values_str)
         column_info <- paste(column_info, cat_info, sep = "\n")
       }
     }
diff --git a/pkg-r/R/querychat.R b/pkg-r/R/querychat.R
@@ -76,15 +76,6 @@ querychat_init <- function(
     "create_chat_func must be a function" = is.function(create_chat_func)
   )
 
-  if ("table_name" %in% names(attributes(system_prompt))) {
-    # If available, be sure to use the `table_name` argument to `querychat_init()`
-    # matches the one supplied to the system prompt
-    if (table_name != attr(system_prompt, "table_name")) {
-      rlang::abort(
-        "`querychat_init(table_name=)` must match system prompt `table_name` supplied to `querychat_system_prompt()`."
-      )
-    }
-  }
   if (!is.null(greeting)) {
     greeting <- paste(collapse = "\n", greeting)
   } else {
@@ -307,8 +298,12 @@ df_to_html <- function(df, maxrows = 5) {
     paste(collapse = "\n")
 
   if (nrow(df_short) != nrow(df)) {
-    rows_notice <- glue::glue(
-      "\n\n(Showing only the first {maxrows} rows out of {nrow(df)}.)\n"
+    rows_notice <- paste0(
+      "\n\n(Showing only the first ",
+      maxrows,
+      " rows out of ",
+      nrow(df),
+      ".)\n"
     )
   } else {
     rows_notice <- ""
diff --git a/pkg-r/tests/testthat/test-data-source.R b/pkg-r/tests/testthat/test-data-source.R
@@ -63,11 +63,14 @@ test_that("get_schema methods return proper schema", {
   df_source <- querychat_data_source(test_df, table_name = "test_table")
   schema <- get_schema(df_source)
   expect_type(schema, "character")
-  expect_true(grepl("Table: test_table", schema))
-  expect_true(grepl("id \\(INTEGER\\)", schema))
-  expect_true(grepl("name \\(TEXT\\)", schema))
-  expect_true(grepl("active \\(BOOLEAN\\)", schema))
-  expect_true(grepl("Categorical values", schema)) # Should list categorical values
+  expect_match(schema, "Table: test_table")
+  expect_match(schema, "id \\(INTEGER\\)")
+  expect_match(schema, "name \\(TEXT\\)")
+  expect_match(schema, "active \\(BOOLEAN\\)")
+  expect_match(schema, "Categorical values") # Should list categorical values
+
+  # Test min/max values in schema - specifically for the id column
+  expect_match(schema, "- id \\(INTEGER\\)\\n  Range: 1 to 5")
 
   # Test with DBI source
   temp_db <- tempfile(fileext = ".db")
@@ -77,9 +80,12 @@ test_that("get_schema methods return proper schema", {
   dbi_source <- querychat_data_source(conn, "test_table")
   schema <- get_schema(dbi_source)
   expect_type(schema, "character")
-  expect_true(grepl("Table: test_table", schema))
-  expect_true(grepl("id \\(INTEGER\\)", schema))
-  expect_true(grepl("name \\(TEXT\\)", schema))
+  expect_match(schema, "Table: test_table")
+  expect_match(schema, "id \\(INTEGER\\)")
+  expect_match(schema, "name \\(TEXT\\)")
+
+  # Test min/max values in DBI source schema - specifically for the id column
+  expect_match(schema, "- id \\(INTEGER\\)\\n  Range: 1 to 5")
 
   # Clean up
   cleanup_source(df_source)
@@ -155,6 +161,28 @@ test_that("get_lazy_data returns tbl objects", {
   unlink(temp_db)
 })
 
+test_that("get_schema correctly reports min/max values for numeric columns", {
+  # Create a dataframe with multiple numeric columns
+  test_df <- data.frame(
+    id = 1:5,
+    score = c(10.5, 20.3, 15.7, 30.1, 25.9),
+    count = c(100, 200, 150, 50, 75),
+    stringsAsFactors = FALSE
+  )
+
+  df_source <- querychat_data_source(test_df, table_name = "test_metrics")
+  schema <- get_schema(df_source)
+
+  # Check that each numeric column has the correct min/max values
+  expect_match(schema, "- id \\(INTEGER\\)\\n  Range: 1 to 5")
+  expect_match(schema, "- score \\(FLOAT\\)\\n  Range: 10\\.5 to 30\\.1")
+  # Note: count holds whole numbers but is an R double vector, so the schema reports FLOAT, not INTEGER
+  expect_match(schema, "- count \\(FLOAT\\)\\n  Range: 50 to 200")
+
+  # Clean up
+  cleanup_source(df_source)
+})
+
 test_that("create_system_prompt generates appropriate system prompt", {
   test_df <- data.frame(
     id = 1:3,
@@ -169,8 +197,8 @@ test_that("create_system_prompt generates appropriate system prompt", {
   )
   expect_type(prompt, "character")
   expect_true(nchar(prompt) > 0)
-  expect_true(grepl("A test dataframe", prompt))
-  expect_true(grepl("Table: test_table", prompt))
+  expect_match(prompt, "A test dataframe")
+  expect_match(prompt, "Table: test_table")
 
   # Clean up
   cleanup_source(df_source)

-Original file line number
+Diff line change
     dplyr,
     duckdb,
     ellmer,
 -    glue,
     htmltools,
     purrr,
     rlang,