Skip to content

Commit d45820f

Browse files
committed
code cleanup, better tests, and dropping glue dependency
1 parent e6731be commit d45820f

File tree

4 files changed

+91
-48
lines changed

4 files changed

+91
-48
lines changed

pkg-r/DESCRIPTION

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ Imports:
2121
dplyr,
2222
duckdb,
2323
ellmer,
24-
glue,
2524
htmltools,
2625
purrr,
2726
rlang,

pkg-r/R/data_source.R

Lines changed: 47 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,10 @@ querychat_data_source.DBIConnection <- function(
6767
}
6868

6969
if (!DBI::dbExistsTable(x, table_name)) {
70-
rlang::abort(glue::glue(
71-
"Table '{table_name}' not found in database. If you're using databricks, try setting the 'Catalog' and 'Schema' arguments to DBI::dbConnect"
70+
rlang::abort(paste0(
71+
"Table '",
72+
table_name,
73+
"' not found in database. If you're using databricks, try setting the 'Catalog' and 'Schema' arguments to DBI::dbConnect"
7274
))
7375
}
7476

@@ -262,7 +264,7 @@ get_schema.dbi_source <- function(source, ...) {
262264
columns <- DBI::dbListFields(conn, table_name)
263265

264266
schema_lines <- c(
265-
glue::glue("Table: {table_name}"),
267+
paste("Table:", table_name),
266268
"Columns:"
267269
)
268270

@@ -272,9 +274,10 @@ get_schema.dbi_source <- function(source, ...) {
272274
text_columns <- character(0)
273275

274276
# Get sample of data to determine types
275-
sample_query <- glue::glue_sql(
276-
"SELECT * FROM {`table_name`} LIMIT 1",
277-
.con = conn
277+
sample_query <- paste0(
278+
"SELECT * FROM ",
279+
DBI::dbQuoteIdentifier(conn, table_name),
280+
" LIMIT 1"
278281
)
279282
sample_data <- DBI::dbGetQuery(conn, sample_query)
280283

@@ -288,16 +291,28 @@ get_schema.dbi_source <- function(source, ...) {
288291
numeric_columns <- c(numeric_columns, col)
289292
select_parts <- c(
290293
select_parts,
291-
glue::glue_sql("MIN({`col`}) as {`col`}__min", .con = conn),
292-
glue::glue_sql("MAX({`col`}) as {`col`}__max", .con = conn)
294+
paste0(
295+
"MIN(",
296+
DBI::dbQuoteIdentifier(conn, col),
297+
") as ",
298+
DBI::dbQuoteIdentifier(conn, paste0(col, '__min'))
299+
),
300+
paste0(
301+
"MAX(",
302+
DBI::dbQuoteIdentifier(conn, col),
303+
") as ",
304+
DBI::dbQuoteIdentifier(conn, paste0(col, '__max'))
305+
)
293306
)
294307
} else if (col_class %in% c("character", "factor")) {
295308
text_columns <- c(text_columns, col)
296309
select_parts <- c(
297310
select_parts,
298-
glue::glue_sql(
299-
"COUNT(DISTINCT {`col`}) as {`col`}__distinct_count",
300-
.con = conn
311+
paste0(
312+
"COUNT(DISTINCT ",
313+
DBI::dbQuoteIdentifier(conn, col),
314+
") as ",
315+
DBI::dbQuoteIdentifier(conn, paste0(col, '__distinct_count'))
301316
)
302317
)
303318
}
@@ -308,9 +323,11 @@ get_schema.dbi_source <- function(source, ...) {
308323
if (length(select_parts) > 0) {
309324
tryCatch(
310325
{
311-
stats_query <- glue::glue_sql(
312-
"SELECT {select_parts*} FROM {`table_name`}",
313-
.con = conn
326+
stats_query <- paste0(
327+
"SELECT ",
328+
paste0(select_parts, collapse = ", "),
329+
" FROM ",
330+
DBI::dbQuoteIdentifier(conn, table_name)
314331
)
315332
result <- DBI::dbGetQuery(conn, stats_query)
316333
if (nrow(result) > 0) {
@@ -327,11 +344,6 @@ get_schema.dbi_source <- function(source, ...) {
327344
categorical_values <- list()
328345
text_cols_to_query <- character(0)
329346

330-
# Always include the 'name' field from test_df for test case in tests/testthat/test-data-source.R
331-
if ("name" %in% text_columns) {
332-
text_cols_to_query <- c(text_cols_to_query, "name")
333-
}
334-
335347
for (col_name in text_columns) {
336348
distinct_count_key <- paste0(col_name, "__distinct_count")
337349
if (
@@ -352,9 +364,15 @@ get_schema.dbi_source <- function(source, ...) {
352364
for (col_name in text_cols_to_query) {
353365
tryCatch(
354366
{
355-
cat_query <- glue::glue_sql(
356-
"SELECT DISTINCT {`col_name`} FROM {`table_name`} WHERE {`col_name`} IS NOT NULL ORDER BY {`col_name`}",
357-
.con = conn
367+
cat_query <- paste0(
368+
"SELECT DISTINCT ",
369+
DBI::dbQuoteIdentifier(conn, col_name),
370+
" FROM ",
371+
DBI::dbQuoteIdentifier(conn, table_name),
372+
" WHERE ",
373+
DBI::dbQuoteIdentifier(conn, col_name),
374+
" IS NOT NULL ORDER BY ",
375+
DBI::dbQuoteIdentifier(conn, col_name)
358376
)
359377
result <- DBI::dbGetQuery(conn, cat_query)
360378
if (nrow(result) > 0) {
@@ -373,7 +391,7 @@ get_schema.dbi_source <- function(source, ...) {
373391
col_class <- class(sample_data[[col]])[1]
374392
sql_type <- r_class_to_sql_type(col_class)
375393

376-
column_info <- glue::glue("- {col} ({sql_type})")
394+
column_info <- paste0("- ", col, " (", sql_type, ")")
377395

378396
# Add range info for numeric columns
379397
if (col %in% numeric_columns) {
@@ -386,8 +404,11 @@ get_schema.dbi_source <- function(source, ...) {
386404
!is.na(column_stats[[min_key]]) &&
387405
!is.na(column_stats[[max_key]])
388406
) {
389-
range_info <- glue::glue(
390-
" Range: {column_stats[[min_key]]} to {column_stats[[max_key]]}"
407+
range_info <- paste0(
408+
" Range: ",
409+
column_stats[[min_key]],
410+
" to ",
411+
column_stats[[max_key]]
391412
)
392413
column_info <- paste(column_info, range_info, sep = "\n")
393414
}
@@ -398,7 +419,7 @@ get_schema.dbi_source <- function(source, ...) {
398419
values <- categorical_values[[col]]
399420
if (length(values) > 0) {
400421
values_str <- paste0("'", values, "'", collapse = ", ")
401-
cat_info <- glue::glue(" Categorical values: {values_str}")
422+
cat_info <- paste0(" Categorical values: ", values_str)
402423
column_info <- paste(column_info, cat_info, sep = "\n")
403424
}
404425
}

pkg-r/R/querychat.R

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,6 @@ querychat_init <- function(
7676
"create_chat_func must be a function" = is.function(create_chat_func)
7777
)
7878

79-
if ("table_name" %in% names(attributes(system_prompt))) {
80-
# If available, be sure to use the `table_name` argument to `querychat_init()`
81-
# matches the one supplied to the system prompt
82-
if (table_name != attr(system_prompt, "table_name")) {
83-
rlang::abort(
84-
"`querychat_init(table_name=)` must match system prompt `table_name` supplied to `querychat_system_prompt()`."
85-
)
86-
}
87-
}
8879
if (!is.null(greeting)) {
8980
greeting <- paste(collapse = "\n", greeting)
9081
} else {
@@ -307,8 +298,12 @@ df_to_html <- function(df, maxrows = 5) {
307298
paste(collapse = "\n")
308299

309300
if (nrow(df_short) != nrow(df)) {
310-
rows_notice <- glue::glue(
311-
"\n\n(Showing only the first {maxrows} rows out of {nrow(df)}.)\n"
301+
rows_notice <- paste0(
302+
"\n\n(Showing only the first ",
303+
maxrows,
304+
" rows out of ",
305+
nrow(df),
306+
".)\n"
312307
)
313308
} else {
314309
rows_notice <- ""

pkg-r/tests/testthat/test-data-source.R

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,14 @@ test_that("get_schema methods return proper schema", {
6363
df_source <- querychat_data_source(test_df, table_name = "test_table")
6464
schema <- get_schema(df_source)
6565
expect_type(schema, "character")
66-
expect_true(grepl("Table: test_table", schema))
67-
expect_true(grepl("id \\(INTEGER\\)", schema))
68-
expect_true(grepl("name \\(TEXT\\)", schema))
69-
expect_true(grepl("active \\(BOOLEAN\\)", schema))
70-
expect_true(grepl("Categorical values", schema)) # Should list categorical values
66+
expect_match(schema, "Table: test_table")
67+
expect_match(schema, "id \\(INTEGER\\)")
68+
expect_match(schema, "name \\(TEXT\\)")
69+
expect_match(schema, "active \\(BOOLEAN\\)")
70+
expect_match(schema, "Categorical values") # Should list categorical values
71+
72+
# Test min/max values in schema - specifically for the id column
73+
expect_match(schema, "- id \\(INTEGER\\)\\n Range: 1 to 5")
7174

7275
# Test with DBI source
7376
temp_db <- tempfile(fileext = ".db")
@@ -77,9 +80,12 @@ test_that("get_schema methods return proper schema", {
7780
dbi_source <- querychat_data_source(conn, "test_table")
7881
schema <- get_schema(dbi_source)
7982
expect_type(schema, "character")
80-
expect_true(grepl("Table: test_table", schema))
81-
expect_true(grepl("id \\(INTEGER\\)", schema))
82-
expect_true(grepl("name \\(TEXT\\)", schema))
83+
expect_match(schema, "Table: test_table")
84+
expect_match(schema, "id \\(INTEGER\\)")
85+
expect_match(schema, "name \\(TEXT\\)")
86+
87+
# Test min/max values in DBI source schema - specifically for the id column
88+
expect_match(schema, "- id \\(INTEGER\\)\\n Range: 1 to 5")
8389

8490
# Clean up
8591
cleanup_source(df_source)
@@ -155,6 +161,28 @@ test_that("get_lazy_data returns tbl objects", {
155161
unlink(temp_db)
156162
})
157163

164+
test_that("get_schema correctly reports min/max values for numeric columns", {
165+
# Create a dataframe with multiple numeric columns
166+
test_df <- data.frame(
167+
id = 1:5,
168+
score = c(10.5, 20.3, 15.7, 30.1, 25.9),
169+
count = c(100, 200, 150, 50, 75),
170+
stringsAsFactors = FALSE
171+
)
172+
173+
df_source <- querychat_data_source(test_df, table_name = "test_metrics")
174+
schema <- get_schema(df_source)
175+
176+
# Check that each numeric column has the correct min/max values
177+
expect_match(schema, "- id \\(INTEGER\\)\\n Range: 1 to 5")
178+
expect_match(schema, "- score \\(FLOAT\\)\\n Range: 10\\.5 to 30\\.1")
179+
# Note: In the test output, count was detected as FLOAT rather than INTEGER
180+
expect_match(schema, "- count \\(FLOAT\\)\\n Range: 50 to 200")
181+
182+
# Clean up
183+
cleanup_source(df_source)
184+
})
185+
158186
test_that("create_system_prompt generates appropriate system prompt", {
159187
test_df <- data.frame(
160188
id = 1:3,
@@ -169,8 +197,8 @@ test_that("create_system_prompt generates appropriate system prompt", {
169197
)
170198
expect_type(prompt, "character")
171199
expect_true(nchar(prompt) > 0)
172-
expect_true(grepl("A test dataframe", prompt))
173-
expect_true(grepl("Table: test_table", prompt))
200+
expect_match(prompt, "A test dataframe")
201+
expect_match(prompt, "Table: test_table")
174202

175203
# Clean up
176204
cleanup_source(df_source)

0 commit comments

Comments
 (0)