Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 15 additions & 73 deletions R/relational-duckdb.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,14 @@ duckdb_rel_from_df <- function(df, call = caller_env()) {
df <- as_duckplyr_df_impl(df)
}

out <- check_df_for_rel(df, call)
con <- get_default_duckdb_connection()

# FIXME: For some reason, it seems crucial to assign the result to a
# variable before returning it
experimental <- (Sys.getenv("DUCKPLYR_EXPERIMENTAL") == "TRUE")
out <- duckdb$rel_from_df(con, df, experimental = experimental)

check_df_for_rel(out, df, call)
meta_rel_register_df(out, df)

out
Expand All @@ -127,78 +133,19 @@ duckdb_rel_from_df <- function(df, call = caller_env()) {
# duckdb$rel_from_df(get_default_duckdb_connection(), df)
}

# FIXME: This should be duckdb's responsibility
check_df_for_rel <- function(df, call = caller_env()) {
rni <- .row_names_info(df, 0L)
if (is.character(rni)) {
cli::cli_abort("Need data frame without row names to convert to relational, got character row names.", call = call)
}
if (length(rni) != 0) {
if (length(rni) != 2L || !is.na(rni[[1]])) {
cli::cli_abort("Need data frame without row names to convert to relational, got numeric row names.", call = call)
}
}

if (length(df) == 0L) {
cli::cli_abort("Can't convert empty data frame to relational.", call = call)
check_df_for_rel <- function(rel, df, call = caller_env()) {
if (Sys.getenv("DUCKPLYR_CHECK_ROUNDTRIP") != "TRUE") {
return()
}

for (i in seq_along(df)) {
col <- .subset2(df, i)
if (!is.null(names(col))) {
cli::cli_abort("Can't convert named vectors to relational. Affected column: {.var {names(df)[[i]]}}.", call = call)
}
if (!is.null(dim(col))) {
cli::cli_abort("Can't convert arrays or matrices to relational. Affected column: {.var {names(df)[[i]]}}.", call = call)
}
if (isS4(col)) {
cli::cli_abort("Can't convert S4 columns to relational. Affected column: {.var {names(df)[[i]]}}.", call = call)
}

# Factors: https://github.com/duckdb/duckdb/issues/8561

# When adding new classes, make sure to adapt the first test in test-relational-duckdb.R

col_class <- class(col)
if (length(col_class) == 1) {
valid <- col_class %in% c("logical", "integer", "numeric", "character", "Date", "difftime")
} else if (length(col_class) == 2) {
valid <- identical(col_class, c("POSIXct", "POSIXt")) || identical(col_class, c("hms", "difftime"))
} else {
valid <- FALSE
}
if (!valid) {
cli::cli_abort("Can't convert columns of class {.cls {col_class}} to relational. Affected column: {.var {names(df)[[i]]}}.", call = call)
}
}

# FIXME: For some reason, it's important to create an alias here
con <- get_default_duckdb_connection()

# FIXME: For some other reason, it seems crucial to assign the result to a
# variable before returning it
out <- duckdb$rel_from_df(con, df)

roundtrip <- duckdb$rapi_rel_to_altrep(out)
if (Sys.getenv("DUCKPLYR_CHECK_ROUNDTRIP") == "TRUE") {
rlang::with_options(duckdb.materialize_callback = NULL, {
for (i in seq_along(df)) {
if (!identical(df[[i]], roundtrip[[i]])) {
cli::cli_abort("Imperfect roundtrip. Affected column: {.var {names(df)[[i]]}}.", call = call)
}
}
})
} else {
roundtrip <- duckdb$rapi_rel_to_altrep(rel)
rlang::with_options(duckdb.materialize_callback = NULL, {
for (i in seq_along(df)) {
df_attrib <- attributes(df[[i]])
roundtrip_attrib <- attributes(roundtrip[[i]])
if (!identical(df_attrib, roundtrip_attrib)) {
cli::cli_abort("Attributes are lost during conversion. Affected column: {.var {names(df)[[i]]}}.", call = call)
if (!identical(df[[i]], roundtrip[[i]])) {
cli::cli_abort("Imperfect roundtrip. Affected column: {.var {names(df)[[i]]}}.", call = call)
}
}
}

out
})
}

# https://github.com/r-lib/vctrs/issues/1956
Expand Down Expand Up @@ -463,12 +410,7 @@ to_duckdb_expr <- function(x) {
out
},
relational_relexpr_constant = {
# FIXME: Should be duckdb's responsibility
# Example: https://github.com/dschafer/activatr/issues/18
check_df_for_rel(vctrs::new_data_frame(list(constant = x$val)))

out <- duckdb$expr_constant(x$val)

if (!is.null(x$alias)) {
duckdb$expr_set_alias(out, x$alias)
}
Expand Down
14 changes: 3 additions & 11 deletions tests/testthat/_snaps/fallback.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
# A duckplyr data frame: 2 variables
Message
i dplyr fallback recorded
{"version":"0.3.1","message":"Can't convert columns of class <ordered/factor> to relational. Affected column: `...2`.","name":"head","x":{"...1":"Date","...2":"ordered/factor"},"args":{"n":21}}
{"version":"0.3.1","message":"Can't convert column `...2` to relational.","name":"head","x":{"...1":"Date","...2":"ordered/factor"},"args":{"n":21}}
Output
a b
<date> <ord>
Expand Down Expand Up @@ -151,25 +151,17 @@
Code
duckdb_tibble(a = c(x = 1))
Condition
Error in `duckdb_tibble()`:
Error in `duckdb_rel_from_df()`:
! Can't convert named vectors to relational. Affected column: `a`.

---

Code
duckdb_tibble(a = matrix(1:4, ncol = 2))
Condition
Error in `duckdb_tibble()`:
Error in `duckdb_rel_from_df()`:
! Can't convert arrays or matrices to relational. Affected column: `a`.

# list column

Code
duckdb_tibble(a = 1, b = 2, c = list(3))
Condition
Error in `duckdb_tibble()`:
! Can't convert columns of class <list> to relational. Affected column: `c`.

# __row_number

Code
Expand Down
10 changes: 5 additions & 5 deletions tests/testthat/_snaps/relational-duckdb.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
Code
data.frame(a = vctrs::new_vctr(1:3)) %>% duckdb_rel_from_df()
Condition
Error:
! Can't convert columns of class <vctrs_vctr> to relational. Affected column: `a`.
Error in `duckdb_rel_from_df()`:
! Can't convert column `a` to relational.

# duckdb_rel_from_df() error call

Code
as_duckdb_tibble(data.frame(a = factor(letters)))
as_duckdb_tibble(data.frame(a = ordered(letters)))
Condition
Error in `as_duckdb_tibble()`:
! Can't convert columns of class <factor> to relational. Affected column: `a`.
Error in `duckdb_rel_from_df()`:
! Can't convert column `a` to relational.

# rel_aggregate()

Expand Down
1 change: 1 addition & 0 deletions tests/testthat/test-dplyr-arrange.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ test_that("duckplyr_arrange() gives meaningful errors", {
# column types ----------------------------------------------------------

test_that("arrange handles list columns (#282)", {
skip("TODO duckdb")
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
# no intrinsic ordering
df <- tibble(x = 1:3, y = list(3, 2, 1))
Expand Down
7 changes: 0 additions & 7 deletions tests/testthat/test-dplyr-join.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,6 @@ test_that("keys are coerced to symmetric type", {
expect_type(duckplyr_inner_join(bar, foo, by = "id")$id, "character")
})

test_that("factor keys are coerced to the union factor type", {
df1 <- tibble(x = 1, y = factor("a"))
df2 <- tibble(x = 2, y = factor("b"))
out <- duckplyr_full_join(df1, df2, by = c("x", "y"))
expect_equal(out$y, factor(c("a", "b")))
})

test_that("keys of non-equi conditions are not coerced if `keep = NULL`", {
skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")
foo <- tibble(id = factor(c("a", "b")), col1 = c(1, 2), var1 = "foo")
Expand Down
12 changes: 0 additions & 12 deletions tests/testthat/test-fallback.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,18 +179,6 @@ test_that("named column", {
})
})

test_that("list column", {
withr::local_envvar(c(
"DUCKPLYR_FALLBACK_COLLECT" = "1",
"DUCKPLYR_FALLBACK_VERBOSE" = "TRUE",
"DUCKPLYR_FALLBACK_LOG_DIR" = tempdir()
))

expect_snapshot(error = TRUE, {
duckdb_tibble(a = 1, b = 2, c = list(3))
})
})

test_that("__row_number", {
withr::local_envvar(c(
"DUCKPLYR_FALLBACK_COLLECT" = "1",
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-relational-duckdb.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ test_that("duckdb_rel_from_df() and changing column names", {

test_that("duckdb_rel_from_df() error call", {
expect_snapshot(error = TRUE, {
as_duckdb_tibble(data.frame(a = factor(letters)))
as_duckdb_tibble(data.frame(a = ordered(letters)))
})
})

Expand Down
2 changes: 2 additions & 0 deletions tools/00-funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ duckplyr_tests <- head(n = -1, list(
NULL
),
"test-arrange.R" = c(
# Overly strict test, should be split
"arrange handles list columns (#282)",
NULL
),
"test-bind-cols.R" = c(
Expand Down
Loading