diff --git a/R/relational-duckdb.R b/R/relational-duckdb.R index 61e25cec1..4971c5a4f 100644 --- a/R/relational-duckdb.R +++ b/R/relational-duckdb.R @@ -117,8 +117,14 @@ duckdb_rel_from_df <- function(df, call = caller_env()) { df <- as_duckplyr_df_impl(df) } - out <- check_df_for_rel(df, call) + con <- get_default_duckdb_connection() + + # FIXME: For some reason, it seems crucial to assign the result to a + # variable before returning it + experimental <- (Sys.getenv("DUCKPLYR_EXPERIMENTAL") == "TRUE") + out <- duckdb$rel_from_df(con, df, experimental = experimental) + check_df_for_rel(out, df, call) meta_rel_register_df(out, df) out @@ -127,78 +133,19 @@ duckdb_rel_from_df <- function(df, call = caller_env()) { # duckdb$rel_from_df(get_default_duckdb_connection(), df) } -# FIXME: This should be duckdb's responsibility -check_df_for_rel <- function(df, call = caller_env()) { - rni <- .row_names_info(df, 0L) - if (is.character(rni)) { - cli::cli_abort("Need data frame without row names to convert to relational, got character row names.", call = call) - } - if (length(rni) != 0) { - if (length(rni) != 2L || !is.na(rni[[1]])) { - cli::cli_abort("Need data frame without row names to convert to relational, got numeric row names.", call = call) - } - } - - if (length(df) == 0L) { - cli::cli_abort("Can't convert empty data frame to relational.", call = call) +check_df_for_rel <- function(rel, df, call = caller_env()) { + if (Sys.getenv("DUCKPLYR_CHECK_ROUNDTRIP") != "TRUE") { + return() } - for (i in seq_along(df)) { - col <- .subset2(df, i) - if (!is.null(names(col))) { - cli::cli_abort("Can't convert named vectors to relational. Affected column: {.var {names(df)[[i]]}}.", call = call) - } - if (!is.null(dim(col))) { - cli::cli_abort("Can't convert arrays or matrices to relational. Affected column: {.var {names(df)[[i]]}}.", call = call) - } - if (isS4(col)) { - cli::cli_abort("Can't convert S4 columns to relational. Affected column: {.var {names(df)[[i]]}}.", call = call) - } - - # Factors: https://github.com/duckdb/duckdb/issues/8561 - - # When adding new classes, make sure to adapt the first test in test-relational-duckdb.R - - col_class <- class(col) - if (length(col_class) == 1) { - valid <- col_class %in% c("logical", "integer", "numeric", "character", "Date", "difftime") - } else if (length(col_class) == 2) { - valid <- identical(col_class, c("POSIXct", "POSIXt")) || identical(col_class, c("hms", "difftime")) - } else { - valid <- FALSE - } - if (!valid) { - cli::cli_abort("Can't convert columns of class {.cls {col_class}} to relational. Affected column: {.var {names(df)[[i]]}}.", call = call) - } - } - - # FIXME: For some reason, it's important to create an alias here - con <- get_default_duckdb_connection() - - # FIXME: For some other reason, it seems crucial to assign the result to a - # variable before returning it - out <- duckdb$rel_from_df(con, df) - - roundtrip <- duckdb$rapi_rel_to_altrep(out) - if (Sys.getenv("DUCKPLYR_CHECK_ROUNDTRIP") == "TRUE") { - rlang::with_options(duckdb.materialize_callback = NULL, { - for (i in seq_along(df)) { - if (!identical(df[[i]], roundtrip[[i]])) { - cli::cli_abort("Imperfect roundtrip. Affected column: {.var {names(df)[[i]]}}.", call = call) - } - } - }) - } else { + roundtrip <- duckdb$rapi_rel_to_altrep(rel) + rlang::with_options(duckdb.materialize_callback = NULL, { for (i in seq_along(df)) { - df_attrib <- attributes(df[[i]]) - roundtrip_attrib <- attributes(roundtrip[[i]]) - if (!identical(df_attrib, roundtrip_attrib)) { - cli::cli_abort("Attributes are lost during conversion. Affected column: {.var {names(df)[[i]]}}.", call = call) + if (!identical(df[[i]], roundtrip[[i]])) { + cli::cli_abort("Imperfect roundtrip. Affected column: {.var {names(df)[[i]]}}.", call = call) } } - } - - out + }) } # https://github.com/r-lib/vctrs/issues/1956 @@ -463,12 +410,7 @@ to_duckdb_expr <- function(x) { out }, relational_relexpr_constant = { - # FIXME: Should be duckdb's responsibility - # Example: https://github.com/dschafer/activatr/issues/18 - check_df_for_rel(vctrs::new_data_frame(list(constant = x$val))) - out <- duckdb$expr_constant(x$val) - if (!is.null(x$alias)) { duckdb$expr_set_alias(out, x$alias) } diff --git a/tests/testthat/_snaps/fallback.md b/tests/testthat/_snaps/fallback.md index 1b4b53bb9..842651e7e 100644 --- a/tests/testthat/_snaps/fallback.md +++ b/tests/testthat/_snaps/fallback.md @@ -76,7 +76,7 @@ # A duckplyr data frame: 2 variables Message i dplyr fallback recorded - {"version":"0.3.1","message":"Can't convert columns of class to relational. Affected column: `...2`.","name":"head","x":{"...1":"Date","...2":"ordered/factor"},"args":{"n":21}} + {"version":"0.3.1","message":"Can't convert column `...2` to relational.","name":"head","x":{"...1":"Date","...2":"ordered/factor"},"args":{"n":21}} Output a b @@ -151,7 +151,7 @@ Code duckdb_tibble(a = c(x = 1)) Condition - Error in `duckdb_tibble()`: + Error in `duckdb_rel_from_df()`: ! Can't convert named vectors to relational. Affected column: `a`. --- @@ -159,17 +159,9 @@ Code duckdb_tibble(a = matrix(1:4, ncol = 2)) Condition - Error in `duckdb_tibble()`: + Error in `duckdb_rel_from_df()`: ! Can't convert arrays or matrices to relational. Affected column: `a`. -# list column - - Code - duckdb_tibble(a = 1, b = 2, c = list(3)) - Condition - Error in `duckdb_tibble()`: - ! Can't convert columns of class to relational. Affected column: `c`. - # __row_number Code diff --git a/tests/testthat/_snaps/relational-duckdb.md b/tests/testthat/_snaps/relational-duckdb.md index 62bb68a10..08f48cb3f 100644 --- a/tests/testthat/_snaps/relational-duckdb.md +++ b/tests/testthat/_snaps/relational-duckdb.md @@ -3,16 +3,16 @@ Code data.frame(a = vctrs::new_vctr(1:3)) %>% duckdb_rel_from_df() Condition - Error: - ! Can't convert columns of class to relational. Affected column: `a`. + Error in `duckdb_rel_from_df()`: + ! Can't convert column `a` to relational. # duckdb_rel_from_df() error call Code - as_duckdb_tibble(data.frame(a = factor(letters))) + as_duckdb_tibble(data.frame(a = ordered(letters))) Condition - Error in `as_duckdb_tibble()`: - ! Can't convert columns of class to relational. Affected column: `a`. + Error in `duckdb_rel_from_df()`: + ! Can't convert column `a` to relational. # rel_aggregate() diff --git a/tests/testthat/test-dplyr-arrange.R b/tests/testthat/test-dplyr-arrange.R index bea50702e..a16d04ec1 100644 --- a/tests/testthat/test-dplyr-arrange.R +++ b/tests/testthat/test-dplyr-arrange.R @@ -53,6 +53,7 @@ test_that("duckplyr_arrange() gives meaningful errors", { # column types ---------------------------------------------------------- test_that("arrange handles list columns (#282)", { + skip("TODO duckdb") skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE") # no intrinsic ordering df <- tibble(x = 1:3, y = list(3, 2, 1)) diff --git a/tests/testthat/test-dplyr-join.R b/tests/testthat/test-dplyr-join.R index 60b8355ef..cab812be8 100644 --- a/tests/testthat/test-dplyr-join.R +++ b/tests/testthat/test-dplyr-join.R @@ -62,13 +62,6 @@ test_that("keys are coerced to symmetric type", { expect_type(duckplyr_inner_join(bar, foo, by = "id")$id, "character") }) -test_that("factor keys are coerced to the union factor type", { - df1 <- tibble(x = 1, y = factor("a")) - df2 <- tibble(x = 2, y = factor("b")) - out <- duckplyr_full_join(df1, df2, by = c("x", "y")) - expect_equal(out$y, factor(c("a", "b"))) -}) - test_that("keys of non-equi conditions are not coerced if `keep = NULL`", { skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE") foo <- tibble(id = factor(c("a", "b")), col1 = c(1, 2), var1 = "foo") diff --git a/tests/testthat/test-fallback.R b/tests/testthat/test-fallback.R index cb03b7cca..9d8418ee4 100644 --- a/tests/testthat/test-fallback.R +++ b/tests/testthat/test-fallback.R @@ -179,18 +179,6 @@ test_that("named column", { }) }) -test_that("list column", { - withr::local_envvar(c( - "DUCKPLYR_FALLBACK_COLLECT" = "1", - "DUCKPLYR_FALLBACK_VERBOSE" = "TRUE", - "DUCKPLYR_FALLBACK_LOG_DIR" = tempdir() - )) - - expect_snapshot(error = TRUE, { - duckdb_tibble(a = 1, b = 2, c = list(3)) - }) -}) - test_that("__row_number", { withr::local_envvar(c( "DUCKPLYR_FALLBACK_COLLECT" = "1", diff --git a/tests/testthat/test-relational-duckdb.R b/tests/testthat/test-relational-duckdb.R index 4df078a3a..0808e7f31 100644 --- a/tests/testthat/test-relational-duckdb.R +++ b/tests/testthat/test-relational-duckdb.R @@ -59,7 +59,7 @@ test_that("duckdb_rel_from_df() and changing column names", { test_that("duckdb_rel_from_df() error call", { expect_snapshot(error = TRUE, { - as_duckdb_tibble(data.frame(a = factor(letters))) + as_duckdb_tibble(data.frame(a = ordered(letters))) }) }) diff --git a/tools/00-funs.R b/tools/00-funs.R index 96133789f..f8151ec29 100644 --- a/tools/00-funs.R +++ b/tools/00-funs.R @@ -85,6 +85,8 @@ duckplyr_tests <- head(n = -1, list( NULL ), "test-arrange.R" = c( + # Overly strict test, should be split + "arrange handles list columns (#282)", NULL ), "test-bind-cols.R" = c(