From d71907038002858960701344080e099cc1d88797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 3 Nov 2024 07:38:12 +0100 Subject: [PATCH 1/5] feat: Client-side warning on join explosion --- R/dplyr.R | 2 + R/full_join.R | 2 +- R/inner_join.R | 2 +- R/join.R | 64 ++++++++++++++++++++++++ R/left_join.R | 2 +- R/right_join.R | 2 +- tests/testthat/_snaps/dplyr-join-rows.md | 33 ++++++++++++ tests/testthat/_snaps/dplyr-join.md | 9 ++++ tests/testthat/test-dplyr-join-rows.R | 1 - tests/testthat/test-dplyr-join.R | 1 - tools/00-funs.R | 6 +-- 11 files changed, 113 insertions(+), 11 deletions(-) diff --git a/R/dplyr.R b/R/dplyr.R index dd1129355..7d4fd7c7e 100644 --- a/R/dplyr.R +++ b/R/dplyr.R @@ -80,9 +80,11 @@ rows_check_y_unmatched <- dplyr$rows_check_y_unmatched rows_df_in_place <- dplyr$rows_df_in_place rowwise_df <- dplyr$rowwise_df slice_rows <- dplyr$slice_rows +stop_join <- dplyr$stop_join summarise_build <- dplyr$summarise_build summarise_cols <- dplyr$summarise_cols summarise_deprecate_variable_size <- dplyr$summarise_deprecate_variable_size the <- dplyr$the tick_if_needed <- dplyr$tick_if_needed +warn_join <- dplyr$warn_join warn_join_cross_by <- dplyr$warn_join_cross_by diff --git a/R/full_join.R b/R/full_join.R index 40578e11a..12d6469f3 100644 --- a/R/full_join.R +++ b/R/full_join.R @@ -17,7 +17,7 @@ full_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x" "No implicit cross joins for {.code full_join()}" = is_cross_by(by), "{.arg multiple} not supported" = !identical(multiple, "all"), { - out <- rel_join_impl(x, y, by, "full", na_matches, suffix, keep, error_call) + out <- rel_join_impl(x, y, by, "full", na_matches, suffix, keep, relationship, error_call) return(out) } ) diff --git a/R/inner_join.R b/R/inner_join.R index 29bc68699..45e1bd058 100644 --- a/R/inner_join.R +++ b/R/inner_join.R @@ -19,7 +19,7 @@ inner_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x "{.arg multiple} not supported" = !identical(multiple, "all"), "{.arg unmatched} not supported" = !identical(unmatched, "drop"), { - out <- rel_join_impl(x, y, by, "inner", na_matches, suffix, keep, error_call) + out <- rel_join_impl(x, y, by, "inner", na_matches, suffix, keep, relationship, error_call) return(out) } ) diff --git a/R/join.R b/R/join.R index 42dde6927..ceaf65581 100644 --- a/R/join.R +++ b/R/join.R @@ -6,6 +6,7 @@ rel_join_impl <- function( na_matches, suffix = c(".x", ".y"), keep = NULL, + relationship = NULL, error_call = caller_env() ) { mutating <- !(join %in% c("semi", "anti")) @@ -25,6 +26,10 @@ rel_join_impl <- function( by <- as_join_by(by, error_call = error_call) } + if (mutating) { + check_relationship(relationship, x, y, by, error_call = error_call) + } + x_by <- by$x y_by <- by$y x_rel <- duckdb_rel_from_df(x) @@ -136,3 +141,62 @@ rel_join_impl <- function( return(out) } + +check_relationship <- function(relationship, x, y, by, error_call) { + if (is_null(relationship)) { + # FIXME: Determine behavior based on option + if (!is_key(x, by$x) && !is_key(y, by$y)) { + warn_join( + message = c( + "Detected an unexpected many-to-many relationship between `x` and `y`.", + i = paste0( + "If a many-to-many relationship is expected, ", + "set `relationship = \"many-to-many\"` to silence this warning." + ) + ), + class = "dplyr_warning_join_relationship_many_to_many", + call = error_call + ) + } + return() + } + + if (relationship %in% c("one-to-many", "one-to-one")) { + if (!is_key(x, by$x)) { + stop_join( + message = c( + glue("Each row in `{x_name}` must match at most 1 row in `{y_name}`."), + ), + class = paste0("dplyr_error_join_relationship_", gsub("-", "_", relationship)), + call = error_call + ) + } + } + + if (relationship %in% c("many-to-one", "one-to-one")) { + if (!is_key(y, by$y)) { + stop_join( + message = c( + glue("Each row in `{y_name}` must match at most 1 row in `{x_name}`."), + ), + class = paste0("dplyr_error_join_relationship_", gsub("-", "_", relationship)), + call = error_call + ) + } + } +} + +is_key <- function(x, cols) { + local_options(duckdb.materialize_message = FALSE) + + rows <- + x %>% + # FIXME: Why does this materialize + # as_duckplyr_tibble() %>% + summarize(.by = c(!!!syms(cols)), `___n` = n()) %>% + filter(`___n` > 1L) %>% + head(1L) %>% + nrow() + + rows == 0 +} diff --git a/R/left_join.R b/R/left_join.R index f171a8054..565b3f375 100644 --- a/R/left_join.R +++ b/R/left_join.R @@ -20,7 +20,7 @@ left_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x" "{.arg multiple} not supported" = !identical(multiple, "all"), "{.arg unmatched} not supported" = !identical(unmatched, "drop"), { - out <- rel_join_impl(x, y, by, "left", na_matches, suffix, keep, error_call) + out <- rel_join_impl(x, y, by, "left", na_matches, suffix, keep, relationship, error_call) return(out) } ) diff --git a/R/right_join.R b/R/right_join.R index 38ed32b76..5d97298a3 100644 --- a/R/right_join.R +++ b/R/right_join.R @@ -19,7 +19,7 @@ right_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x "{.arg multiple} not supported" = !identical(multiple, "all"), "{.arg unmatched} not supported" = !identical(unmatched, "drop"), { - out <- rel_join_impl(x, y, by, "right", na_matches, suffix, keep, error_call) + out <- rel_join_impl(x, y, by, "right", na_matches, suffix, keep, relationship, error_call) return(out) } ) diff --git a/tests/testthat/_snaps/dplyr-join-rows.md b/tests/testthat/_snaps/dplyr-join-rows.md index 07d31a943..f478fd51f 100644 --- a/tests/testthat/_snaps/dplyr-join-rows.md +++ b/tests/testthat/_snaps/dplyr-join-rows.md @@ -65,6 +65,39 @@ ! Each row in `x` must match at most 1 row in `y`. i Row 1 of `x` matches multiple rows in `y`. +# join_rows() gives meaningful many-to-many warnings + + Code + join_rows(c(1, 1), c(1, 1)) + Condition + Warning: + Detected an unexpected many-to-many relationship between `x` and `y`. + i Row 1 of `x` matches multiple rows in `y`. + i Row 1 of `y` matches multiple rows in `x`. + i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning. + Output + $x + [1] 1 1 2 2 + + $y + [1] 1 2 1 2 + + +--- + + Code + duckplyr_left_join(df, df, by = join_by(x)) + Condition + Warning in `duckplyr_left_join()`: + Detected an unexpected many-to-many relationship between `x` and `y`. + i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning. + Output + x + 1 1 + 2 1 + 3 1 + 4 1 + # join_rows() gives meaningful error message on unmatched rows Code diff --git a/tests/testthat/_snaps/dplyr-join.md b/tests/testthat/_snaps/dplyr-join.md index 73111ec5a..28ebdbb85 100644 --- a/tests/testthat/_snaps/dplyr-join.md +++ b/tests/testthat/_snaps/dplyr-join.md @@ -50,6 +50,15 @@ Error: ! `na_matches` must be one of "na" or "never", not "foo". +# mutating joins trigger many-to-many warning + + Code + out <- duckplyr_left_join(df, df, join_by(x)) + Condition + Warning in `duckplyr_left_join()`: + Detected an unexpected many-to-many relationship between `x` and `y`. + i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning. + # mutating joins compute common columns Code diff --git a/tests/testthat/test-dplyr-join-rows.R b/tests/testthat/test-dplyr-join-rows.R index 3cd8ff956..cd7edf110 100644 --- a/tests/testthat/test-dplyr-join-rows.R +++ b/tests/testthat/test-dplyr-join-rows.R @@ -197,7 +197,6 @@ test_that("join_rows() gives meaningful many-to-one errors", { }) test_that("join_rows() gives meaningful many-to-many warnings", { - skip("TODO duckdb") expect_snapshot({ join_rows(c(1, 1), c(1, 1)) }) diff --git a/tests/testthat/test-dplyr-join.R b/tests/testthat/test-dplyr-join.R index 60b8355ef..f66be8a5b 100644 --- a/tests/testthat/test-dplyr-join.R +++ b/tests/testthat/test-dplyr-join.R @@ -360,7 +360,6 @@ test_that("join_filter() validates arguments", { }) test_that("mutating joins trigger many-to-many warning", { - skip("TODO duckdb") df <- tibble(x = c(1, 1)) expect_snapshot(out <- duckplyr_left_join(df, df, join_by(x))) }) diff --git a/tools/00-funs.R b/tools/00-funs.R index 2ecf6094e..b1b3fe493 100644 --- a/tools/00-funs.R +++ b/tools/00-funs.R @@ -237,14 +237,10 @@ duckplyr_tests <- head(n = -1, list( NULL ), "test-join-rows.R" = c( - "join_rows() gives meaningful many-to-many warnings", NULL ), "test-join.R" = c( - "mutating joins trigger multiple match warning", - "mutating joins don't trigger multiple match warning when called indirectly", - - "mutating joins trigger many-to-many warning", + # FIXME: How to detect an indirect call? "mutating joins don't trigger many-to-many warning when called indirectly", NULL ), From 461c09f801a1066cffb10fc31764359940f22f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 26 Jan 2025 07:30:15 +0100 Subject: [PATCH 2/5] Squashed commit of the following: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit db5d405f508af70a3abfda7d8fed0e56510156d2 Author: Kirill Müller Date: Sun Jan 26 06:54:20 2025 +0100 Sync commit 7bd1872e95563cc7df2dca20e14e56f741b82ffa Author: Kirill Müller Date: Fri Jan 24 21:23:24 2025 +0100 Remove dplyr compat code commit 8d6a573227b2462e4a9b15b45ea64e9aad56532c Author: Kirill Müller Date: Wed Jan 22 16:55:00 2025 +0100 fix: Avoid forwarding `is.na()` to `is.nan()` to support non-numeric data --- R/relational-duckdb.R | 8 ++ tests/testthat/test-rel_api.R | 167 ++++++++++++++++++++++++++++++++++ 2 files changed, 175 insertions(+) diff --git a/R/relational-duckdb.R b/R/relational-duckdb.R index 889f53276..a291cbaef 100644 --- a/R/relational-duckdb.R +++ b/R/relational-duckdb.R @@ -180,6 +180,14 @@ check_df_for_rel <- function(df, call = caller_env()) { if (!identical(df_attrib, roundtrip_attrib)) { cli::cli_abort("Attributes are lost during conversion. Affected column: {.var {names(df)[[i]]}}.", call = call) } + # Always check roundtrip for timestamp columns + # duckdb uses microsecond precision only, this is in some cases + # less than R does + if (inherits(df[[i]], "POSIXct")) { + if (!identical(df[[i]], roundtrip[[i]])) { + cli::cli_abort("Imperfect roundtrip. Affected column: {.var {names(df)[[i]]}}.", call = call) + } + } } } diff --git a/tests/testthat/test-rel_api.R b/tests/testthat/test-rel_api.R index 28bca761f..7be8dacce 100644 --- a/tests/testthat/test-rel_api.R +++ b/tests/testthat/test-rel_api.R @@ -9715,6 +9715,87 @@ test_that("relational mutate(d = NA_real_, e = is.na(d)) order-preserving", { DBI::dbDisconnect(con, shutdown = TRUE) }) +test_that("relational mutate(d = NaN, e = is.na(d)) order-preserving", { + # Autogenerated + duckdb <- asNamespace("duckdb") + drv <- duckdb::duckdb() + con <- DBI::dbConnect(drv) + experimental <- FALSE + invisible(DBI::dbExecute(con, 'CREATE MACRO "is.na"(x) AS (x IS NULL)')) + df1 <- data.frame(a = seq(1, 6, by = 1), b = 2, g = c(1L, 2L, 2L, 3L, 3L, 3L)) + + "mutate" + rel1 <- duckdb$rel_from_df(con, df1, experimental = experimental) + "mutate" + rel2 <- duckdb$rel_project( + rel1, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NaN, experimental = experimental) + } else { + duckdb$expr_constant(NaN) + } + duckdb$expr_set_alias(tmp_expr, "d") + tmp_expr + } + ) + ) + "mutate" + rel3 <- duckdb$rel_project( + rel2, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("d") + duckdb$expr_set_alias(tmp_expr, "d") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_function("is.na", list(duckdb$expr_reference("d"))) + duckdb$expr_set_alias(tmp_expr, "e") + tmp_expr + } + ) + ) + rel3 + out <- duckdb$rel_to_altrep(rel3) + expect_identical( + out, + data.frame(a = seq(1, 6, by = 1), b = 2, g = c(1L, 2L, 2L, 3L, 3L, 3L), d = NaN, e = TRUE) + ) + DBI::dbDisconnect(con, shutdown = TRUE) +}) + test_that("relational mutate(d = row_number()) order-preserving", { # Autogenerated duckdb <- asNamespace("duckdb") @@ -14115,6 +14196,92 @@ test_that("relational mutate(d = NA_real_, e = is.na(d)) order-enforcing", { DBI::dbDisconnect(con, shutdown = TRUE) }) +test_that("relational mutate(d = NaN, e = is.na(d)) order-enforcing", { + # Autogenerated + duckdb <- asNamespace("duckdb") + drv <- duckdb::duckdb() + con <- DBI::dbConnect(drv) + experimental <- FALSE + invisible(DBI::dbExecute(con, 'CREATE MACRO "is.na"(x) AS (x IS NULL)')) + df1 <- data.frame(a = seq(1, 6, by = 1), b = 2, g = c(1L, 2L, 2L, 3L, 3L, 3L)) + + "mutate" + rel1 <- duckdb$rel_from_df(con, df1, experimental = experimental) + "mutate" + rel2 <- duckdb$rel_project( + rel1, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NaN, experimental = experimental) + } else { + duckdb$expr_constant(NaN) + } + duckdb$expr_set_alias(tmp_expr, "d") + tmp_expr + } + ) + ) + "mutate" + rel3 <- duckdb$rel_project( + rel2, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("d") + duckdb$expr_set_alias(tmp_expr, "d") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_function("is.na", list(duckdb$expr_reference("d"))) + duckdb$expr_set_alias(tmp_expr, "e") + tmp_expr + } + ) + ) + "arrange" + rel4 <- duckdb$rel_order( + rel3, + list(duckdb$expr_reference("a"), duckdb$expr_reference("b"), duckdb$expr_reference("g"), duckdb$expr_reference("d"), duckdb$expr_reference("e")) + ) + rel4 + out <- duckdb$rel_to_altrep(rel4) + expect_identical( + out, + data.frame(a = seq(1, 6, by = 1), b = 2, g = c(1L, 2L, 2L, 3L, 3L, 3L), d = NaN, e = TRUE) + ) + DBI::dbDisconnect(con, shutdown = TRUE) +}) + test_that("relational mutate(d = row_number()) order-enforcing", { # Autogenerated duckdb <- asNamespace("duckdb") From 96ec2820c3011f63ef4148578e9ddcd69d540672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 26 Jan 2025 09:23:27 +0100 Subject: [PATCH 3/5] Tests --- tests/testthat/test-rel_api.R | 167 ---------------------------------- 1 file changed, 167 deletions(-) diff --git a/tests/testthat/test-rel_api.R b/tests/testthat/test-rel_api.R index 7be8dacce..28bca761f 100644 --- a/tests/testthat/test-rel_api.R +++ b/tests/testthat/test-rel_api.R @@ -9715,87 +9715,6 @@ test_that("relational mutate(d = NA_real_, e = is.na(d)) order-preserving", { DBI::dbDisconnect(con, shutdown = TRUE) }) -test_that("relational mutate(d = NaN, e = is.na(d)) order-preserving", { - # Autogenerated - duckdb <- asNamespace("duckdb") - drv <- duckdb::duckdb() - con <- DBI::dbConnect(drv) - experimental <- FALSE - invisible(DBI::dbExecute(con, 'CREATE MACRO "is.na"(x) AS (x IS NULL)')) - df1 <- data.frame(a = seq(1, 6, by = 1), b = 2, g = c(1L, 2L, 2L, 3L, 3L, 3L)) - - "mutate" - rel1 <- duckdb$rel_from_df(con, df1, experimental = experimental) - "mutate" - rel2 <- duckdb$rel_project( - rel1, - list( - { - tmp_expr <- duckdb$expr_reference("a") - duckdb$expr_set_alias(tmp_expr, "a") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_reference("b") - duckdb$expr_set_alias(tmp_expr, "b") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_reference("g") - duckdb$expr_set_alias(tmp_expr, "g") - tmp_expr - }, - { - tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { - duckdb$expr_constant(NaN, experimental = experimental) - } else { - duckdb$expr_constant(NaN) - } - duckdb$expr_set_alias(tmp_expr, "d") - tmp_expr - } - ) - ) - "mutate" - rel3 <- duckdb$rel_project( - rel2, - list( - { - tmp_expr <- duckdb$expr_reference("a") - duckdb$expr_set_alias(tmp_expr, "a") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_reference("b") - duckdb$expr_set_alias(tmp_expr, "b") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_reference("g") - duckdb$expr_set_alias(tmp_expr, "g") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_reference("d") - duckdb$expr_set_alias(tmp_expr, "d") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_function("is.na", list(duckdb$expr_reference("d"))) - duckdb$expr_set_alias(tmp_expr, "e") - tmp_expr - } - ) - ) - rel3 - out <- duckdb$rel_to_altrep(rel3) - expect_identical( - out, - data.frame(a = seq(1, 6, by = 1), b = 2, g = c(1L, 2L, 2L, 3L, 3L, 3L), d = NaN, e = TRUE) - ) - DBI::dbDisconnect(con, shutdown = TRUE) -}) - test_that("relational mutate(d = row_number()) order-preserving", { # Autogenerated duckdb <- asNamespace("duckdb") @@ -14196,92 +14115,6 @@ test_that("relational mutate(d = NA_real_, e = is.na(d)) order-enforcing", { DBI::dbDisconnect(con, shutdown = TRUE) }) -test_that("relational mutate(d = NaN, e = is.na(d)) order-enforcing", { - # Autogenerated - duckdb <- asNamespace("duckdb") - drv <- duckdb::duckdb() - con <- DBI::dbConnect(drv) - experimental <- FALSE - invisible(DBI::dbExecute(con, 'CREATE MACRO "is.na"(x) AS (x IS NULL)')) - df1 <- data.frame(a = seq(1, 6, by = 1), b = 2, g = c(1L, 2L, 2L, 3L, 3L, 3L)) - - "mutate" - rel1 <- duckdb$rel_from_df(con, df1, experimental = experimental) - "mutate" - rel2 <- duckdb$rel_project( - rel1, - list( - { - tmp_expr <- duckdb$expr_reference("a") - duckdb$expr_set_alias(tmp_expr, "a") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_reference("b") - duckdb$expr_set_alias(tmp_expr, "b") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_reference("g") - duckdb$expr_set_alias(tmp_expr, "g") - tmp_expr - }, - { - tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { - duckdb$expr_constant(NaN, experimental = experimental) - } else { - duckdb$expr_constant(NaN) - } - duckdb$expr_set_alias(tmp_expr, "d") - tmp_expr - } - ) - ) - "mutate" - rel3 <- duckdb$rel_project( - rel2, - list( - { - tmp_expr <- duckdb$expr_reference("a") - duckdb$expr_set_alias(tmp_expr, "a") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_reference("b") - duckdb$expr_set_alias(tmp_expr, "b") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_reference("g") - duckdb$expr_set_alias(tmp_expr, "g") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_reference("d") - duckdb$expr_set_alias(tmp_expr, "d") - tmp_expr - }, - { - tmp_expr <- duckdb$expr_function("is.na", list(duckdb$expr_reference("d"))) - duckdb$expr_set_alias(tmp_expr, "e") - tmp_expr - } - ) - ) - "arrange" - rel4 <- duckdb$rel_order( - rel3, - list(duckdb$expr_reference("a"), duckdb$expr_reference("b"), duckdb$expr_reference("g"), duckdb$expr_reference("d"), duckdb$expr_reference("e")) - ) - rel4 - out <- duckdb$rel_to_altrep(rel4) - expect_identical( - out, - data.frame(a = seq(1, 6, by = 1), b = 2, g = c(1L, 2L, 2L, 3L, 3L, 3L), d = NaN, e = TRUE) - ) - DBI::dbDisconnect(con, shutdown = TRUE) -}) - test_that("relational mutate(d = row_number()) order-enforcing", { # Autogenerated duckdb <- asNamespace("duckdb") From 645f5ff561e1e6781b3c7cdb1a238e8139a20bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 31 Jan 2025 20:41:58 +0100 Subject: [PATCH 4/5] chore: Sync --- patch/mutate.patch | 4 +-- patch/summarise.patch | 4 +-- tests/testthat/test-rel_api.R | 46 +++++++++++++++++++---------------- tools/05-duckdb-tests.R | 2 ++ 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/patch/mutate.patch b/patch/mutate.patch index 38072752f..e51e2d098 100644 --- a/patch/mutate.patch +++ b/patch/mutate.patch @@ -29,7 +29,7 @@ diff --git b/R/mutate.R a/R/mutate.R + + names_used <- character() + names_new <- character() -+ current_data <- rel_to_df(rel) ++ current_data <- rel_to_df(rel, prudence = "frugal") + + # FIXME: use fewer projections + for (i in seq_along(dots)) { @@ -77,7 +77,7 @@ diff --git b/R/mutate.R a/R/mutate.R + } + + rel <- rel_project(rel, unname(exprs)) -+ current_data <- rel_to_df(rel) ++ current_data <- rel_to_df(rel, prudence = "frugal") + } + + if (length(by_names) > 0) { diff --git a/patch/summarise.patch b/patch/summarise.patch index 0e43d59cd..5ae1f68be 100644 --- a/patch/summarise.patch +++ b/patch/summarise.patch @@ -58,9 +58,9 @@ diff --git b/R/summarise.R a/R/summarise.R + out_rel <- oo_restore(out_rel, "___row_number") + } + -+ out <- rel_to_df(out_rel) ++ out <- rel_to_df(out_rel, prudence = get_prudence_duckplyr_df(.data)) + # https://github.com/tidyverse/dplyr/pull/6988 -+ class(out) <- intersect(c("duckplyr_df", "tbl_df", "tbl", "data.frame"), class(.data)) ++ class(out) <- intersect(c("prudent_duckplyr_df", "duckplyr_df", "tbl_df", "tbl", "data.frame"), class(.data)) + return(out) } diff --git a/tests/testthat/test-rel_api.R b/tests/testthat/test-rel_api.R index 28bca761f..66ed88b20 100644 --- a/tests/testthat/test-rel_api.R +++ b/tests/testthat/test-rel_api.R @@ -16682,13 +16682,17 @@ test_that("relational symdiff() order-preserving", { } ) ) - df3 <- data.frame(a = 5L, b = 2) + df3 <- data.frame(a = 1L, b = 2) "union_all" rel9 <- duckdb$rel_from_df(con, df3, experimental = experimental) + df4 <- data.frame(a = 5L, b = 2) + "union_all" - rel10 <- duckdb$rel_project( - rel8, + rel10 <- duckdb$rel_from_df(con, df4, experimental = experimental) + "union_all" + rel11 <- duckdb$rel_project( + rel9, list( { tmp_expr <- duckdb$expr_reference("a") @@ -16717,8 +16721,8 @@ test_that("relational symdiff() order-preserving", { ) ) "union_all" - rel11 <- duckdb$rel_project( - rel9, + rel12 <- duckdb$rel_project( + rel10, list( { tmp_expr <- duckdb$expr_reference("a") @@ -16747,15 +16751,15 @@ test_that("relational symdiff() order-preserving", { ) ) "union_all" - rel12 <- duckdb$rel_union_all(rel10, rel11) + rel13 <- duckdb$rel_union_all(rel11, rel12) "union_all" - rel13 <- duckdb$rel_order( - rel12, + rel14 <- duckdb$rel_order( + rel13, list(duckdb$expr_reference("___row_number_x"), duckdb$expr_reference("___row_number_y")) ) "union_all" - rel14 <- duckdb$rel_project( - rel13, + rel15 <- duckdb$rel_project( + rel14, list( { tmp_expr <- duckdb$expr_reference("a") @@ -16770,8 +16774,8 @@ test_that("relational symdiff() order-preserving", { ) ) "distinct" - rel15 <- duckdb$rel_project( - rel14, + rel16 <- duckdb$rel_project( + rel15, list( { tmp_expr <- duckdb$expr_reference("a") @@ -16791,8 +16795,8 @@ test_that("relational symdiff() order-preserving", { ) ) "distinct" - rel16 <- duckdb$rel_project( - rel15, + rel17 <- duckdb$rel_project( + rel16, list( { tmp_expr <- duckdb$expr_reference("a") @@ -16830,8 +16834,8 @@ test_that("relational symdiff() order-preserving", { ) ) "distinct" - rel17 <- duckdb$rel_filter( - rel16, + rel18 <- duckdb$rel_filter( + rel17, list( duckdb$expr_comparison( "==", @@ -16847,10 +16851,10 @@ test_that("relational symdiff() order-preserving", { ) ) "distinct" - rel18 <- duckdb$rel_order(rel17, list(duckdb$expr_reference("___row_number"))) + rel19 <- duckdb$rel_order(rel18, list(duckdb$expr_reference("___row_number"))) "distinct" - rel19 <- duckdb$rel_project( - rel18, + rel20 <- duckdb$rel_project( + rel19, list( { tmp_expr <- duckdb$expr_reference("a") @@ -16864,8 +16868,8 @@ test_that("relational symdiff() order-preserving", { } ) ) - rel19 - out <- duckdb$rel_to_altrep(rel19) + rel20 + out <- duckdb$rel_to_altrep(rel20) expect_identical( out, data.frame(a = c(1L, 5L), b = 2) diff --git a/tools/05-duckdb-tests.R b/tools/05-duckdb-tests.R index ac648b3a2..c273b5150 100644 --- a/tools/05-duckdb-tests.R +++ b/tools/05-duckdb-tests.R @@ -1,5 +1,7 @@ source("tools/00-funs.R", echo = TRUE) +Sys.setenv("DUCKPLYR_META_ENABLE" = "TRUE") + pkgload::load_all() first_line <- paste0( From b3320d2c4de0340a449e5a84be90c199f560176b Mon Sep 17 00:00:00 2001 From: krlmlr <1741643+krlmlr@users.noreply.github.com> Date: Fri, 31 Jan 2025 20:17:00 +0000 Subject: [PATCH 5/5] [create-pull-request] automated change --- tests/testthat/_snaps/handle_desc.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/_snaps/handle_desc.md b/tests/testthat/_snaps/handle_desc.md index 5edb31ba8..bf815e06d 100644 --- a/tests/testthat/_snaps/handle_desc.md +++ b/tests/testthat/_snaps/handle_desc.md @@ -8,5 +8,5 @@ i Use `compute(prudence = "lavish")` to materialize to temporary storage and continue with duckplyr. i See `vignette("funnel")` for other options. Caused by error in `rel_find_call()`: - ! No translation for function `desc`. + ! Function `desc` does not map to `dplyr::desc`.