Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ S3method(names,duckdb_relation)
S3method(print,duckdb_explain)
S3method(print,duckdb_expr)
S3method(print,duckdb_relation)
export(create_view)
export(duckdb)
export(duckdb_adbc)
export(duckdb_fetch_arrow)
Expand All @@ -17,6 +18,7 @@ export(duckdb_register_arrow)
export(duckdb_shutdown)
export(duckdb_unregister)
export(duckdb_unregister_arrow)
export(export_parquet)
export(read_csv_duckdb)
export(simulate_duckdb)
export(tbl_file)
Expand Down Expand Up @@ -54,5 +56,8 @@ exportMethods(dbWriteTable)
exportMethods(show)
import(DBI)
import(methods)
importFrom(DBI,dbExecute)
importFrom(dbplyr,remote_con)
importFrom(dbplyr,sql_render)
importFrom(utils,head)
useDynLib(duckdb, .registration = TRUE)
37 changes: 37 additions & 0 deletions R/create_view.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#' Create or Replace a View from a `tbl` in DuckDB
#'
#' Creates or replaces a view in DuckDB from a `dbplyr`-based `tbl` object.
#' The lazy query associated with the `tbl` is rendered to SQL and used as the
#' definition of a named view in the database.
#'
#' @param data A `tbl_dbi` object, typically produced by `dplyr::tbl()` or a
#'   `dbplyr` pipeline.
#' @param view_name A single, non-empty character string giving the name of
#'   the view to create. It is quoted with `DBI::dbQuoteIdentifier()`, so names
#'   containing spaces or punctuation are allowed.
#'
#' @return A `tbl` object pointing to the created view, returned invisibly.
#'
#' @details
#' The function uses `CREATE OR REPLACE VIEW`, which means it will overwrite an
#' existing view with the same name. The view is created on the same
#' connection as the one used by `data`.
#'
#' @examples
#' con <- DBI::dbConnect(duckdb::duckdb())
#' DBI::dbWriteTable(con, "source_table", data.frame(a = 1:3, b = letters[1:3]))
#' data <- dplyr::filter(dplyr::tbl(con, "source_table"), a > 1)
#' create_view(data, "filtered_view")
#' DBI::dbGetQuery(con, "SELECT * FROM filtered_view")
#' DBI::dbDisconnect(con, shutdown = TRUE)
#'
#' @importFrom DBI dbExecute dbQuoteIdentifier
#' @importFrom dbplyr remote_con sql_render
#' @export
create_view <- function(data, view_name) {
  if (!inherits(data, "tbl_dbi")) {
    stop("'data' must be a 'tbl_dbi' object.")
  }
  # Only plain character input is checked here; any other identifier type is
  # passed through to DBI::dbQuoteIdentifier(), which decides whether it is
  # acceptable.
  if (is.character(view_name) &&
    (length(view_name) != 1L || is.na(view_name) || !nzchar(view_name))) {
    stop("'view_name' must be a single non-empty character string.")
  }

  con <- dbplyr::remote_con(data)
  query_sql <- dbplyr::sql_render(data, con = con)

  statement <- sprintf(
    "CREATE OR REPLACE VIEW %s AS %s",
    DBI::dbQuoteIdentifier(con, view_name),
    query_sql
  )
  DBI::dbExecute(con, statement)

  # `tbl` is not imported into the package namespace, so it has to be
  # namespace-qualified to work when dplyr is not attached.
  invisible(dplyr::tbl(con, view_name))
}
58 changes: 58 additions & 0 deletions R/export_parquet.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#' Export a DuckDB table to a Parquet file using COPY TO
#'
#' Exports a `dbplyr`-based table or lazy query to a Parquet file using
#' DuckDB's native `COPY ... TO` command.
#'
#' @param data A `tbl_dbi` object representing a DuckDB table or query.
#' @param output Path to the output Parquet file (a single, non-missing
#'   character string). The path is quoted as a SQL string literal, so it may
#'   contain single quotes.
#' @param options A named list of key-value COPY options, or `NULL`. Values can
#'   be character, numeric, logical, or vectors (which will be converted to
#'   tuples). Examples include `compression = "zstd"` or
#'   `ROW_GROUP_SIZE = 1000000`. See
#'   <https://duckdb.org/docs/sql/statements/copy.html#parquet-options> for
#'   details. `FORMAT` is always set to `PARQUET`.
#' @param print_sql If `TRUE`, the generated `COPY` statement is emitted with
#'   `message()` before it is executed. Defaults to `FALSE`.
#'
#' @return The value returned by `DBI::dbExecute()` for the `COPY TO`
#'   statement (the number of rows affected). The function stops with an error
#'   if the input types are invalid.
#'
#' @details
#' Option values of length >1 are wrapped in parentheses and comma-separated
#' (e.g., for `columns = c("a", "b")`, DuckDB will receive `COLUMNS (a,b)`).
#'
#' @examples
#' con <- DBI::dbConnect(duckdb::duckdb())
#' DBI::dbWriteTable(con, "iris", iris)
#' tbl <- dplyr::tbl(con, "iris")
#' out_file <- tempfile(fileext = ".parquet")
#' export_parquet(tbl, out_file, options = list(compression = "zstd"))
#' out_dir <- file.path(tempdir(), "iris_ds")
#' export_parquet(
#'   tbl, out_dir,
#'   options = list(partition_by = "Species", row_group_size = 1000)
#' )
#' DBI::dbDisconnect(con, shutdown = TRUE)
#'
#' @importFrom DBI dbExecute
#' @importFrom dbplyr remote_con sql_render
#' @export
export_parquet <- function(data, output, options = NULL, print_sql = FALSE) {
  if (!inherits(data, "tbl_dbi")) {
    stop("'data' must be a 'tbl_dbi' object.")
  }
  if (!is.character(output) || length(output) != 1L || is.na(output)) {
    stop("'output' must be a single character string.")
  }
  if (!is.null(options) && !is.list(options)) {
    stop("'options' must be a list or NULL.")
  }

  con <- dbplyr::remote_con(data)
  sql_query <- dbplyr::sql_render(data, con = con)

  # Normalize and format options; FORMAT is forced last so a user-supplied
  # `format` entry cannot redirect the export to another file type.
  if (is.null(options)) {
    options <- list()
  }
  formatted_options <- format_copy_to_options(options)
  formatted_options$FORMAT <- "PARQUET"

  parquet_opts <- paste(
    paste0(names(formatted_options), " ", formatted_options),
    collapse = ", "
  )

  # Quote the path as a SQL string literal instead of pasting it between raw
  # quotes, so a path containing `'` cannot break out of the literal.
  sql <- sprintf(
    "COPY (%s) TO %s (%s)",
    sql_query,
    DBI::dbQuoteString(con, output),
    parquet_opts
  )

  if (isTRUE(print_sql)) {
    message(sql)
  }

  DBI::dbExecute(con, sql)
}


# Convert a named list of COPY options into the strings spliced into a
# `COPY ... TO` statement.
#
# `options` is a named list whose values are character, numeric, or logical
# vectors. Each value is converted to text; values of length > 1 are collapsed
# into a parenthesised, comma-separated tuple such as "(a,b)". Option names are
# upper-cased. Returns a list of single strings (an empty list when `options`
# is empty). Errors if an option is unnamed, has an unsupported type, is
# empty, or contains NA.
format_copy_to_options <- function(options) {
  if (length(options) == 0L) {
    return(list())
  }

  option_names <- names(options)
  if (is.null(option_names) || anyNA(option_names) || !all(nzchar(option_names))) {
    stop("All options must be named.")
  }

  formatted <- lapply(options, function(x) {
    if (!(is.logical(x) || is.character(x) || is.numeric(x))) {
      stop("All option values must be character, numeric, or logical.")
    }
    if (length(x) == 0L || anyNA(x)) {
      stop("Option values must be non-empty and must not contain NA.")
    }

    value <- if (is.double(x)) {
      # as.character(1e6) yields "1e+06"; format each element without
      # scientific notation so large sizes are written out in full.
      vapply(
        x,
        function(v) format(v, scientific = FALSE, digits = 15),
        character(1),
        USE.NAMES = FALSE
      )
    } else {
      as.character(x)
    }

    if (length(value) > 1L) {
      value <- paste0("(", paste0(value, collapse = ","), ")")
    }
    value
  })

  # Assign names directly rather than via stats::setNames(), which is not
  # imported into the package namespace.
  names(formatted) <- toupper(option_names)
  formatted
}
33 changes: 33 additions & 0 deletions man/create_view.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions man/export_parquet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 52 additions & 0 deletions tests/testthat/test-create_view.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
test_that("create_view creates a view with expected content", {
  con <- dbConnect(duckdb::duckdb())
  on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

  # Five rows, of which only x = 4 and x = 5 survive the filter below.
  source_df <- data.frame(x = 1:5, y = letters[1:5])
  copy_to(con, source_df, "original_table", temporary = TRUE)

  filtered <- filter(tbl(con, "original_table"), x > 3)
  create_view(filtered, "view_test")

  # Read the view back through DBI ...
  via_dbi <- dbReadTable(con, "view_test")
  expect_equal(nrow(via_dbi), 2)
  expect_equal(via_dbi$x, c(4, 5))

  # ... and through a lazy tbl that is then collected.
  via_tbl <- dplyr::collect(tbl(con, "view_test"))
  expect_equal(nrow(via_tbl), 2)
  expect_equal(via_tbl$x, c(4, 5))
})

test_that("create_view replaces an existing view", {
  con <- dbConnect(duckdb::duckdb())
  on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

  # First definition of the view: two rows.
  first_df <- data.frame(a = 1:2)
  copy_to(con, first_df, "table1", temporary = TRUE)
  create_view(tbl(con, "table1"), "replace_view")

  # Second definition under the same name: three rows; should replace.
  second_df <- tibble(a = 10:12)
  copy_to(con, second_df, "table2", temporary = TRUE)
  create_view(tbl(con, "table2"), "replace_view")

  view_rows <- dbReadTable(con, "replace_view")
  expect_equal(nrow(view_rows), 3)
  expect_equal(view_rows$a, 10:12)
})

test_that("create_view works with quoted view names", {
  con <- dbConnect(duckdb::duckdb())
  on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

  id_df <- data.frame(id = 1:3)
  copy_to(con, id_df, "quoted_table", temporary = TRUE)

  # A name with a dash, mixed case and spaces must be quoted to be valid SQL.
  create_view(tbl(con, "quoted_table"), "weird-Name With Space")

  view_rows <- dbGetQuery(con, 'SELECT * FROM "weird-Name With Space"')
  expect_equal(nrow(view_rows), 3)
})

44 changes: 44 additions & 0 deletions tests/testthat/test-export_parquet.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
test_that("export_parquet write a valid Parquet file", {
  withr::with_tempfile("parquet_file", fileext = ".parquet", {
    con <- dbConnect(duckdb::duckdb())
    on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

    source_df <- data.frame(x = 1:3, y = letters[1:3])
    copy_to(con, source_df, "test_table", temporary = TRUE)

    # Export with default options and check that the file was produced.
    export_parquet(tbl(con, "test_table"), parquet_file)

    expect_true(file.exists(parquet_file))
  })
})

test_that("export_parquet allows options", {
  withr::with_tempfile("parquet_file", fileext = ".parquet", {
    con <- dbConnect(duckdb::duckdb())
    on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

    source_df <- data.frame(a = 1:5, b = 1:5)
    copy_to(con, source_df, "table_opt", temporary = TRUE)
    lazy_tbl <- tbl(con, "table_opt")

    # One character-valued and one numeric-valued COPY option.
    copy_opts <- list(compression = "zstd", row_group_size = 1000)
    expect_silent(export_parquet(lazy_tbl, parquet_file, copy_opts))
    expect_true(file.exists(parquet_file))
  })
})

# Test name (French): "export_parquet fails cleanly if the file is invalid".
test_that("export_parquet échoue proprement si le fichier est invalide", {
  con <- dbConnect(duckdb::duckdb())
  on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)

  source_df <- data.frame(z = 1:2)
  copy_to(con, source_df, "bad_path_table", temporary = TRUE)
  lazy_tbl <- tbl(con, "bad_path_table")

  # The target path is meant to be unwritable, so the COPY should fail.
  expect_error(
    export_parquet(lazy_tbl, "/chemin/inexistant/fichier.parquet"),
    "IO Error|Failed to open"
  )
})

2 changes: 2 additions & 0 deletions vignettes/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.html
*.R
Loading