Skip to content

Commit 9ea91ca

Browse files
authored
GH-48057: [R] Slow reading performance caused by apply_arrow_r_metadata() looping through all columns, including NULL ones (#48104)
### Rationale for this change Slow reading due to looping through metadata ### What changes are included in this PR? Don't loop through NULL metadata ### Are these changes tested? Not in unit tests, but see comment below with microbenchmarks. ### Are there any user-facing changes? No * GitHub Issue: #48057 Authored-by: Nic Crane <[email protected]> Signed-off-by: Nic Crane <[email protected]>
1 parent 0c20874 commit 9ea91ca

File tree

2 files changed

+28
-1
lines changed

2 files changed

+28
-1
lines changed

r/R/metadata.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ apply_arrow_r_metadata <- function(x, r_metadata) {
175175
columns_metadata <- r_metadata$columns
176176
if (is.data.frame(x)) {
177177
# if columns metadata exists, apply it here
178-
if (length(names(x)) && !is.null(columns_metadata)) {
178+
if (length(names(x)) && !is.null(columns_metadata) && !all(map_lgl(columns_metadata, is.null))) {
179179
for (name in intersect(names(columns_metadata), names(x))) {
180180
x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]])
181181
}

r/tests/testthat/test-metadata.R

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,3 +490,30 @@ test_that("data.frame class attribute is not saved", {
490490
df_arrow <- arrow_table(df)
491491
expect_identical(df_arrow$r_metadata, list(attributes = list(foo = "bar"), columns = list(x = NULL)))
492492
})
493+
494+
test_that("apply_arrow_r_metadata doesn't add in metadata from plain data.frame objects - GH48057", {
495+
# with just a plain df the (empty) column metadata is not preserved
496+
plain_df <- data.frame(x = 1:5)
497+
plain_df_arrow <- arrow_table(plain_df)
498+
499+
expect_equal(plain_df_arrow$metadata$r$columns, list(x = NULL))
500+
501+
plain_df_no_metadata <- plain_df_arrow$to_data_frame()
502+
plain_df_with_metadata <- apply_arrow_r_metadata(plain_df_no_metadata, plain_df_arrow$metadata$r)
503+
504+
expect_identical(plain_df_no_metadata, plain_df_with_metadata)
505+
506+
# with more complex column metadata - it preserves it
507+
spicy_df_arrow <- arrow_table(haven_data)
508+
509+
expect_equal(
510+
spicy_df_arrow$metadata$r$columns,
511+
list(num = list(attributes = list(format.spss = "F8.2"), columns = NULL), cat_int = NULL, cat_chr = NULL)
512+
)
513+
514+
spicy_df_no_metadata <- spicy_df_arrow$to_data_frame()
515+
spicy_df_with_metadata <- apply_arrow_r_metadata(spicy_df_no_metadata, spicy_df_arrow$metadata$r)
516+
517+
expect_null(attr(spicy_df_no_metadata$num, "format.spss"))
518+
expect_equal(attr(spicy_df_with_metadata$num, "format.spss"), "F8.2")
519+
})

0 commit comments

Comments
 (0)