From 57d72b59b500bcf86a98fcc61963c9378d09f837 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 6 Dec 2024 01:18:37 +0000 Subject: [PATCH 1/4] simplify format_col.default, allowing e.g. vctrs_list_of columns to print well --- NEWS.md | 2 ++ R/print.data.table.R | 4 ++-- inst/tests/other.Rraw | 8 +++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index e9b89dc115..7b961db7f8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -117,6 +117,8 @@ rowwiseDT( 15. The auto-printing suppression in `knitr` documents is now done by implementing a method for `knit_print` instead of looking up the call stack, [#6589](https://github.com/Rdatatable/data.table/pull/6589). Thanks to @jangorecki for the report [#6509](https://github.com/Rdatatable/data.table/issues/6509) and @aitap for the fix. +16. A data.table with a column of class `vctrs_list_of` (from package {vctrs}) prints as expected, [#5948](https://github.com/Rdatatable/data.table/issues/5948). Before, they could be printed messily, e.g. printing every entry in a nested data.frame. Thanks @jesse-smith for the report, @DavisVaughan and @r2evans for contributing, and @MichaelChirico for the PR. + ## NOTES 1. There is a new vignette on joins! See `vignette("datatable-joins")`. Thanks to Angel Feliz for authoring it! Feedback welcome. This vignette has been highly requested since 2017: [#2181](https://github.com/Rdatatable/data.table/issues/2181). diff --git a/R/print.data.table.R b/R/print.data.table.R index bc3cf56ed2..948f41a692 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -193,8 +193,6 @@ has_format_method = function(x) { format_col.default = function(x, ...) { if (!is.null(dim(x))) "" - else if (has_format_method(x) && length(formatted<-format(x, ...))==length(x)) - formatted #PR5224 motivated by package sf where column class is c("sfc_MULTIPOLYGON","sfc") and sf:::format.sfc exists else if (is.list(x)) vapply_1c(x, format_list_item, ...) else @@ -236,6 +234,8 @@ format_list_item.data.frame = function(x, ...) { paste0("<", class1(x), paste_dims(x), ">") } +# format_list_item.vctrs_list_of = function(, .) + # FR #1091 for pretty printing of character # TODO: maybe instead of doing "this is...", we could do "this ... test"? # Current implementation may have issues when dealing with strings that have combinations of full-width and half-width characters, diff --git a/inst/tests/other.Rraw b/inst/tests/other.Rraw index 96f40b071b..081744f0cc 100644 --- a/inst/tests/other.Rraw +++ b/inst/tests/other.Rraw @@ -1,4 +1,4 @@ -pkgs = c("ggplot2", "hexbin", "plyr", "dplyr", "caret", "zoo", "xts", "gdata", "nlme", "bit64", "knitr", "parallel", "sf", "nanotime", "R.utils", "yaml") +pkgs = c("bit64", "caret", "dplyr", "gdata", "ggplot2", "hexbin", "knitr", "nanotime", "nlme", "parallel", "plyr", "R.utils", "sf", "vctrs", "xts", "yaml", "zoo") # First expression of this file must be as above: .gitlab-ci.yml uses parse(,n=1L) to read one expression from this file and installs pkgs. # So that these dependencies of other.Rraw are maintained in a single place. # TEST_DATA_TABLE_WITH_OTHER_PACKAGES is off by default so this other.Rraw doesn't run on CRAN. It is run by GLCI, locally in dev, and by @@ -773,3 +773,9 @@ if (loaded[["nanotime"]]) { res <- tables(env=.e) test(32, res[, .(NAME,NROW,NCOL,MB)], data.table(NAME="DT",NROW=20000000L,NCOL=15L,MB=2288.0)) rm(.e, res) + +if (loaded[["vctrs"]]) { + # vctrs::list_of() columns are treated the same as other list() columns + DT = data.table(a = 1, b = list_of(mtcars)) + test(33, DT, output=".*") +} From 867773f9772b9cc69b873db43b140c1abc2bcec4 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 6 Dec 2024 01:19:41 +0000 Subject: [PATCH 2/4] rm vestigial --- R/print.data.table.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/print.data.table.R b/R/print.data.table.R index 948f41a692..25c9ccfe76 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -234,8 +234,6 @@ format_list_item.data.frame = function(x, ...) { paste0("<", class1(x), paste_dims(x), ">") } -# format_list_item.vctrs_list_of = function(, .) - # FR #1091 for pretty printing of character # TODO: maybe instead of doing "this is...", we could do "this ... test"? # Current implementation may have issues when dealing with strings that have combinations of full-width and half-width characters, From 26c71c5417861654263504aad3f088a308185df4 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 5 Mar 2025 13:00:25 +0300 Subject: [PATCH 3/4] Test formatting of sf geometry columns 'sf' is one of the few packages that uses a list column in a data.frame with a correctly working format() method. Since we disregard such methods now, test that the correct behaviour still happens thanks to the format() methods of the individual list items. --- inst/tests/other.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/other.Rraw b/inst/tests/other.Rraw index 081744f0cc..6306d9a7ee 100644 --- a/inst/tests/other.Rraw +++ b/inst/tests/other.Rraw @@ -220,6 +220,7 @@ test(14.2, {example('CJ', package='data.table', local=TRUE, echo=FALSE); TRUE}) if (loaded[["sf"]]) { #2273 DT = as.data.table(st_read(system.file("shape/nc.shp", package = "sf"), quiet=TRUE)) test(15, DT[1:3, .(NAME, FIPS, geometry)], output="Ashe.*-81.4.*Surry.*-80.4") + test(15.1, DT, output="MULTIPOLYGON (((") # make sure individual list items are formatted, #6637, #5224 dsf = sf::st_as_sf(data.table(x=1:10, y=1:10, s=sample(1:2, 10, TRUE)), coords=1:2) test(16, split(dsf, dsf$s), list(`1` = dsf[dsf$s == 1, ], `2` = dsf[dsf$s == 2, ])) From 9e1260f40467e255083a4e06401859fe999df4d0 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 5 Mar 2025 13:01:54 +0300 Subject: [PATCH 4/4] Test that format.() is disregarded --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1969472943..48c0a766b0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16770,7 +16770,7 @@ registerS3method("format_col", "complex", format_col.default) # then i) test 1610.1 fails if test.data.table() is rerun, ii) user display of complex data would be affected # did try wrapping with on.exit(,add=TRUE) but perhaps because this is a script that is sys.source'd, it ran straight away -# format method for column takes predecedence over format method for each list item +# as of #6637, format individual list items but not the whole list registerS3method("format", "myclass2130", function(x, ...) paste0("<", class(x)[1L], ":", x$id, ">")) DT = data.table(row=1:2, objs=list(structure(list(id="foo"), class="myclass2130"), structure(list(id="bar"), class="myclass2130"))) test(2130.13, print(DT), output="myclass2130:foo.*myclass2130:bar") @@ -16778,7 +16778,7 @@ setattr(DT$objs, "class", "foo2130") registerS3method("format", "foo2130", function(x, ...) "All hail foo") test(2130.14, print(DT), output="myclass2130:foo.*myclass2130:bar") # because length 1 from format but needs to be length(x) registerS3method("format", "foo2130", function(x, ...) rep("All hail foo",length(x))) -test(2130.15, print(DT), output="All hail foo") # e.g. sf:::format.sfc rather than sf:::format.sfg on each item +test(2130.15, print(DT), output="myclass2130:foo.*myclass2130:bar") # used to call format(column), not vapply_1c(column, format) setattr(DT$objs, "class", "bar2130_with_no_method") test(2130.16, print(DT), output="myclass2130:foo.*myclass2130:bar") registerS3method("format", "myclass2130", format.default)