From 7539668f6f7a718045e096e42380dbaf8760d639 Mon Sep 17 00:00:00 2001 From: Mukul Kumar Date: Tue, 8 Jul 2025 00:24:43 +0530 Subject: [PATCH 01/38] Added logic to as.data.table.list() to preserve names from vectors --- R/as.data.table.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/R/as.data.table.R b/R/as.data.table.R index 38f99b80da..c2a79d3811 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -133,6 +133,22 @@ as.data.table.list = function(x, missing.check.names = missing(check.names) origListNames = if (missing(.named)) names(x) else NULL # as.data.table called directly, not from inside data.table() which provides .named, #3854 empty_atomic = FALSE + + #Handle keep.rownames for vectors (mimicking data.frame behavior) + vector_rownames = NULL + if(!identical(keep.rownames, FALSE)) { + for(i in seq_len(n)){ + xi = x[[i]] + if (!is.null(xi) && is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi)) && length(names(xi)) > 0) { + valid_names = names(xi) + if(any(nzchar(valid_names))) { + vector_rownames = valid_names + x[[i]] = unname(xi) + break + } + } + } + } for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above @@ -200,6 +216,13 @@ as.data.table.list = function(x, } if (any(vnames==".SD")) stopf("A column may not be called .SD.
That has special meaning.") if (check.names) vnames = make.names(vnames, unique=TRUE) + + # Add rownames column when vector names were found + if(!is.null(vector_rownames)){ + rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn" + ans = c(list(recycle(vector_rownames, nrow)), ans) + vnames = c(rn_name, vnames) + } setattr(ans, "names", vnames) setDT(ans, key=key) # copy ensured above; also, setDT handles naming if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames) # PR 3854 and tests 2058.15-17 From 3cfc61bace8baf713f436be588afdfe739175b64 Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Tue, 8 Jul 2025 00:47:54 +0530 Subject: [PATCH 02/38] remove trailing whitespace --- R/as.data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index c2a79d3811..5eac350523 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -143,10 +143,10 @@ as.data.table.list = function(x, valid_names = names(xi) if(any(nzchar(valid_names))) { vector_rownames = valid_names - x[[i]] = unname(xi) + x[[i]] = unname(xi) break } - } + } } } for (i in seq_len(n)) { From 7d1e5431009412222e87ab188d3b3d20e2b7e26c Mon Sep 17 00:00:00 2001 From: Mukul Kumar Date: Tue, 8 Jul 2025 01:25:24 +0530 Subject: [PATCH 03/38] add coverage tests --- inst/tests/tests.Rraw | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ec76104ea9..3c714b566d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21365,3 +21365,9 @@ test(2328.1, levels(droplevels(DT)$f), character()) DT[, i := integer()] DT[, f2 := factor()] test(2328.2, droplevels(DT), data.table(f=factor(), i=integer(), f2=factor())) + +# Row name extraction from multiple vectors, #7136 +x <- c(1, 2, 3) +y <- setNames(c(4, 5, 6), c("A", "B", "C")) +test(2329.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B",
"C"), X=x, Y=unname(y))) +test(2329.2, as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), X=x, Y=unname(y))) From 11485c9221d95d6d81d4e0e79dd42c2a043d5a62 Mon Sep 17 00:00:00 2001 From: Mukul Kumar Date: Tue, 8 Jul 2025 01:42:13 +0530 Subject: [PATCH 04/38] replace X and Y --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3c714b566d..6de135dcc3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21369,5 +21369,5 @@ test(2328.2, droplevels(DT), data.table(f=factor(), i=integer(), f2=factor())) # Row name extraction from multiple vectors, #7136 x <- c(1, 2, 3) y <- setNames(c(4, 5, 6), c("A", "B", "C")) -test(2329.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), X=x, Y=unname(y))) -test(2329.2, as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), X=x, Y=unname(y))) +test(2329.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=x, V2=unname(y))) +test(2329.2, as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=x, V2=unname(y))) From 4088852c54eb09f2d9c726b9f265863601d6d94f Mon Sep 17 00:00:00 2001 From: Mukul Kumar Date: Tue, 8 Jul 2025 02:01:01 +0530 Subject: [PATCH 05/38] result test --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6de135dcc3..cef4c56910 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21369,5 +21369,5 @@ test(2328.2, droplevels(DT), data.table(f=factor(), i=integer(), f2=factor())) # Row name extraction from multiple vectors, #7136 x <- c(1, 2, 3) y <- setNames(c(4, 5, 6), c("A", "B", "C")) -test(2329.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=x, V2=unname(y))) -test(2329.2, 
as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=x, V2=unname(y))) +test(2329.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1L, 2L, 3L), V2=c(4L, 5L, 6L))) +test(2329.2, as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=c(1L, 2L, 3L), V2=c(4L, 5L, 6L))) From c143dabf851073d0d0ba7e7303fc65d7726a5384 Mon Sep 17 00:00:00 2001 From: Mukul Kumar Date: Tue, 8 Jul 2025 02:04:32 +0530 Subject: [PATCH 06/38] tests --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index cef4c56910..aa5729757a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21369,5 +21369,5 @@ test(2328.2, droplevels(DT), data.table(f=factor(), i=integer(), f2=factor())) # Row name extraction from multiple vectors, #7136 x <- c(1, 2, 3) y <- setNames(c(4, 5, 6), c("A", "B", "C")) -test(2329.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1L, 2L, 3L), V2=c(4L, 5L, 6L))) -test(2329.2, as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=c(1L, 2L, 3L), V2=c(4L, 5L, 6L))) +test(2329.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) +test(2329.2, as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) From 6775ce18df129fdda5a692f65f656bfae4e5747b Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Tue, 8 Jul 2025 02:06:21 +0530 Subject: [PATCH 07/38] corrected test output --- inst/tests/tests.Rraw | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6de135dcc3..3fd980e3f1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ 
-21369,5 +21369,6 @@ test(2328.2, droplevels(DT), data.table(f=factor(), i=integer(), f2=factor())) # Row name extraction from multiple vectors, #7136 x <- c(1, 2, 3) y <- setNames(c(4, 5, 6), c("A", "B", "C")) -test(2329.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=x, V2=unname(y))) -test(2329.2, as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=x, V2=unname(y))) +test(2329.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) +test(2329.2, as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) + From b825415c5ea559fd4bb9b76e175efe0e79fd062a Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Tue, 8 Jul 2025 09:56:08 +0530 Subject: [PATCH 08/38] use as.data.table in tests --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3fd980e3f1..a43539992c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21369,6 +21369,6 @@ test(2328.2, droplevels(DT), data.table(f=factor(), i=integer(), f2=factor())) # Row name extraction from multiple vectors, #7136 x <- c(1, 2, 3) y <- setNames(c(4, 5, 6), c("A", "B", "C")) -test(2329.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) -test(2329.2, as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) +test(2329.1, as.data.table(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) +test(2329.2, as.data.table(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) From 9d6f99a65799ea21a3ed001e3869b0fdabd51ebf Mon Sep 17 00:00:00 2001 From: Mukul 
<145585624+Mukulyadav2004@users.noreply.github.com> Date: Tue, 8 Jul 2025 13:38:22 +0530 Subject: [PATCH 09/38] use isFalse --- R/as.data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index 5eac350523..c9a3bfa7aa 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -136,7 +136,7 @@ as.data.table.list = function(x, #Handle keep.rownames for vectors (mimicking data.frame behavior) vector_rownames = NULL - if(!identical(keep.rownames, FALSE)) { + if(!isFALSE(keep.rownames)) { for(i in seq_len(n)){ xi = x[[i]] if (!is.null(xi) && is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi)) && length(names(xi)) > 0) { From 68cc4213449edf24bb17af643a591920fbbc433e Mon Sep 17 00:00:00 2001 From: Mukul Kumar Date: Tue, 8 Jul 2025 18:16:38 +0530 Subject: [PATCH 10/38] add classed error conditions --- R/groupingsets.R | 4 ++-- R/merge.R | 2 +- R/setkey.R | 8 ++++---- R/setops.R | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/groupingsets.R b/R/groupingsets.R index 7112308323..d464faa50b 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -4,7 +4,7 @@ rollup = function(x, ...) { rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { # input data type basic validation if (!is.data.table(x)) - stopf("Argument 'x' must be a data.table object") + stopf("Argument 'x' must be a data.table object", class="dt_invalid_type_error") if (!is.character(by)) stopf("Argument 'by' must be a character vector of column names used in grouping.") if (!is.logical(id)) @@ -22,7 +22,7 @@ cube = function(x, ...) { cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) 
{ # input data type basic validation if (!is.data.table(x)) - stopf("Argument 'x' must be a data.table object") + stopf("Argument 'x' must be a data.table object", class="dt_invalid_type_error") if (!is.character(by)) stopf("Argument 'by' must be a character vector of column names used in grouping.") if (!is.logical(id)) diff --git a/R/merge.R b/R/merge.R index c67f6e266a..fb4a960b0b 100644 --- a/R/merge.R +++ b/R/merge.R @@ -34,7 +34,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL warningf("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.") if (!is.null(by.x)) { if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y)) - stopf("A non-empty vector of column names is required for `by.x` and `by.y`.") + stopf("A non-empty vector of column names is required for `by.x` and `by.y`.", class="dt_missing_column_error") if (!all(idx <- by.x %chin% nm_x)) { stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx])) } diff --git a/R/setkey.R b/R/setkey.R index 94ad3d4faf..bcea14c44f 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -43,7 +43,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU if (!all(nzchar(cols))) stopf("cols contains some blanks.") cols = gsub("`", "", cols, fixed = TRUE) miss = !(cols %chin% colnames(x)) - if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss])) + if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_colulmn_error") if (physical && identical(head(key(x), length(cols)), cols)){ ## for !physical we need to compute groups as well #4387 ## key is present but x has a longer key. No sorting needed, only attribute is changed to shorter key. @@ -54,7 +54,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU if (".xi" %chin% names(x)) stopf("x contains a column called '.xi'. 
Conflicts with internal use by data.table.") for (i in cols) { .xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column - if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported as a key column type, currently.", i, typeof(.xi)) + if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported as a key column type, currently.", i, typeof(.xi), class="dt_unsupported_type_error") } if (!is.character(cols) || length(cols)<1L) internal_error("'cols' should be character at this point") # nocov @@ -266,11 +266,11 @@ setorderv = function(x, cols = colnames(x), order=1L, na.last=FALSE) # remove backticks from cols cols = gsub("`", "", cols, fixed = TRUE) miss = !(cols %chin% colnames(x)) - if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss])) + if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_colulmn_error") if (".xi" %chin% colnames(x)) stopf("x contains a column called '.xi'. 
Conflicts with internal use by data.table.") for (i in cols) { .xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column - if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported for ordering currently.", i, typeof(.xi)) + if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported for ordering currently.", i, typeof(.xi), class="dt_unsupported_type_error") } if (!is.character(cols) || length(cols)<1L) internal_error("'cols' should be character at this point") # nocov diff --git a/R/setops.R b/R/setops.R index 9a0effd53e..08276fdacc 100644 --- a/R/setops.R +++ b/R/setops.R @@ -14,11 +14,11 @@ setdiff_ = function(x, y, by.x=seq_along(x), by.y=seq_along(y), use.names=FALSE) icnam = names(y)[lc] xcnam = names(x)[rc] if ( is.character(x[[rc]]) && !(is.character(y[[lc]]) || is.factor(y[[lc]])) ) { - stopf("When x's column ('%s') is character, the corresponding column in y ('%s') should be factor or character, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]])) + stopf("When x's column ('%s') is character, the corresponding column in y ('%s') should be factor or character, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_type_mismatch_error") } else if ( is.factor(x[[rc]]) && !(is.character(y[[lc]]) || is.factor(y[[lc]])) ) { - stopf("When x's column ('%s') is factor, the corresponding column in y ('%s') should be character or factor, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]])) + stopf("When x's column ('%s') is factor, the corresponding column in y ('%s') should be character or factor, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_type_mismatch_error") } else if ( (is.integer(x[[rc]]) || is.double(x[[rc]])) && (is.logical(y[[lc]]) || is.character(y[[lc]])) ) { - stopf("When x's column ('%s') is integer or numeric, the corresponding column in y ('%s') can not be character 
or logical types, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]])) + stopf("When x's column ('%s') is integer or numeric, the corresponding column in y ('%s') can not be character or logical types, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_type_mismatch_error") } } ux = unique(shallow(x, by.x)) From 11acfe6b9c2962966ce657d916a229a787f80c67 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 8 Jul 2025 09:08:11 -0700 Subject: [PATCH 11/38] ws style --- R/as.data.table.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index c9a3bfa7aa..80c033c51e 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -137,11 +137,11 @@ as.data.table.list = function(x, #Handle keep.rownames for vectors (mimicking data.frame behavior) vector_rownames = NULL if(!isFALSE(keep.rownames)) { - for(i in seq_len(n)){ + for (i in seq_len(n)) { xi = x[[i]] if (!is.null(xi) && is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi)) && length(names(xi)) > 0) { valid_names = names(xi) - if(any(nzchar(valid_names))) { + if (any(nzchar(valid_names))) { vector_rownames = valid_names x[[i]] = unname(xi) break @@ -218,7 +218,7 @@ as.data.table.list = function(x, if (check.names) vnames = make.names(vnames, unique=TRUE) # Add rownames column when vector names were found - if(!is.null(vector_rownames)){ + if (!is.null(vector_rownames)) { rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn" ans = c(list(recycle(vector_rownames, nrow)), ans) vnames = c(rn_name, vnames) From 26160e22d4adc9fb9e7c186fae377464a9837bfe Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 8 Jul 2025 09:08:24 -0700 Subject: [PATCH 12/38] rm redundant condition --- R/as.data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index 80c033c51e..e075ce6a05 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -139,7 +139,7 
@@ as.data.table.list = function(x, if(!isFALSE(keep.rownames)) { for (i in seq_len(n)) { xi = x[[i]] - if (!is.null(xi) && is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi)) && length(names(xi)) > 0) { + if (!is.null(xi) && is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi))) { valid_names = names(xi) if (any(nzchar(valid_names))) { vector_rownames = valid_names From 92f281f2979ead73c0ca08c763e5d3e30af80418 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 8 Jul 2025 09:08:47 -0700 Subject: [PATCH 13/38] missed ws change --- R/as.data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index e075ce6a05..2e1b7003e8 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -136,7 +136,7 @@ as.data.table.list = function(x, #Handle keep.rownames for vectors (mimicking data.frame behavior) vector_rownames = NULL - if(!isFALSE(keep.rownames)) { + if (!isFALSE(keep.rownames)) { for (i in seq_len(n)) { xi = x[[i]] if (!is.null(xi) && is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi))) { From 7041d8d063ef4bc4c971dfde547b31b39acac1ae Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Tue, 8 Jul 2025 22:39:19 +0530 Subject: [PATCH 14/38] invalid_input in place of invalid_type --- R/groupingsets.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/groupingsets.R b/R/groupingsets.R index d464faa50b..f5fc2101f1 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -4,7 +4,7 @@ rollup = function(x, ...) { rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) 
{ # input data type basic validation if (!is.data.table(x)) - stopf("Argument 'x' must be a data.table object", class="dt_invalid_type_error") + stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error") if (!is.character(by)) stopf("Argument 'by' must be a character vector of column names used in grouping.") if (!is.logical(id)) @@ -22,7 +22,7 @@ cube = function(x, ...) { cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { # input data type basic validation if (!is.data.table(x)) - stopf("Argument 'x' must be a data.table object", class="dt_invalid_type_error") + stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error") if (!is.character(by)) stopf("Argument 'by' must be a character vector of column names used in grouping.") if (!is.logical(id)) From 0a583fae18a2d8eecb9ee973d1985543cdb77c16 Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Tue, 8 Jul 2025 22:42:06 +0530 Subject: [PATCH 15/38] right place invalid_input --- R/merge.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/merge.R b/R/merge.R index fb4a960b0b..00cbedea17 100644 --- a/R/merge.R +++ b/R/merge.R @@ -34,7 +34,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL warningf("Supplied both `by` and `by.x`/`by.y`. 
`by` argument will be ignored.") if (!is.null(by.x)) { if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y)) - stopf("A non-empty vector of column names is required for `by.x` and `by.y`.", class="dt_missing_column_error") + stopf("A non-empty vector of column names is required for `by.x` and `by.y`.", class="dt_invalid_input_error") if (!all(idx <- by.x %chin% nm_x)) { stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx])) } From 42d63add9398b365f51711c4c69f95bdfdbddd69 Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Tue, 8 Jul 2025 22:45:04 +0530 Subject: [PATCH 16/38] typo and unsortable in place of unsupported --- R/setkey.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/setkey.R b/R/setkey.R index bcea14c44f..4ba5be4d71 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -43,7 +43,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU if (!all(nzchar(cols))) stopf("cols contains some blanks.") cols = gsub("`", "", cols, fixed = TRUE) miss = !(cols %chin% colnames(x)) - if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_colulmn_error") + if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_column_error") if (physical && identical(head(key(x), length(cols)), cols)){ ## for !physical we need to compute groups as well #4387 ## key is present but x has a longer key. No sorting needed, only attribute is changed to shorter key. @@ -54,7 +54,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU if (".xi" %chin% names(x)) stopf("x contains a column called '.xi'. 
Conflicts with internal use by data.table.") for (i in cols) { .xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column - if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported as a key column type, currently.", i, typeof(.xi), class="dt_unsupported_type_error") + if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported as a key column type, currently.", i, typeof(.xi), class="dt_unsortable_type_error") } if (!is.character(cols) || length(cols)<1L) internal_error("'cols' should be character at this point") # nocov @@ -266,11 +266,11 @@ setorderv = function(x, cols = colnames(x), order=1L, na.last=FALSE) # remove backticks from cols cols = gsub("`", "", cols, fixed = TRUE) miss = !(cols %chin% colnames(x)) - if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_colulmn_error") + if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_column_error") if (".xi" %chin% colnames(x)) stopf("x contains a column called '.xi'. 
Conflicts with internal use by data.table.") for (i in cols) { .xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column - if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported for ordering currently.", i, typeof(.xi), class="dt_unsupported_type_error") + if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported for ordering currently.", i, typeof(.xi), class="dt_unsortable_type_error") } if (!is.character(cols) || length(cols)<1L) internal_error("'cols' should be character at this point") # nocov From 0602e72af9bddc8566f2d3b81a021d939e765c1b Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Tue, 8 Jul 2025 22:46:37 +0530 Subject: [PATCH 17/38] specify join type --- R/setops.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/setops.R b/R/setops.R index 08276fdacc..1d129c4a9a 100644 --- a/R/setops.R +++ b/R/setops.R @@ -14,11 +14,11 @@ setdiff_ = function(x, y, by.x=seq_along(x), by.y=seq_along(y), use.names=FALSE) icnam = names(y)[lc] xcnam = names(x)[rc] if ( is.character(x[[rc]]) && !(is.character(y[[lc]]) || is.factor(y[[lc]])) ) { - stopf("When x's column ('%s') is character, the corresponding column in y ('%s') should be factor or character, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_type_mismatch_error") + stopf("When x's column ('%s') is character, the corresponding column in y ('%s') should be factor or character, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_join_type_mismatch_error") } else if ( is.factor(x[[rc]]) && !(is.character(y[[lc]]) || is.factor(y[[lc]])) ) { - stopf("When x's column ('%s') is factor, the corresponding column in y ('%s') should be character or factor, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_type_mismatch_error") + stopf("When x's column ('%s') is factor, 
the corresponding column in y ('%s') should be character or factor, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_join_type_mismatch_error") } else if ( (is.integer(x[[rc]]) || is.double(x[[rc]])) && (is.logical(y[[lc]]) || is.character(y[[lc]])) ) { - stopf("When x's column ('%s') is integer or numeric, the corresponding column in y ('%s') can not be character or logical types, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_type_mismatch_error") + stopf("When x's column ('%s') is integer or numeric, the corresponding column in y ('%s') can not be character or logical types, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_join_type_mismatch_error") } } ux = unique(shallow(x, by.x)) From b6cc6ac842cbef31b49d205075f77ec5042ac0ac Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Tue, 8 Jul 2025 23:22:46 +0530 Subject: [PATCH 18/38] merge our loop which checks for vector rowname extraction in below --- R/as.data.table.R | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index 2e1b7003e8..c1b686240d 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -133,25 +133,19 @@ as.data.table.list = function(x, missing.check.names = missing(check.names) origListNames = if (missing(.named)) names(x) else NULL # as.data.table called directly, not from inside data.table() which provides .named, #3854 empty_atomic = FALSE - - #Handle keep.rownames for vectors (mimicking data.frame behavior) + # Handle keep.rownames for vectors (mimicking data.frame behavior) vector_rownames = NULL - if (!isFALSE(keep.rownames)) { - for (i in seq_len(n)) { - xi = x[[i]] - if (!is.null(xi) && is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi))) { - valid_names = names(xi) - if (any(nzchar(valid_names))) { - vector_rownames = valid_names - x[[i]] = unname(xi) - break - } - } - } - } 
+ check_rownames = !isFALSE(keep.rownames) for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above + if (check_rownames && is.null(vector_rownames) && !is.null(xi) && is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi))) { + valid_names = names(xi) + if (any(nzchar(valid_names))) { + vector_rownames = valid_names + x[[i]] = unname(xi) + } + } if (!is.null(dim(xi)) && missing.check.names) check.names=TRUE if ("POSIXlt" %chin% class(xi)) { warningf("POSIXlt column type detected and converted to POSIXct. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.") From 3454fcf0a3580993f6924731d70abb296939edcb Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Wed, 9 Jul 2025 01:28:35 +0530 Subject: [PATCH 19/38] added logic for handling data.frame --- R/as.data.table.R | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index c1b686240d..cdee8efcb9 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -139,11 +139,21 @@ as.data.table.list = function(x, for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above - if (check_rownames && is.null(vector_rownames) && !is.null(xi) && is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi))) { - valid_names = names(xi) - if (any(nzchar(valid_names))) { - vector_rownames = valid_names - x[[i]] = unname(xi) + if (check_rownames && is.null(vector_rownames)) { + # Check for named vectors + if (is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi))) { + valid_names = names(xi) + if (any(nzchar(valid_names))) { + vector_rownames = valid_names + x[[i]] = unname(xi) + } + } + # Check for data.frames or matrices with explicit rownames + else if (!is.null(dim(xi)) && !is.null(rownames(xi))) { + valid_names = rownames(xi) + if (any(nzchar(valid_names))) { + vector_rownames 
= valid_names + } } } if (!is.null(dim(xi)) && missing.check.names) check.names=TRUE @@ -155,7 +165,7 @@ as.data.table.list = function(x, if (is.matrix(xi) && NCOL(xi)<=1L && is.null(colnames(xi))) { # 1 column matrix naming #4124 xi = x[[i]] = c(xi) } else { - xi = x[[i]] = as.data.table(xi, keep.rownames=keep.rownames) # we will never allow a matrix to be a column; always unpack the columns + xi = x[[i]] = as.data.table(xi, keep.rownames=FALSE) # we will never allow a matrix to be a column; always unpack the columns } } # else avoid dispatching to as.data.table.data.table (which exists and copies) From f3e5a0e66b6d2eb36011335cf8f775d2d38da0ba Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Wed, 9 Jul 2025 01:34:04 +0530 Subject: [PATCH 20/38] add tests --- inst/tests/tests.Rraw | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a43539992c..052ecda46a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21371,4 +21371,17 @@ x <- c(1, 2, 3) y <- setNames(c(4, 5, 6), c("A", "B", "C")) test(2329.1, as.data.table(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) test(2329.2, as.data.table(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) +test(2329.3, as.data.table.list(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(4, 5, 6), V2=c(1, 2, 3))) + +# Behavior under data.frame() +test(2329.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) +test(2329.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(4, 5, 6), V2=c(1, 2, 3))) + +a <- setNames(c(7, 8, 9), c("", "", "")) # test condition about any(nzchar(valid_names)) +test(2329.6, as.data.table.list(list(a), keep.rownames=TRUE), data.table(V1=c(7, 8, 9))) +b <- 
setNames(c(10, 11, 12), c("", "B", "")) +test(2329.7, as.data.table.list(list(b), keep.rownames=TRUE), data.table(rn=c("", "B", ""), V1=c(10, 11, 12))) + +DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames +test(2329.8, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=c("a", "b", "c", "d", "e", "f"), a=c(6, 5, 4, 3, 2, 1), V=c(1, 2, 3, 4, 5, 6))) From a90fe47caa61dc2faf0357eae83a5c16a4539e48 Mon Sep 17 00:00:00 2001 From: Mukul Kumar Date: Wed, 9 Jul 2025 01:45:15 +0530 Subject: [PATCH 21/38] add tests --- R/as.data.table.R | 34 ++++++++++++++++++++-------------- inst/tests/tests.Rraw | 13 +++++++++++++ 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index 5eac350523..524cd06e91 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -134,24 +134,30 @@ as.data.table.list = function(x, origListNames = if (missing(.named)) names(x) else NULL # as.data.table called directly, not from inside data.table() which provides .named, #3854 empty_atomic = FALSE - #Handle keep.rownames for vectors (mimicking data.frame behavior) + # Handle keep.rownames for vectors (mimicking data.frame behavior) vector_rownames = NULL - if(!identical(keep.rownames, FALSE)) { - for(i in seq_len(n)){ - xi = x[[i]] - if (!is.null(xi) && is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi)) && length(names(xi)) > 0) { - valid_names = names(xi) - if(any(nzchar(valid_names))) { + check_rownames = !isFALSE(keep.rownames) + + for (i in seq_len(n)) { + xi = x[[i]] + if (is.null(xi)) next # eachncol already initialized to 0 by integer() above + if (check_rownames && is.null(vector_rownames)) { + # Check for named vectors + if (is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi))) { + valid_names = names(xi) + if (any(nzchar(valid_names))) { vector_rownames = valid_names x[[i]] = unname(xi) - break } } - } - } - for (i in seq_len(n)) { - xi = x[[i]] - if (is.null(xi)) next # 
eachncol already initialized to 0 by integer() above + # Check for data.frames or matrices with explicit rownames + else if (!is.null(dim(xi)) && !is.null(rownames(xi))) { + valid_names = rownames(xi) + if (any(nzchar(valid_names))) { + vector_rownames = valid_names + } + } + } if (!is.null(dim(xi)) && missing.check.names) check.names=TRUE if ("POSIXlt" %chin% class(xi)) { warningf("POSIXlt column type detected and converted to POSIXct. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.") @@ -161,7 +167,7 @@ as.data.table.list = function(x, if (is.matrix(xi) && NCOL(xi)<=1L && is.null(colnames(xi))) { # 1 column matrix naming #4124 xi = x[[i]] = c(xi) } else { - xi = x[[i]] = as.data.table(xi, keep.rownames=keep.rownames) # we will never allow a matrix to be a column; always unpack the columns + xi = x[[i]] = as.data.table(xi, keep.rownames=FALSE) # we will never allow a matrix to be a column; always unpack the columns } } # else avoid dispatching to as.data.table.data.table (which exists and copies) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aa5729757a..861d129a36 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21371,3 +21371,16 @@ x <- c(1, 2, 3) y <- setNames(c(4, 5, 6), c("A", "B", "C")) test(2329.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) test(2329.2, as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) +test(2329.3, as.data.table.list(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(4, 5, 6), V2=c(1, 2, 3))) + +# Behavior under data.frame() +test(2329.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) +test(2329.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(4, 5, 6), V2=c(1, 2, 3))) + +a <- setNames(c(7, 8, 
9), c("", "", "")) # test condition about any(nzchar(valid_names)) +test(2329.6, as.data.table.list(list(a), keep.rownames=TRUE), data.table(V1=c(7, 8, 9))) +b <- setNames(c(10, 11, 12), c("", "B", "")) +test(2329.7, as.data.table.list(list(b), keep.rownames=TRUE), data.table(rn=c("", "B", ""), V1=c(10, 11, 12))) + +DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames +test(2329.8, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=c("a", "b", "c", "d", "e", "f"), a=c(6, 5, 4, 3, 2, 1), V=c(1, 2, 3, 4, 5, 6))) From babc65786fe83d5630ca2c0faffb31a34082c47e Mon Sep 17 00:00:00 2001 From: Mukul Kumar Date: Wed, 9 Jul 2025 09:30:46 +0530 Subject: [PATCH 22/38] merger master --- ...r.R => news_github_link_mismatch_linter.R} | 30 +- .../md/news_section_numbering_linter.R | 26 ++ ..._linter.R => vignette_heading_id_linter.R} | 2 +- GOVERNANCE.md | 4 +- NEWS.md | 6 +- R/as.data.table.R | 3 + R/print.data.table.R | 2 + inst/tests/tests.Rraw | 45 ++- src/fwrite.c | 318 +++++++++--------- src/fwriteR.c | 113 ++++--- 10 files changed, 296 insertions(+), 253 deletions(-) rename .ci/linters/md/{news_linter.R => news_github_link_mismatch_linter.R} (51%) create mode 100644 .ci/linters/md/news_section_numbering_linter.R rename .ci/linters/md/{heading_id_linter.R => vignette_heading_id_linter.R} (95%) diff --git a/.ci/linters/md/news_linter.R b/.ci/linters/md/news_github_link_mismatch_linter.R similarity index 51% rename from .ci/linters/md/news_linter.R rename to .ci/linters/md/news_github_link_mismatch_linter.R index 8e8368ee7f..4cf41f1028 100644 --- a/.ci/linters/md/news_linter.R +++ b/.ci/linters/md/news_github_link_mismatch_linter.R @@ -1,32 +1,6 @@ -# ensure that numbered list in each section is in sequence -check_section_numbering = function(news) { - if (!grepl("NEWS", news)) return(invisible()) - news = readLines(news) - # plain '#' catches some examples; 'd' for 'data.table' - sections = grep("^#+ [A-Zd]", news) - 
entries = grep("^[0-9]+[.]", news) - entry_value = as.integer(gsub("^([0-9]+)[.].*", "\\1", news[entries])) - section_id = findInterval(entries, sections) - - any_mismatch = FALSE - for (id in unique(section_id)) { - section_entries = entry_value[section_id == id] - intended_value = seq_along(section_entries) - matched = section_entries == intended_value - if (all(matched)) next - any_mismatch = TRUE - section_header = news[sections[id]] - cat(sprintf( - "In section '%s' (line %d), bad numbering:\n%s\n", - section_header, sections[id], - paste0(" [", section_entries[!matched], " --> ", intended_value[!matched], "]", collapse="\n") - )) - } - stopifnot("Please fix the NEWS issues above" = !any_mismatch) -} - # ensure that GitHub link text & URL actually agree -check_gh_links = function(news) { +news_github_link_mismatch_linter = function(news) { + if (!grepl("NEWS", news)) return(invisible()) news = readLines(news) gh_links_info = gregexpr( "\\[#(?[0-9]+)\\]\\(https://github.com/Rdatatable/data.table/(?[^/]+)/(?[0-9]+)\\)", diff --git a/.ci/linters/md/news_section_numbering_linter.R b/.ci/linters/md/news_section_numbering_linter.R new file mode 100644 index 0000000000..b209cac884 --- /dev/null +++ b/.ci/linters/md/news_section_numbering_linter.R @@ -0,0 +1,26 @@ +# ensure that numbered list in each section is in sequence +news_section_numbering_linter = function(news) { + if (!grepl("NEWS", news)) return(invisible()) + news = readLines(news) + # plain '#' catches some examples; 'd' for 'data.table' + sections = grep("^#+ [A-Zd]", news) + entries = grep("^[0-9]+[.]", news) + entry_value = as.integer(gsub("^([0-9]+)[.].*", "\\1", news[entries])) + section_id = findInterval(entries, sections) + + any_mismatch = FALSE + for (id in unique(section_id)) { + section_entries = entry_value[section_id == id] + intended_value = seq_along(section_entries) + matched = section_entries == intended_value + if (all(matched)) next + any_mismatch = TRUE + section_header = 
news[sections[id]] + cat(sprintf( + "In section '%s' (line %d), bad numbering:\n%s\n", + section_header, sections[id], + paste0(" [", section_entries[!matched], " --> ", intended_value[!matched], "]", collapse="\n") + )) + } + stopifnot("Please fix the NEWS issues above" = !any_mismatch) +} diff --git a/.ci/linters/md/heading_id_linter.R b/.ci/linters/md/vignette_heading_id_linter.R similarity index 95% rename from .ci/linters/md/heading_id_linter.R rename to .ci/linters/md/vignette_heading_id_linter.R index 6a2933f645..ec63d2d5be 100644 --- a/.ci/linters/md/heading_id_linter.R +++ b/.ci/linters/md/vignette_heading_id_linter.R @@ -1,6 +1,6 @@ # ensure that ids are limited to alphanumerics and dashes # (in particular, dots and underscores break the links) -check_header_ids = function(md) { +vignette_heading_id_linter = function(md) { if (!grepl('[.]Rmd$', md)) return(invisible()) md = readLines(md) # A bit surprisingly, some headings don't start with a letter. diff --git a/GOVERNANCE.md b/GOVERNANCE.md index 4f75580f8a..8af4e7976c 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -63,7 +63,7 @@ Functionality that is out of current scope: ## Committer * Definition: permission to commit to, and merge PRs into, master branch. -* How to obtain this role: after a Reviewer has a consistent history of careful reviews of others' PRs, then a current Committer should ask all other current Committers if they approve promoting the Reviewer to Committer, and it should be done if there is Consensus among active Committers. +* How to obtain this role: after a Reviewer has a consistent history of careful reviews of others' substantial PRs, then a current Committer should ask all other current Committers if they approve promoting the Reviewer to Committer, and it should be done if there is Consensus among active Committers. 
* How this role is recognized: credited via role="aut" in DESCRIPTION (so they appear in Author list on CRAN), and added to https://github.com/orgs/Rdatatable/teams/committers which gives permission to merge PRs into master branch. ## CRAN Maintainer @@ -139,6 +139,8 @@ data.table Version line in DESCRIPTION typically has the following meanings # Governance history +July 2025: require potential new committers' considered history to be of "substantial" PRs + May 2025: update Finance and CoC language for NumFOCUS incorporation. Feb 2025: add Finances and Funding section, update Code of Conduct section to be a brief summary and reference the broader CoC document. diff --git a/NEWS.md b/NEWS.md index a0fdd13aed..09ad8fc38a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -40,7 +40,7 @@ # 2: 2 6 4 5 ``` -8. `groupingsets()` gets a new argument `enclos` for use together with the `jj` argument in functions wrapping `groupingsets()`, including the existing wrappers `rollup()` and `cube()`. When forwarding a `j`-expression as `groupingsets(jj = substitute(j))`, make sure to pass `enclos = parent.frame()` as well, so that the `j`-expression will be evaluated in the right context. This makes it possible for `j` to refer to variables outside the `data.table`. +8. `groupingsets()` gets a new argument `enclos` for use together with the `jj` argument in functions wrapping `groupingsets()`, including the existing wrappers `rollup()` and `cube()`, [#5560](https://github.com/Rdatatable/data.table/issues/5560). When forwarding a `j`-expression as `groupingsets(jj = substitute(j))`, make sure to pass `enclos = parent.frame()` as well, so that the `j`-expression will be evaluated in the right context. This makes it possible for `j` to refer to variables outside the `data.table`. Thanks @sindribaldur for the report and @aitap for the fix. ### BUG FIXES @@ -70,7 +70,7 @@ 13. In rare cases, `data.table` failed to expand ALTREP columns when assigning a full column by reference. 
This could result in the target column getting modified unintentionally if the next call to the data.table was a modification by reference of the source column. E.g. in `DT[, b := as.character(a)]` the string conversion gets deferred and subsequent modification of column `a` would also modify column `b`, [#5400](https://github.com/Rdatatable/data.table/issues/5400). Thanks to @aquasync for the report and Václav Tlapák for the PR. -14. `data.table()` function is now more aligned with `data.frame()` with respect to the names of the output when one of its inputs is a single-column matrix object, [#4124](https://github.com/Rdatatable/data.table/issues/4124). Thanks @PavoDive for the report and @jangorecki for the PR. +14. `data.table()` function is now more aligned with `data.frame()` with respect to the names of the output when one of its inputs is a single-column matrix object, [#4124](https://github.com/Rdatatable/data.table/issues/4124). Thanks @PavoDive for the report, @jangorecki for the PR, and @MichaelChirico for a follow-up for back-compatibility. 15. Including an `ITime` object as a named input to `data.frame()` respects the provided name, i.e. `data.frame(a = as.ITime(...))` will have column `a`, [#4673](https://github.com/Rdatatable/data.table/issues/4673). Thanks @shrektan for the report and @MichaelChirico for the fix. @@ -84,6 +84,8 @@ 20. `droplevels()` works on 0-row data.tables, [#7043](https://github.com/Rdatatable/data.table/issues/7043). The result will have factor columns `factor(character())`, consistent with the data.frame method. Thanks @advieser for the report and @MichaelChirico for the fix. +21. `print(..., col.names = 'none')` now correctly adapts column widths to the data content, ignoring the original column names and producing a more compact output, [#6882](https://github.com/Rdatatable/data.table/issues/6882). Thanks to @brooksambrose for the report and @venom1204 for the PR. + ### NOTES 1. 
Continued work to remove non-API C functions, [#6180](https://github.com/Rdatatable/data.table/issues/6180). Thanks Ivan Krylov for the PRs and for writing a clear and concise guide about the R API: https://aitap.codeberg.page/R-api/. diff --git a/R/as.data.table.R b/R/as.data.table.R index 524cd06e91..e02b86298a 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -48,6 +48,9 @@ as.data.table.matrix = function(x, keep.rownames=FALSE, key=NULL, ...) { if (!identical(keep.rownames, FALSE)) { # can specify col name to keep.rownames, #575 ans = data.table(rn=rownames(x), x, keep.rownames=FALSE) + # auto-inferred name 'x' is not back-compatible & inconsistent, #7145 + if (ncol(x) == 1L && is.null(colnames(x))) + setnames(ans, 'x', 'V1') if (is.character(keep.rownames)) setnames(ans, 'rn', keep.rownames[1L]) return(ans) diff --git a/R/print.data.table.R b/R/print.data.table.R index 3e93eb3f81..b29d0af02c 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -141,6 +141,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), print_default(toprint) return(invisible(x)) } + if (col.names == "none") + colnames(toprint) = rep.int("", ncol(toprint)) if (nrow(toprint)>20L && col.names == "auto") # repeat colnames at the bottom if over 20 rows so you don't have to scroll up to see them # option to shut this off per request of Oleg Bondar on SO, #1482 diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 861d129a36..166c0839f9 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21276,13 +21276,46 @@ if (test_R.utils) local({ }) # Create a data.table when one vector is transposed doesn't respect the name defined by user #4124 -test(2321.1, DT <- data.table(a=1:2, b=matrix(1:2)), data.table(a=1:2, b=1:2)) -test(2321.2, names(DT), names(data.frame(a=1:2, b=matrix(1:2)))) -test(2321.3, DT <- data.table(a=integer(), b=matrix(1L, nrow=0L, ncol=1L)), data.table(a=integer(), b=integer())) -test(2321.4, names(DT), 
names(data.frame(a=integer(), b=matrix(1L, nrow=0L, ncol=1L)))) +test(2321.01, DT <- data.table(a=1:2, b=matrix(1:2)), data.table(a=1:2, b=1:2)) +test(2321.02, names(DT), names(data.frame(a=1:2, b=matrix(1:2)))) +test(2321.03, DT <- data.table(a=integer(), b=matrix(1L, nrow=0L, ncol=1L)), data.table(a=integer(), b=integer())) +test(2321.04, names(DT), names(data.frame(a=integer(), b=matrix(1L, nrow=0L, ncol=1L)))) ## but respect named column vectors -test(2321.5, DT <- data.table(a=1:2, cbind(b=3:4)), data.table(a=1:2, b=3:4)) -test(2321.6, names(DT), names(data.frame(a=1:2, cbind(b=3:4)))) +test(2321.05, DT <- data.table(a=1:2, cbind(b=3:4)), data.table(a=1:2, b=3:4)) +test(2321.06, names(DT), names(data.frame(a=1:2, cbind(b=3:4)))) +## also respect old naming pattern when invoked indirectly, #7145 +M = cbind(1:3) +test(2321.07, as.data.table(M), data.table(V1=1:3)) +rownames(M) = c('a', 'b', 'c') +test(2321.08, as.data.table(M), data.table(V1=1:3)) +test(2321.09, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), V1=1:3)) +colnames(M) = 'zz' +test(2321.10, as.data.table(M), data.table(zz=1:3)) +test(2321.11, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), zz=1:3)) +colnames(M) = 'x' +test(2321.12, as.data.table(M), data.table(x=1:3)) +test(2321.13, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), x=1:3)) +M = cbind(M, y=4:6) +test(2321.14, as.data.table(M), data.table(x=1:3, y=4:6)) +test(2321.15, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), x=1:3, y=4:6)) +colnames(M) = c('A', 'B') +test(2321.16, as.data.table(M), data.table(A=1:3, B=4:6)) +test(2321.17, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), A=1:3, B=4:6)) +colnames(M) = NULL +test(2321.18, as.data.table(M), data.table(V1=1:3, V2=4:6)) +test(2321.19, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), V1=1:3, V2=4:6)) +colnames(M) = c('x', '') +test(2321.20, as.data.table(M), 
data.table(x=1:3, V2=4:6)) +test(2321.21, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), x=1:3, V2=4:6)) +colnames(M) = c('', 'x') +test(2321.22, as.data.table(M), data.table(V1=1:3, x=4:6)) +test(2321.23, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), V1=1:3, x=4:6)) +colnames(M) = c('', '') +test(2321.24, as.data.table(M), data.table(V1=1:3, V2=4:6)) +test(2321.25, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), V1=1:3, V2=4:6)) +colnames(M) = c('A', '') +test(2321.26, as.data.table(M), data.table(A=1:3, V2=4:6)) +test(2321.27, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), A=1:3, V2=4:6)) # New fctr() helper: like factor() but retaining order by default #4837 test(2322.01, levels(fctr(c("b","a","c"))), c("b","a","c")) diff --git a/src/fwrite.c b/src/fwrite.c index 8658d4ddb5..99d9e6d72f 100644 --- a/src/fwrite.c +++ b/src/fwrite.c @@ -62,34 +62,34 @@ inline void write_chars(const char *x, char **pch) void writeBool8(const void *col, int64_t row, char **pch) { - int8_t x = ((const int8_t *)col)[row]; + int8_t x = ((const int8_t*)col)[row]; char *ch = *pch; - *ch++ = '0'+(x==1); - *pch = ch-(x==INT8_MIN); // if NA then step back, to save a branch + *ch++ = '0' + (x == 1); + *pch = ch - (x == INT8_MIN); // if NA then step back, to save a branch } void writeBool32(const void *col, int64_t row, char **pch) { - int32_t x = ((const int32_t *)col)[row]; + int32_t x = ((const int32_t*)col)[row]; char *ch = *pch; - if (x==INT32_MIN) { // TODO: when na=='\0' as recommended, use a branchless writer + if (x == INT32_MIN) { // TODO: when na=='\0' as recommended, use a branchless writer write_chars(na, &ch); } else { - *ch++ = '0'+x; + *ch++ = '0' + x; } *pch = ch; } void writeBool32AsString(const void *col, int64_t row, char **pch) { - int32_t x = ((const int32_t *)col)[row]; + int32_t x = ((const int32_t*)col)[row]; char *ch = *pch; if (x == INT32_MIN) { write_chars(na, &ch); } else if 
(x) { - *ch++='T'; *ch++='R'; *ch++='U'; *ch++='E'; + *ch++ = 'T'; *ch++ = 'R'; *ch++ = 'U'; *ch++ = 'E'; } else { - *ch++='F'; *ch++='A'; *ch++='L'; *ch++='S'; *ch++='E'; + *ch++ = 'F'; *ch++ = 'A'; *ch++ = 'L'; *ch++ = 'S'; *ch++ = 'E'; } *pch = ch; } @@ -97,7 +97,7 @@ void writeBool32AsString(const void *col, int64_t row, char **pch) static inline void reverse(char *upp, char *low) { upp--; - while (upp>low) { + while (upp > low) { char tmp = *upp; *upp = *low; *low = tmp; @@ -109,14 +109,14 @@ static inline void reverse(char *upp, char *low) void writeInt32(const void *col, int64_t row, char **pch) { char *ch = *pch; - int32_t x = ((const int32_t *)col)[row]; + int32_t x = ((const int32_t*)col)[row]; if (x == INT32_MIN) { write_chars(na, &ch); } else { - if (x<0) { *ch++ = '-'; x = -x; } + if (x < 0) { *ch++ = '-'; x = -x; } // Avoid log() for speed. Write backwards then reverse when we know how long. char *low = ch; - do { *ch++ = '0'+x%10; x/=10; } while (x>0); + do { *ch++ = '0' + x % 10; x /= 10; } while (x > 0); reverse(ch, low); } *pch = ch; @@ -125,13 +125,13 @@ void writeInt32(const void *col, int64_t row, char **pch) void writeInt64(const void *col, int64_t row, char **pch) { char *ch = *pch; - int64_t x = ((const int64_t *)col)[row]; + int64_t x = ((const int64_t*)col)[row]; if (x == INT64_MIN) { write_chars(na, &ch); } else { if (x<0) { *ch++ = '-'; x = -x; } char *low = ch; - do { *ch++ = '0'+x%10; x/=10; } while (x>0); + do { *ch++ = '0' + x % 10; x /= 10; } while (x > 0); reverse(ch, low); } *pch = ch; @@ -187,13 +187,13 @@ void writeFloat64(const void *col, int64_t row, char **pch) // ii) no C library calls such as sprintf() where the fmt string has to be interpreted over and over // iii) no need to return variables or flags. Just writes. // iv) shorter, easier to read and reason with in one self contained place. 
- double x = ((const double *)col)[row]; + double x = ((const double*)col)[row]; char *ch = *pch; if (!isfinite(x)) { if (isnan(x)) { write_chars(na, &ch); } else { - if (x<0) *ch++ = '-'; + if (x < 0) *ch++ = '-'; *ch++ = 'I'; *ch++ = 'n'; *ch++ = 'f'; } } else if (x == 0.0) { @@ -203,7 +203,7 @@ void writeFloat64(const void *col, int64_t row, char **pch) union { double d; uint64_t l; } u; u.d = x; uint64_t fraction = u.l & 0xFFFFFFFFFFFFF; // (1<<52)-1; - uint32_t exponent = (int32_t)((u.l>>52) & 0x7FF); // [0,2047] + uint32_t exponent = (int32_t)((u.l >> 52) & 0x7FF); // [0,2047] // Now sum the appropriate powers 2^-(1:52) of the fraction // Important for accuracy to start with the smallest first; i.e. 2^-52 @@ -214,9 +214,9 @@ void writeFloat64(const void *col, int64_t row, char **pch) double acc = 0; // 'long double' not needed int i = 52; if (fraction) { - while ((fraction & 0xFF) == 0) { fraction >>= 8; i-=8; } + while ((fraction & 0xFF) == 0) { fraction >>= 8; i -= 8; } while (fraction) { - acc += sigparts[(((fraction & 1u)^1u)-1u) & i]; + acc += sigparts[(((fraction & 1u) ^ 1u) - 1u) & i]; i--; fraction >>= 1; } @@ -226,59 +226,59 @@ void writeFloat64(const void *col, int64_t row, char **pch) // Therefore y in range [1.5,20.0) // Avoids (potentially inaccurate and potentially slow) log10/log10l, pow/powl, ldexp/ldexpl // By design we can just lookup the power from the tables - double y = (1.0+acc) * expsig[exponent]; // low magnitude mult + double y = (1.0 + acc) * expsig[exponent]; // low magnitude mult int exp = exppow[exponent]; - if (y>=9.99999999999999) { y /= 10; exp++; } + if (y >= 9.99999999999999) { y /= 10; exp++; } uint64_t l = y * SIZE_SF; // low magnitude mult 10^NUM_SF // l now contains NUM_SF+1 digits as integer where repeated /10 below is accurate // if (verbose) Rprintf(_("\nTRACE: acc=%.20Le ; y=%.20Le ; l=%"PRIu64" ; e=%d "), acc, y, l, exp); - if (l%10 >= 5) l+=10; // use the last digit to round + if (l % 10 >= 5) l += 10; // use the 
last digit to round l /= 10; if (l == 0) { - if (*(ch-1)=='-') ch--; + if (*(ch - 1) == '-') ch--; *ch++ = '0'; } else { // Count trailing zeros and therefore s.f. present in l int trailZero = 0; - while (l%10 == 0) { l /= 10; trailZero++; } + while (l % 10 == 0) { l /= 10; trailZero++; } int sf = NUM_SF - trailZero; - if (sf==0) {sf=1; exp++;} // e.g. l was 9999999[5-9] rounded to 10000000 which added 1 digit + if (sf == 0) {sf = 1; exp++;} // e.g. l was 9999999[5-9] rounded to 10000000 which added 1 digit // l is now an unsigned long that doesn't start or end with 0 // sf is the number of digits now in l // exp is e were l to be written with the decimal sep after the first digit - int dr = sf-exp-1; // how many characters to print to the right of the decimal place - int width=0; // field width were it written decimal format. Used to decide whether to or not. - int dl0=0; // how many 0's to add to the left of the decimal place before starting l - if (dr<=0) { dl0 = -dr; dr=0; width=sf+dl0; } // 1, 10, 100, 99000 + int dr = sf - exp - 1; // how many characters to print to the right of the decimal place + int width = 0; // field width were it written decimal format. Used to decide whether to or not. + int dl0 = 0; // how many 0's to add to the left of the decimal place before starting l + if (dr <= 0) { dl0 = -dr; dr = 0; width = sf + dl0; } // 1, 10, 100, 99000 else { - if (sf>dr) width=sf+1; // 1.234 and 123.4 - else { dl0=1; width=dr+1+dl0; } // 0.1234, 0.0001234 + if (sf > dr) width = sf + 1; // 1.234 and 123.4 + else { dl0 = 1; width = dr + 1 + dl0; } // 0.1234, 0.0001234 } // So: 3.1416 => l=31416, sf=5, exp=0 dr=4; dl0=0; width=6 // 30460 => l=3046, sf=4, exp=4 dr=0; dl0=1; width=5 // 0.0072 => l=72, sf=2, exp=-3 dr=4; dl0=1; width=6 - if (width <= sf + (sf>1) + 2 + (abs(exp)>99?3:2) + scipen) { - // ^^^^ to not include 1 char for dec in -7e-04 where sf==1 - // ^ 2 for 'e+'/'e-' + if (width <= sf + (sf > 1) + 2 + (abs(exp) > 99 ? 
3 : 2) + scipen) { + // ^^^^^^ to not include 1 char for dec in -7e-04 where sf==1 + // ^ 2 for 'e+'/'e-' // decimal format ... - ch += width-1; + ch += width - 1; if (dr) { - while (dr && sf) { *ch--='0'+l%10; l/=10; dr--; sf--; } - while (dr) { *ch--='0'; dr--; } + while (dr && sf) { *ch-- = '0' + l % 10; l /= 10; dr--; sf--; } + while (dr) { *ch-- = '0'; dr--; } *ch-- = dec; } - while (dl0) { *ch--='0'; dl0--; } - while (sf) { *ch--='0'+l%10; l/=10; sf--; } + while (dl0) { *ch-- = '0'; dl0--; } + while (sf) { *ch-- = '0' + l % 10; l /= 10; sf--; } // ch is now 1 before the first char of the field so position it afterward again, and done - ch += width+1; + ch += width + 1; } else { // scientific ... ch += sf; // sf-1 + 1 for dec - for (int i=sf; i>1; i--) { - *ch-- = '0' + l%10; + for (int i = sf; i > 1; i--) { + *ch-- = '0' + l % 10; l /= 10; } if (sf == 1) ch--; else *ch-- = dec; @@ -303,7 +303,7 @@ void writeFloat64(const void *col, int64_t row, char **pch) void writeComplex(const void *col, int64_t row, char **pch) { - Rcomplex x = ((const Rcomplex *)col)[row]; + Rcomplex x = ((const Rcomplex*)col)[row]; char *ch = *pch; writeFloat64(&x.r, 0, &ch); if (!ISNAN(x.i)) { @@ -316,32 +316,32 @@ void writeComplex(const void *col, int64_t row, char **pch) // DATE/TIME -static inline void write_time(int32_t x, char **pch) // just a helper called below by the real writers (time-only and datetime) +static inline void write_time(int32_t x, char **pch) { char *ch = *pch; - if (x<0) { // <0 covers NA_INTEGER too (==INT_MIN checked in init.c) + if (x < 0) { // <0 covers NA_INTEGER too (==INT_MIN checked in init.c) write_chars(na, &ch); } else { - int hh = x/3600; - int mm = (x - hh*3600) / 60; - int ss = x%60; - *ch++ = '0'+hh/10; - *ch++ = '0'+hh%10; + int hh = x / 3600; + int mm = (x - hh * 3600) / 60; + int ss = x % 60; + *ch++ = '0' + hh / 10; + *ch++ = '0' + hh % 10; *ch++ = ':'; ch -= squashDateTime; - *ch++ = '0'+mm/10; - *ch++ = '0'+mm%10; + *ch++ = '0' + mm / 10; + 
*ch++ = '0' + mm % 10; *ch++ = ':'; ch -= squashDateTime; - *ch++ = '0'+ss/10; - *ch++ = '0'+ss%10; + *ch++ = '0' + ss / 10; + *ch++ = '0' + ss % 10; } *pch = ch; } void writeITime(const void *col, int64_t row, char **pch) { - write_time(((const int32_t *)col)[row], pch); + write_time(((const int32_t*)col)[row], pch); } static inline void write_date(int32_t x, char **pch) @@ -366,40 +366,40 @@ static inline void write_date(int32_t x, char **pch) // as.integer(as.Date(c("0000-03-01","9999-12-31"))) == c(-719468,+2932896) char *ch = *pch; - if (x< -719468 || x>2932896) { + if (x < -719468 || x > 2932896) { // NA_INTEGER<(-719468) (==INT_MIN checked in init.c) write_chars(na, &ch); } else { x += 719468; // convert days from 1970-01-01 to days from 0000-03-01 (the day after 29 Feb 0000) - int y = (x - x/1461 + x/36525 - x/146097) / 365; // year of the preceding March 1st - int z = x - y*365 - y/4 + y/100 - y/400 + 1; // days from March 1st in year y + int y = (x - x / 1461 + x / 36525 - x / 146097) / 365; // year of the preceding March 1st + int z = x - y * 365 - y / 4 + y / 100 - y / 400 + 1; // days from March 1st in year y int md = monthday[z]; // See fwriteLookups.h for how the 366 item lookup 'monthday' is arranged - y += z && (md/100)<3; // The +1 above turned z=-1 to 0 (meaning Feb29 of year y not Jan or Feb of y+1) + y += z && (md / 100) < 3; // The +1 above turned z=-1 to 0 (meaning Feb29 of year y not Jan or Feb of y+1) - ch += 7 + 2*!squashDateTime; - *ch-- = '0'+md%10; md/=10; - *ch-- = '0'+md%10; md/=10; + ch += 7 + 2 * !squashDateTime; + *ch-- = '0' + md % 10; md /= 10; + *ch-- = '0' + md % 10; md /= 10; *ch-- = '-'; ch += squashDateTime; - *ch-- = '0'+md%10; md/=10; - *ch-- = '0'+md%10; md/=10; + *ch-- = '0' + md % 10; md /= 10; + *ch-- = '0' + md % 10; md /= 10; *ch-- = '-'; ch += squashDateTime; - *ch-- = '0'+y%10; y/=10; - *ch-- = '0'+y%10; y/=10; - *ch-- = '0'+y%10; y/=10; - *ch = '0'+y%10; y/=10; - ch += 8 + 2*!squashDateTime; + *ch-- = '0' + y % 
10; y /= 10; + *ch-- = '0' + y % 10; y /= 10; + *ch-- = '0' + y % 10; y /= 10; + *ch = '0' + y % 10; y /= 10; + ch += 8 + 2 * !squashDateTime; } *pch = ch; } void writeDateInt32(const void *col, int64_t row, char **pch) { - write_date(((const int32_t *)col)[row], pch); + write_date(((const int32_t*)col)[row], pch); } void writeDateFloat64(const void *col, int64_t row, char **pch) { - double x = ((const double *)col)[row]; + double x = ((const double*)col)[row]; write_date(isfinite(x) ? (int)(x) : INT32_MIN, pch); } @@ -412,51 +412,51 @@ void writePOSIXct(const void *col, int64_t row, char **pch) // All positive integers up to 2^53 (9e15) are exactly representable by double which is relied // on in the ops here; number of seconds since epoch. - double x = ((const double *)col)[row]; + double x = ((const double*)col)[row]; char *ch = *pch; if (!isfinite(x)) { write_chars(na, &ch); } else { int64_t xi, d, t; xi = floor(x); - int m = ((x-xi)*10000000); // 7th digit used to round up if 9 - m += (m%10); // 9 is numerical accuracy, 8 or less then we truncate to last microsecond + int m = ((x - xi) * 10000000); // 7th digit used to round up if 9 + m += (m % 10); // 9 is numerical accuracy, 8 or less then we truncate to last microsecond m /= 10; int carry = m / 1000000; // Need to know if we rounded up to a whole second m -= carry * 1000000; xi += carry; - if (xi>=0) { + if (xi >= 0) { d = xi / 86400; t = xi % 86400; } else { // before 1970-01-01T00:00:00Z - d = (xi+1)/86400 - 1; - t = xi - d*86400; // xi and d are both negative here; t becomes the positive number of seconds into the day + d = (xi + 1) / 86400 - 1; + t = xi - d * 86400; // xi and d are both negative here; t becomes the positive number of seconds into the day } write_date(d, &ch); *ch++ = 'T'; ch -= squashDateTime; write_time(t, &ch); - if (squashDateTime || (m && m%1000==0)) { + if (squashDateTime || (m && m % 1000 == 0)) { // when squashDateTime always write 3 digits of milliseconds even if 000, for 
consistent scale of squash integer64 // don't use writeInteger() because it doesn't 0 pad which we need here // integer64 is big enough for squash with milli but not micro; trunc (not round) micro when squash m /= 1000; *ch++ = dec; ch -= squashDateTime; - *(ch+2) = '0'+m%10; m/=10; - *(ch+1) = '0'+m%10; m/=10; - *ch = '0'+m; + *(ch + 2) = '0' + m % 10; m /= 10; + *(ch + 1) = '0' + m % 10; m /= 10; + *(ch + 0) = '0' + m; ch += 3; } else if (m) { // microseconds are present and !squashDateTime *ch++ = dec; - *(ch+5) = '0'+m%10; m/=10; - *(ch+4) = '0'+m%10; m/=10; - *(ch+3) = '0'+m%10; m/=10; - *(ch+2) = '0'+m%10; m/=10; - *(ch+1) = '0'+m%10; m/=10; - *ch = '0'+m; + *(ch + 5) = '0' + m % 10; m /= 10; + *(ch + 4) = '0' + m % 10; m /= 10; + *(ch + 3) = '0' + m % 10; m /= 10; + *(ch + 2) = '0' + m % 10; m /= 10; + *(ch + 1) = '0' + m % 10; m /= 10; + *(ch + 0) = '0' + m; ch += 6; } *ch++ = 'Z'; @@ -468,7 +468,7 @@ void writePOSIXct(const void *col, int64_t row, char **pch) // # nocov start. Covered in other.Rraw test 22, not the main suite. 
void writeNanotime(const void *col, int64_t row, char **pch) { - int64_t x = ((const int64_t *)col)[row]; + int64_t x = ((const int64_t*)col)[row]; char *ch = *pch; if (x == INT64_MIN) { write_chars(na, &ch); @@ -476,14 +476,14 @@ void writeNanotime(const void *col, int64_t row, char **pch) int d/*days*/, s/*secs*/, n/*nanos*/; n = x % 1000000000; x /= 1000000000; - if (x>=0 && n>=0) { + if (x >= 0 && n >= 0) { d = x / 86400; s = x % 86400; } else { // before 1970-01-01T00:00:00.000000000Z if (n) { x--; n += 1000000000; } - d = (x+1)/86400 - 1; - s = x - d*86400; // x and d are both negative here; secs becomes the positive number of seconds into the day + d = (x + 1)/86400 - 1; + s = x - d * 86400; // x and d are both negative here; secs becomes the positive number of seconds into the day } write_date(d, &ch); *ch++ = 'T'; @@ -491,7 +491,7 @@ void writeNanotime(const void *col, int64_t row, char **pch) write_time(s, &ch); *ch++ = dec; ch -= squashDateTime; - for (int i=8; i>=0; i--) { *(ch+i) = '0'+n%10; n/=10; } // always 9 digits for nanoseconds + for (int i = 8; i >= 0; i--) { *(ch + i) = '0' + n % 10; n /= 10; } // always 9 digits for nanoseconds ch += 9; *ch++ = 'Z'; ch -= squashDateTime; @@ -508,18 +508,18 @@ static inline void write_string(const char *x, char **pch) write_chars(na, &ch); } else { int8_t q = doQuote; - if (q==INT8_MIN) { // NA means quote="auto" + if (q == INT8_MIN) { // NA means quote="auto" const char *tt = x; - if (*tt=='\0') { + if (*tt == '\0') { // Empty strings are always quoted to distinguish from ,,==NA - *ch++='"'; *ch++='"'; // test 1732.7 covers this (confirmed in gdb) so it's unknown why codecov claims no coverage + *ch++ = '"'; *ch++ = '"'; // test 1732.7 covers this (confirmed in gdb) so it's unknown why codecov claims no coverage *pch = ch; return; } - while (*tt!='\0' && *tt!=sep && *tt!=sep2 && *tt!='\n' && *tt!='\r' && *tt!='"') *ch++ = *tt++; + while (*tt != '\0' && *tt != sep && *tt != sep2 && *tt != '\n' && *tt != '\r' 
&& *tt != '"') *ch++ = *tt++; // Windows includes \n in its \r\n so looking for \n only is sufficient // sep2 is set to '\0' when no list columns are present - if (*tt=='\0') { + if (*tt == '\0') { // most common case: no sep, newline or " contained in string *pch = ch; // advance caller over the field already written return; @@ -527,20 +527,20 @@ static inline void write_string(const char *x, char **pch) ch = *pch; // rewind the field written since it needs to be quoted q = true; } - if (q==false) { + if (q == false) { write_chars(x, &ch); } else { *ch++ = '"'; const char *tt = x; if (qmethodEscape) { - while (*tt!='\0') { - if (*tt=='"' || *tt=='\\') *ch++ = '\\'; + while (*tt != '\0') { + if (*tt == '"' || *tt == '\\') *ch++ = '\\'; *ch++ = *tt++; } } else { // qmethod='double' - while (*tt!='\0') { - if (*tt=='"') *ch++ = '"'; + while (*tt != '\0') { + if (*tt == '"') *ch++ = '"'; *ch++ = *tt++; } } @@ -552,12 +552,12 @@ static inline void write_string(const char *x, char **pch) void writeString(const void *col, int64_t row, char **pch) { - write_string(getString((const SEXP *)col, row), pch); + write_string(getString((const SEXP*)col, row), pch); } void writeCategString(const void *col, int64_t row, char **pch) { - write_string(getCategString((const SEXP *)col, row), pch); + write_string(getCategString((const SEXP*)col, row), pch); } #ifndef NOZLIB @@ -581,12 +581,12 @@ int compressbuff(z_stream *stream, void* dest, size_t *destLen, const void* sour { stream->next_out = dest; stream->avail_out = *destLen; - stream->next_in = (Bytef *)source; // don't use z_const anywhere; #3939 + stream->next_in = (Bytef*)source; // don't use z_const anywhere; #3939 stream->avail_in = sourceLen; int err = deflate(stream, Z_SYNC_FLUSH); - *destLen = *destLen - stream->avail_out; + *destLen -= stream->avail_out; // *destLen = stream->total_out; - return (err != Z_STREAM_ERROR) ? Z_OK : err; + return err != Z_STREAM_ERROR ? 
Z_OK : err; } #endif @@ -603,11 +603,11 @@ OpenMP is used here primarily to parallelize the process of writing rows void fwriteMain(fwriteMainArgs args) { double startTime = wallclock(); - double nextTime = startTime+2; // start printing progress meter in 2 sec if not completed by then + double nextTime = startTime + 2; // start printing progress meter in 2 sec if not completed by then na = args.na; sep = args.sep; - sepLen = sep=='\0' ? 0 : 1; + sepLen = sep == '\0' ? 0 : 1; sep2 = args.sep2; dec = args.dec; scipen = args.scipen; @@ -627,13 +627,13 @@ void fwriteMain(fwriteMainArgs args) // When NA is a non-empty string, then we must quote all string fields in case they contain the na string // na is recommended to be empty, though - if (na[0]!='\0' && doQuote==INT8_MIN) + if (na[0] != '\0' && doQuote == INT8_MIN) doQuote = true; qmethodEscape = args.qmethodEscape; squashDateTime = args.squashDateTime; - int eolLen=strlen(args.eol), naLen=strlen(args.na); + int eolLen = strlen(args.eol), naLen = strlen(args.na); // Aside: codacy wants strnlen but strnlen is not in C99 (neither is strlen_s). To pass `gcc -std=c99 -Wall -pedantic` // we'd need `#define _POSIX_C_SOURCE 200809L` before #include but that seems a step too far // and platform specific. We prefer to be pure C99. 
@@ -643,12 +643,12 @@ void fwriteMain(fwriteMainArgs args) if (verbose) { DTPRINT(_("Column writers: ")); // # notranslate start - if (args.ncol<=50) { - for (int j=0; j> column name) } @@ -860,9 +860,9 @@ void fwriteMain(fwriteMainArgs args) char *buff = buffPool; char *ch = buff; if (args.bom) { - *ch++=(char)0xEF; - *ch++=(char)0xBB; - *ch++=(char)0xBF; + *ch++ = (char)0xEF; + *ch++ = (char)0xBB; + *ch++ = (char)0xBF; } // 3 appears above (search for "bom") memcpy(ch, args.yaml, yamlLen); ch += yamlLen; @@ -871,15 +871,15 @@ void fwriteMain(fwriteMainArgs args) // Unusual: the extra blank column name when row_names are added as the first column if (doQuote !=0) { // to match write.csv - *ch++='"'; - *ch++='"'; + *ch++ = '"'; + *ch++ = '"'; } *ch = sep; ch += sepLen; } int8_t tempDoQuote = doQuote; doQuote = quoteHeaders; // temporary overwrite since headers might get different quoting behavior, #2964 - for (int j=0; j < args.ncol; j++) { + for (int j = 0; j < args.ncol; j++) { writeString(args.colNames, j, &ch); *ch = sep; ch += sepLen; @@ -892,7 +892,7 @@ void fwriteMain(fwriteMainArgs args) *ch = '\0'; DTPRINT("%s", buff); // # notranslate } else { - int ret1=0, ret2=0; + int ret1 = 0, ret2 = 0; #ifndef NOZLIB if (args.is_gzip) { char* zbuff = zbuffPool; @@ -902,13 +902,13 @@ void fwriteMain(fwriteMainArgs args) crc = crc32(crc, (unsigned char*)buff, len); ret1 = compressbuff(&strm, zbuff, &zbuffUsed, buff, len); deflateEnd(&strm); - if (ret1==Z_OK) { + if (ret1 == Z_OK) { ret2 = WRITE(f, zbuff, (int)zbuffUsed); compress_len += zbuffUsed; } } else { #endif - ret2 = WRITE(f, buff, (int)(ch-buff)); + ret2 = WRITE(f, buff, (int)(ch - buff)); #ifndef NOZLIB } #endif @@ -933,7 +933,7 @@ void fwriteMain(fwriteMainArgs args) } #endif if (verbose) - DTPRINT(_("Initialization done in %.3fs\n"), 1.0*(wallclock()-t0)); + DTPRINT(_("Initialization done in %.3fs\n"), 1.0 * (wallclock() - t0)); // empty file is test in fwrite.R if (args.nrow == 0) { @@ -961,7 +961,7 @@ void 
fwriteMain(fwriteMainArgs args) // main parallel loop ---- #pragma omp parallel for ordered num_threads(nth) schedule(dynamic) - for(int64_t start=0; start < args.nrow; start += rowsPerBatch) { + for(int64_t start = 0; start < args.nrow; start += rowsPerBatch) { int me = omp_get_thread_num(); int my_failed_compress = 0; char* myBuff = buffPool + me * buffSize; @@ -989,25 +989,25 @@ void fwriteMain(fwriteMainArgs args) for (int64_t i = start; i < end; i++) { // Tepid starts here (once at beginning of each line) if (args.doRowNames) { - if (args.rowNames==NULL) { - if (doQuote==1) - *ch++='"'; - int64_t rn = i+1; + if (args.rowNames == NULL) { + if (doQuote == 1) + *ch++ = '"'; + int64_t rn = i + 1; writeInt64(&rn, 0, &ch); - if (doQuote==1) - *ch++='"'; + if (doQuote == 1) + *ch++ = '"'; } else { - if (args.rowNameFun != WF_String && doQuote==1) - *ch++='"'; + if (args.rowNameFun != WF_String && doQuote == 1) + *ch++ = '"'; (args.funs[args.rowNameFun])(args.rowNames, i, &ch); // #5098 - if (args.rowNameFun != WF_String && doQuote==1) - *ch++='"'; + if (args.rowNameFun != WF_String && doQuote == 1) + *ch++ = '"'; } *ch = sep; ch += sepLen; } // Hot loop - for (int j=0; j maxBuffUsedPC) maxBuffUsedPC = used; double now; - if (me == 0 && !failed && args.showProgress && (now=wallclock()) >= nextTime) { + if (me == 0 && !failed && args.showProgress && (now = wallclock()) >= nextTime) { // See comments above inside the f==-1 clause. // Not only is this ordered section one-at-a-time but we'll also Rprintf() here only from the // master thread (me==0) and hopefully this will work on Windows. If not, user should set // showProgress=FALSE until this can be fixed or removed. 
- int ETA = (int)((args.nrow - end) * (now-startTime) /end); + int ETA = (int)((args.nrow - end) * (now - startTime) / end); if (hasPrinted || ETA >= 2) { // # nocov start if (verbose && !hasPrinted) DTPRINT("\n"); // # notranslate DTPRINT(Pl_(nth, "\rWritten %.1f%% of %"PRId64" rows in %d secs using %d thread. maxBuffUsed=%d%%. ETA %d secs. ", "\rWritten %.1f%% of %"PRId64" rows in %d secs using %d threads. maxBuffUsed=%d%%. ETA %d secs. "), - (100.0*end)/args.nrow, args.nrow, (int)(now-startTime), nth, maxBuffUsedPC, ETA); // # nocov + (100.0 * end) / args.nrow, args.nrow, (int)(now - startTime), nth, maxBuffUsedPC, ETA); // # nocov // TODO: use progress() as in fread nextTime = now + 1; hasPrinted = true; @@ -1100,7 +1100,7 @@ void fwriteMain(fwriteMainArgs args) free(zbuffPool); /* put a 4-byte integer into a byte array in LSB order */ -#define PUT4(a,b) ((a)[0]=(b), (a)[1]=(b)>>8, (a)[2]=(b)>>16, (a)[3]=(b)>>24) +#define PUT4(a,b) ((a)[0] = (b), (a)[1] = (b) >> 8, (a)[2] = (b) >> 16, (a)[3] = (b) >> 24) // write gzip tailer with crc and len if (args.is_gzip) { @@ -1139,7 +1139,7 @@ void fwriteMain(fwriteMainArgs args) "Wrote %"PRId64" rows in %.3f secs using %d thread. MaxBuffUsed=%d%%\n"), Pl_(args.nrow, "Wrote %"PRId64" row in %.3f secs using %d threads. MaxBuffUsed=%d%%\n", "Wrote %"PRId64" rows in %.3f secs using %d threads. 
MaxBuffUsed=%d%%\n")), - args.nrow, 1.0*(wallclock()-t0), nth, maxBuffUsedPC); + args.nrow, 1.0 * (wallclock() - t0), nth, maxBuffUsedPC); } if (f != -1 && CLOSE(f) && !failed) diff --git a/src/fwriteR.c b/src/fwriteR.c index 3de74e6da2..60b011aa99 100644 --- a/src/fwriteR.c +++ b/src/fwriteR.c @@ -5,15 +5,15 @@ #define DATETIMEAS_EPOCH 2 #define DATETIMEAS_WRITECSV 3 -static bool utf8=false; -static bool native=false; +static bool utf8 = false; +static bool native = false; #define TO_UTF8(s) (utf8 && NEED2UTF8(s)) -#define TO_NATIVE(s) (native && (s)!=NA_STRING && !IS_ASCII(s)) +#define TO_NATIVE(s) (native && (s) != NA_STRING && !IS_ASCII(s)) #define ENCODED_CHAR(s) (TO_UTF8(s) ? translateCharUTF8(s) : (TO_NATIVE(s) ? translateChar(s) : CHAR(s))) static char sep2; // '\0' if there are no list columns. Otherwise, the within-column separator. -static bool logical01=true; // should logicals be written as 0|1 or true|false. Needed by list column writer too in case a cell is a logical vector. -static int dateTimeAs=0; // 0=ISO(yyyy-mm-dd), 1=squash(yyyymmdd), 2=epoch, 3=write.csv +static bool logical01 = true; // should logicals be written as 0|1 or true|false. Needed by list column writer too in case a cell is a logical vector. +static int dateTimeAs = 0; // 0=ISO(yyyy-mm-dd), 1=squash(yyyymmdd), 2=epoch, 3=write.csv static const char *sep2start, *sep2end; // sep2 is in main fwrite.c so that writeString can quote other fields if sep2 is present in them // if there are no list columns, set sep2=='\0' @@ -21,7 +21,7 @@ static const char *sep2start, *sep2end; const char *getString(const SEXP *col, int64_t row) { // TODO: inline for use in fwrite.c SEXP x = col[row]; - return x==NA_STRING ? NULL : ENCODED_CHAR(x); + return x == NA_STRING ? 
NULL : ENCODED_CHAR(x); } int getStringLen(SEXP *col, int64_t row) { @@ -29,13 +29,13 @@ int getStringLen(SEXP *col, int64_t row) { } int getMaxStringLen(const SEXP *col, const int64_t n) { - int max=0; - SEXP last=NULL; - for (int64_t i=0; imax) max=thisnchar; + if (thisnchar > max) max = thisnchar; last = this; } return max; @@ -44,13 +44,13 @@ int getMaxStringLen(const SEXP *col, const int64_t n) { int getMaxCategLen(SEXP col) { col = getAttrib(col, R_LevelsSymbol); if (!isString(col)) internal_error(__func__, "col passed to getMaxCategLen is missing levels"); - return getMaxStringLen( STRING_PTR_RO(col), LENGTH(col) ); + return getMaxStringLen(STRING_PTR_RO(col), LENGTH(col)); } const char *getCategString(SEXP col, int64_t row) { // the only writer that needs to have the header of the SEXP column, to get to the levels int x = INTEGER(col)[row]; - return x==NA_INTEGER ? NULL : ENCODED_CHAR(STRING_ELT(getAttrib(col, R_LevelsSymbol), x-1)); + return x == NA_INTEGER ? NULL : ENCODED_CHAR(STRING_ELT(getAttrib(col, R_LevelsSymbol), x - 1)); } writer_fun_t *funs[] = { @@ -74,16 +74,16 @@ writer_fun_t *funs[] = { static int32_t whichWriter(SEXP); void writeList(const void *col, int64_t row, char **pch) { - SEXP v = ((const SEXP *)col)[row]; + SEXP v = ((const SEXP*)col)[row]; int32_t wf = whichWriter(v); - if (TYPEOF(v)==VECSXP || wf==INT32_MIN || isFactor(v)) { + if (TYPEOF(v) == VECSXP || wf == INT32_MIN || isFactor(v)) { internal_error(__func__, "TYPEOF(v)!=VECSXP && wf!=INT32_MIN && !isFactor(v); getMaxListItem should have caught this up front"); // # nocov } char *ch = *pch; write_chars(sep2start, &ch); const void *data = DATAPTR_RO(v); writer_fun_t *fun = funs[wf]; - for (int j=0; jmax) max=width; + if (width > max) max = width; last = this; } return max; @@ -124,17 +124,18 @@ static int32_t whichWriter(SEXP column) { case LGLSXP: return logical01 ? 
WF_Bool32 : WF_Bool32AsString; case INTSXP: - if (isFactor(column)) return WF_CategString; - if (dateTimeAs==DATETIMEAS_EPOCH) return WF_Int32; - if (INHERITS(column, char_ITime)) return WF_ITime; - if (INHERITS(column, char_Date)) return WF_DateInt32; + if (isFactor(column)) return WF_CategString; + if (dateTimeAs == DATETIMEAS_EPOCH) return WF_Int32; + if (INHERITS(column, char_ITime)) return WF_ITime; + if (INHERITS(column, char_Date)) return WF_DateInt32; return WF_Int32; case REALSXP: - if (INHERITS(column, char_nanotime) && dateTimeAs!=DATETIMEAS_EPOCH) return WF_Nanotime; - if (INHERITS(column, char_integer64))return WF_Int64; - if (dateTimeAs==DATETIMEAS_EPOCH) return WF_Float64; - if (INHERITS(column, char_Date)) return WF_DateFloat64; - if (INHERITS(column, char_POSIXct)) return WF_POSIXct; + if (INHERITS(column, char_nanotime) + && dateTimeAs != DATETIMEAS_EPOCH) return WF_Nanotime; + if (INHERITS(column, char_integer64)) return WF_Int64; + if (dateTimeAs == DATETIMEAS_EPOCH) return WF_Float64; + if (INHERITS(column, char_Date)) return WF_DateFloat64; + if (INHERITS(column, char_POSIXct)) return WF_POSIXct; return WF_Float64; case CPLXSXP: return WF_Complex; @@ -176,7 +177,7 @@ SEXP fwriteR( { if (!isNewList(DF)) error(_("fwrite must be passed an object of type list; e.g. data.frame, data.table")); - fwriteMainArgs args = {0}; // {0} to quieten valgrind's uninitialized, #4639 + fwriteMainArgs args = { 0 }; // { 0 } to quieten valgrind's uninitialized, #4639 args.is_gzip = LOGICAL(is_gzip_Arg)[0]; args.gzip_level = INTEGER(gzip_level_Arg)[0]; args.bom = LOGICAL(bom_Arg)[0]; @@ -184,7 +185,7 @@ SEXP fwriteR( args.verbose = LOGICAL(verbose_Arg)[0]; args.filename = CHAR(STRING_ELT(filename_Arg, 0)); args.ncol = length(DF); - if (args.ncol==0) { + if (args.ncol == 0) { warning(_("fwrite was passed an empty list of no columns. 
Nothing to write.")); return R_NilValue; } @@ -194,9 +195,9 @@ SEXP fwriteR( int protecti = 0; dateTimeAs = INTEGER(dateTimeAs_Arg)[0]; if (dateTimeAs == DATETIMEAS_WRITECSV) { - int j=0; - while(j Date: Wed, 9 Jul 2025 09:55:44 +0530 Subject: [PATCH 23/38] remove duplicate --- R/as.data.table.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index 29ccb163c2..18caea82db 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -171,7 +171,6 @@ as.data.table.list = function(x, xi = x[[i]] = c(xi) } else { xi = x[[i]] = as.data.table(xi, keep.rownames=FALSE) # we will never allow a matrix to be a column; always unpack the columns - xi = x[[i]] = as.data.table(xi, keep.rownames=FALSE) # we will never allow a matrix to be a column; always unpack the columns } } # else avoid dispatching to as.data.table.data.table (which exists and copies) From 7fe757d66f6388aed0a7bb3ba731f4351642f196 Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Wed, 9 Jul 2025 10:02:44 +0530 Subject: [PATCH 24/38] remove list from as.data.table --- inst/tests/tests.Rraw | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3729493886..b49608ecd6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21410,18 +21410,18 @@ test(2329.3, print(dt, col.names = "none"), output = "1: NA NaN\n") # Row name extraction from multiple vectors, #7136 x <- c(1, 2, 3) y <- setNames(c(4, 5, 6), c("A", "B", "C")) -test(2330.1, as.data.table.list(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) -test(2330.2, as.data.table.list(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) -test(2330.3, as.data.table.list(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(4, 5, 6), V2=c(1, 2, 3))) +test(2330.1, as.data.table(list(x, y), 
keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) +test(2330.2, as.data.table(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) +test(2330.3, as.data.table(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(4, 5, 6), V2=c(1, 2, 3))) # Behavior under data.frame() test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(4, 5, 6), V2=c(1, 2, 3))) a <- setNames(c(7, 8, 9), c("", "", "")) # test condition about any(nzchar(valid_names)) -test(2330.6, as.data.table.list(list(a), keep.rownames=TRUE), data.table(V1=c(7, 8, 9))) +test(2330.6, as.data.table(list(a), keep.rownames=TRUE), data.table(V1=c(7, 8, 9))) b <- setNames(c(10, 11, 12), c("", "B", "")) -test(2330.7, as.data.table.list(list(b), keep.rownames=TRUE), data.table(rn=c("", "B", ""), V1=c(10, 11, 12))) +test(2330.7, as.data.table(list(b), keep.rownames=TRUE), data.table(rn=c("", "B", ""), V1=c(10, 11, 12))) DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames test(2330.8, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=c("a", "b", "c", "d", "e", "f"), a=c(6, 5, 4, 3, 2, 1), V=c(1, 2, 3, 4, 5, 6))) From 555fcc7b9d3528d8248cb2ec847ba1a2b2d0184e Mon Sep 17 00:00:00 2001 From: Mukul Kumar Date: Wed, 9 Jul 2025 18:27:05 +0530 Subject: [PATCH 25/38] added vignettes --- vignettes/datatable-programming.Rmd | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/vignettes/datatable-programming.Rmd b/vignettes/datatable-programming.Rmd index 34c6d77fda..833dedf6cc 100644 --- a/vignettes/datatable-programming.Rmd +++ b/vignettes/datatable-programming.Rmd @@ -456,6 +456,50 @@ DT[, eval(cl)] DT[, cl, env = list(cl = cl)] ``` +## Error handling with classed 
conditions + +Starting from version 1.17.0, `data.table` provides specific error classes for common operations, making it easier to handle errors programmatically. This is particularly useful when writing robust code or packages that use `data.table`. + +### Available error classes + +`data.table` now provides four specific error classes: + +- `dt_missing_column_error`: When referencing columns that don't exist +- `dt_invalid_input_error`: When providing invalid input types or empty required arguments +- `dt_unsortable_type_error`: When trying to sort/key unsupported types +- `dt_join_type_mismatch_error`: When column types are incompatible in joins/set operations + +### Usage examples + +```{r error_handling, error=TRUE} +DT <- data.table(a = 1:3, b = 4:6) + +# Handle missing column errors specifically +tryCatch({ + setkey(DT, "nonexistent_col") +}, dt_missing_column_error = function(e) { + cat("Missing column detected:", conditionMessage(e), "\n") +}, error = function(e) { + cat("Other error:", conditionMessage(e), "\n") +}) + +# Handle type mismatches in operations +DT1 <- data.table(id = 1:3, value = c("a", "b", "c")) +DT2 <- data.table(id = 1:3, value = 1:3) + +tryCatch({ + fintersect(DT1, DT2) +}, dt_join_type_mismatch_error = function(e) { + cat("Type mismatch in join:", conditionMessage(e), "\n") +}, error = function(e) { + cat("Other error:", conditionMessage(e), "\n") +}) +``` + +### Backward compatibility + +All error classes inherit from base R's condition system, so existing `tryCatch(..., error = ...)` code continues to work unchanged. The new classes simply provide more specific handling options when needed. 
+ ```{r cleanup, echo=FALSE} options(.opts) registerS3method("print", "data.frame", base::print.data.frame) From 1bb1e460c14d14b85598e4ce2ddfe5180207d1a5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 9 Jul 2025 09:46:29 -0700 Subject: [PATCH 26/38] rm ws --- R/as.data.table.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index 18caea82db..dd69a11d40 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -140,14 +140,14 @@ as.data.table.list = function(x, # Handle keep.rownames for vectors (mimicking data.frame behavior) vector_rownames = NULL check_rownames = !isFALSE(keep.rownames) - + for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above if (check_rownames && is.null(vector_rownames)) { # Check for named vectors if (is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi))) { - valid_names = names(xi) + valid_names = names(xi) if (any(nzchar(valid_names))) { vector_rownames = valid_names x[[i]] = unname(xi) @@ -160,7 +160,7 @@ as.data.table.list = function(x, vector_rownames = valid_names } } - } + } if (!is.null(dim(xi)) && missing.check.names) check.names=TRUE if ("POSIXlt" %chin% class(xi)) { warningf("POSIXlt column type detected and converted to POSIXct. 
We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.") From 8956d2eac3b92eb4a78f28fcde9006b88add71ed Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 9 Jul 2025 09:54:37 -0700 Subject: [PATCH 27/38] try to simplify --- R/as.data.table.R | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index dd69a11d40..a2a9f3de6b 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -144,20 +144,15 @@ as.data.table.list = function(x, for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above - if (check_rownames && is.null(vector_rownames)) { - # Check for named vectors - if (is.atomic(xi) && !is.null(names(xi)) && is.null(dim(xi))) { - valid_names = names(xi) - if (any(nzchar(valid_names))) { - vector_rownames = valid_names + if (check_rownames && is.null(vector_rownames) && is.atomic(xi)) { + if (is.null(dim(xi))) { + if (!is.null(nm <- names(xi)) && any(nzchar(nm))) { + vector_rownames = nm x[[i]] = unname(xi) } - } - # Check for data.frames or matrices with explicit rownames - else if (!is.null(dim(xi)) && !is.null(rownames(xi))) { - valid_names = rownames(xi) - if (any(nzchar(valid_names))) { - vector_rownames = valid_names + } else { + if (!is.null(nm <- rownames(xi)) && any(nzchar(nm))) { + vector_rownames = nm } } } From ed472a212afbc7664d00410930ccde896d073ff3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 9 Jul 2025 09:57:54 -0700 Subject: [PATCH 28/38] fix tests --- inst/tests/tests.Rraw | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b49608ecd6..fd6506ccf3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21415,8 +21415,8 @@ test(2330.2, as.data.table(list(x, y), keep.rownames="custom"), data.table(custo test(2330.3, as.data.table(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), 
V1=c(4, 5, 6), V2=c(1, 2, 3))) # Behavior under data.frame() -test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) -test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(4, 5, 6), V2=c(1, 2, 3))) +test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), x=c(1, 2, 3), y=c(4, 5, 6))) +test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), y=c(4, 5, 6), x=c(1, 2, 3))) a <- setNames(c(7, 8, 9), c("", "", "")) # test condition about any(nzchar(valid_names)) test(2330.6, as.data.table(list(a), keep.rownames=TRUE), data.table(V1=c(7, 8, 9))) @@ -21424,4 +21424,4 @@ b <- setNames(c(10, 11, 12), c("", "B", "")) test(2330.7, as.data.table(list(b), keep.rownames=TRUE), data.table(rn=c("", "B", ""), V1=c(10, 11, 12))) DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames -test(2330.8, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=c("a", "b", "c", "d", "e", "f"), a=c(6, 5, 4, 3, 2, 1), V=c(1, 2, 3, 4, 5, 6))) +test(2330.8, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=c("a", "b", "c", "d", "e", "f"), a=c(6, 5, 4, 3, 2, 1), V=1:6)) From eb504091f114089da92cc32c948f181ba6713bb8 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 9 Jul 2025 10:05:44 -0700 Subject: [PATCH 29/38] restore --- R/as.data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index a2a9f3de6b..03f423c5d5 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -165,7 +165,7 @@ as.data.table.list = function(x, if (is.matrix(xi) && NCOL(xi)<=1L && is.null(colnames(xi))) { # 1 column matrix naming #4124 xi = x[[i]] = c(xi) } else { - xi = x[[i]] = as.data.table(xi, keep.rownames=FALSE) # we will never allow a matrix to be a column; always unpack the columns 
+ xi = x[[i]] = as.data.table(xi, keep.rownames=keep.rownames) # we will never allow a matrix to be a column; always unpack the columns } } # else avoid dispatching to as.data.table.data.table (which exists and copies) From 7a79a1878286f8057d8abaa81efc3f23dedbe0bf Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 9 Jul 2025 10:23:20 -0700 Subject: [PATCH 30/38] try and handle "inner" row names from matrix case --- R/as.data.table.R | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index 03f423c5d5..b08e6c22ea 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -138,21 +138,21 @@ as.data.table.list = function(x, empty_atomic = FALSE # Handle keep.rownames for vectors (mimicking data.frame behavior) - vector_rownames = NULL + rownames_ = NULL check_rownames = !isFALSE(keep.rownames) for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above - if (check_rownames && is.null(vector_rownames) && is.atomic(xi)) { + if (check_rownames && is.null(rownames_)) { if (is.null(dim(xi))) { if (!is.null(nm <- names(xi)) && any(nzchar(nm))) { - vector_rownames = nm + rn = nm x[[i]] = unname(xi) } } else { if (!is.null(nm <- rownames(xi)) && any(nzchar(nm))) { - vector_rownames = nm + rownames_ = nm } } } @@ -222,13 +222,22 @@ as.data.table.list = function(x, if (check.names) vnames = make.names(vnames, unique=TRUE) # Add rownames column when vector names were found - if (!is.null(vector_rownames)) { + if (!is.null(rownames_)) { rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn" - ans = c(list(recycle(vector_rownames, nrow)), ans) + ans = c(list(recycle(rownames_, nrow)), ans) vnames = c(rn_name, vnames) + } else if (check_rownames) { + # case like data.table(a = 1, data.frame(b = 2, row.names='c')) where expanding the inner DF picks up the row names -- + # we want to bump the resulting column to the front of the output 
+ rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn" + if (!is.na(idx <- chmatch(rn_name, names(ans))[1L]) && idx != 1L) { + ans = c(ans[[idx]], ans[-idx]) + vnames = c(vnames[idx], vnames[-idx]) + } } setattr(ans, "names", vnames) setDT(ans, key=key) # copy ensured above; also, setDT handles naming + if (!is.null(rownames_) && match(rn_name, names( if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames) # PR 3854 and tests 2058.15-17 ans } From bdc6b9995926c4991dbc5f709e4106135833d310 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 9 Jul 2025 10:23:44 -0700 Subject: [PATCH 31/38] rm vestigial --- R/as.data.table.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index b08e6c22ea..de2ab6457d 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -237,7 +237,6 @@ as.data.table.list = function(x, } setattr(ans, "names", vnames) setDT(ans, key=key) # copy ensured above; also, setDT handles naming - if (!is.null(rownames_) && match(rn_name, names( if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames) # PR 3854 and tests 2058.15-17 ans } From b9d879e6338fd1d45d6db16d5d862f8967165111 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 9 Jul 2025 17:46:00 +0000 Subject: [PATCH 32/38] fix --- R/as.data.table.R | 15 ++++++--------- inst/tests/tests.Rraw | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index de2ab6457d..d109b5d435 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -147,7 +147,7 @@ as.data.table.list = function(x, if (check_rownames && is.null(rownames_)) { if (is.null(dim(xi))) { if (!is.null(nm <- names(xi)) && any(nzchar(nm))) { - rn = nm + rownames_ = nm x[[i]] = unname(xi) } } else { @@ -224,15 +224,12 @@ as.data.table.list = function(x, # Add rownames column when vector names were found if (!is.null(rownames_)) { rn_name = if (is.character(keep.rownames)) 
keep.rownames[1L] else "rn" - ans = c(list(recycle(rownames_, nrow)), ans) - vnames = c(rn_name, vnames) - } else if (check_rownames) { - # case like data.table(a = 1, data.frame(b = 2, row.names='c')) where expanding the inner DF picks up the row names -- - # we want to bump the resulting column to the front of the output - rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn" - if (!is.na(idx <- chmatch(rn_name, names(ans))[1L]) && idx != 1L) { - ans = c(ans[[idx]], ans[-idx]) + if (!is.na(idx <- chmatch(rn_name, vnames)[1L])) { + ans = c(list(ans[[idx]]), ans[-idx]) vnames = c(vnames[idx], vnames[-idx]) + } else { + ans = c(list(recycle(rownames_, nrow)), ans) + vnames = c(rn_name, vnames) } } setattr(ans, "names", vnames) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fd6506ccf3..13329e75ab 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21424,4 +21424,4 @@ b <- setNames(c(10, 11, 12), c("", "B", "")) test(2330.7, as.data.table(list(b), keep.rownames=TRUE), data.table(rn=c("", "B", ""), V1=c(10, 11, 12))) DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames -test(2330.8, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=c("a", "b", "c", "d", "e", "f"), a=c(6, 5, 4, 3, 2, 1), V=1:6)) +test(2330.8, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=c("a", "b", "c", "d", "e", "f"), a=6:1, V=1:6)) From 13ed34655d6060b88acdd91b5ed146dc9820542d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 9 Jul 2025 17:50:24 +0000 Subject: [PATCH 33/38] simplify tests --- inst/tests/tests.Rraw | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 13329e75ab..2825e2d8f6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21408,20 +21408,20 @@ dt = data.table(a = NA_integer_, b = NaN) test(2329.3, print(dt, col.names = "none"), output = "1: 
NA NaN\n") # Row name extraction from multiple vectors, #7136 -x <- c(1, 2, 3) -y <- setNames(c(4, 5, 6), c("A", "B", "C")) -test(2330.1, as.data.table(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) -test(2330.2, as.data.table(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=c(1, 2, 3), V2=c(4, 5, 6))) -test(2330.3, as.data.table(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=c(4, 5, 6), V2=c(1, 2, 3))) +x <- 1:3 +y <- setNames(4:6, c("A", "B", "C")) +test(2330.1, as.data.table(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=1:3, V2=4:6)) +test(2330.2, as.data.table(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=1:3, V2=4:6)) +test(2330.3, as.data.table(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=4:6, V2=1:3)) # Behavior under data.frame() -test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), x=c(1, 2, 3), y=c(4, 5, 6))) -test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), y=c(4, 5, 6), x=c(1, 2, 3))) +test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), x=1:3, y=4:6)) +test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), y=4:6, x=1:3)) -a <- setNames(c(7, 8, 9), c("", "", "")) # test condition about any(nzchar(valid_names)) -test(2330.6, as.data.table(list(a), keep.rownames=TRUE), data.table(V1=c(7, 8, 9))) -b <- setNames(c(10, 11, 12), c("", "B", "")) -test(2330.7, as.data.table(list(b), keep.rownames=TRUE), data.table(rn=c("", "B", ""), V1=c(10, 11, 12))) +a <- setNames(7:9, c("", "", "")) # test condition about any(nzchar(valid_names)) +test(2330.6, as.data.table(list(a), keep.rownames=TRUE), data.table(V1=7:9)) +b <- setNames(10:12, c("", "B", "")) +test(2330.7, as.data.table(list(b), keep.rownames=TRUE), 
data.table(rn=c("", "B", ""), V1=10:12)) DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames -test(2330.8, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=c("a", "b", "c", "d", "e", "f"), a=6:1, V=1:6)) +test(2330.8, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=letters[1:6], a=6:1, V=1:6)) From 311f145a9a9878872ee7f865ea75bdcd533f880d Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Thu, 10 Jul 2025 01:47:10 +0530 Subject: [PATCH 34/38] remove any(nzchar(nm)) --- R/as.data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index d109b5d435..bd7f97fa79 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -146,12 +146,12 @@ as.data.table.list = function(x, if (is.null(xi)) next # eachncol already initialized to 0 by integer() above if (check_rownames && is.null(rownames_)) { if (is.null(dim(xi))) { - if (!is.null(nm <- names(xi)) && any(nzchar(nm))) { + if (!is.null(nm <- names(xi))) { rownames_ = nm x[[i]] = unname(xi) } } else { - if (!is.null(nm <- rownames(xi)) && any(nzchar(nm))) { + if (!is.null(nm <- rownames(xi))) { rownames_ = nm } } From 511487170bbcf438ba222261159cd51fcd3769f3 Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Thu, 10 Jul 2025 01:49:04 +0530 Subject: [PATCH 35/38] remove test condition about any(nzchar(nm)) --- inst/tests/tests.Rraw | 5 ----- 1 file changed, 5 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2825e2d8f6..308e1ae710 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21418,10 +21418,5 @@ test(2330.3, as.data.table(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), x=1:3, y=4:6)) test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), 
data.table(rn=c("A", "B", "C"), y=4:6, x=1:3)) -a <- setNames(7:9, c("", "", "")) # test condition about any(nzchar(valid_names)) -test(2330.6, as.data.table(list(a), keep.rownames=TRUE), data.table(V1=7:9)) -b <- setNames(10:12, c("", "B", "")) -test(2330.7, as.data.table(list(b), keep.rownames=TRUE), data.table(rn=c("", "B", ""), V1=10:12)) - DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames test(2330.8, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=letters[1:6], a=6:1, V=1:6)) From d42f1b3d1efc476d235d94b10d7e67853ffc7008 Mon Sep 17 00:00:00 2001 From: Mukul <145585624+Mukulyadav2004@users.noreply.github.com> Date: Thu, 10 Jul 2025 01:49:47 +0530 Subject: [PATCH 36/38] update test number --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 308e1ae710..1127762924 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21419,4 +21419,4 @@ test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn= test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), y=4:6, x=1:3)) DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames -test(2330.8, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=letters[1:6], a=6:1, V=1:6)) +test(2330.6, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=letters[1:6], a=6:1, V=1:6)) From 3a8f73ed49fb7df2559f5ce8923dc0493efb2c26 Mon Sep 17 00:00:00 2001 From: Mukul Kumar Date: Thu, 10 Jul 2025 17:51:44 +0530 Subject: [PATCH 37/38] update description , news.md and add tests --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 6 ++++++ man/as.data.table.Rd | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 09ad8fc38a..aa8ac388e0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -42,6 +42,8 @@ 8. 
`groupingsets()` gets a new argument `enclos` for use together with the `jj` argument in functions wrapping `groupingsets()`, including the existing wrappers `rollup()` and `cube()`, [#5560](https://github.com/Rdatatable/data.table/issues/5560). When forwarding a `j`-expression as `groupingsets(jj = substitute(j))`, make sure to pass `enclos = parent.frame()` as well, so that the `j`-expression will be evaluated in the right context. This makes it possible for `j` to refer to variables outside the `data.table`. Thanks @sindribaldur for the report and @aitap for the fix. +9. `data.table()` and `as.data.table()` with `keep.rownames=TRUE` now extract row names from named vectors, matching `data.frame()` behavior. Names from the first named vector in the input are used to create the row names column (default name `"rn"` or custom name via `keep.rownames="column_name"`), [#1916](https://github.com/Rdatatable/data.table/issues/1916). Thanks to @richierocks for the feature request and @Mukulyadav2004 for the implementation. + ### BUG FIXES 1. Custom binary operators from the `lubridate` package now work with objects of class `IDate` as with a `Date` subclass, [#6839](https://github.com/Rdatatable/data.table/issues/6839). Thanks @emallickhossain for the report and @aitap for the fix. 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1127762924..aceeb77f89 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21420,3 +21420,9 @@ test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn= DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames test(2330.6, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=letters[1:6], a=6:1, V=1:6)) + +z <- setNames(1:3, rep("", 3)) # vector with all-empty names # behaviour with all-empty row names +test(2330.7, as.data.table(list(z), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3)) + +M <- matrix(1:6, nrow=3, dimnames=list(rep("", 3), c("V1", "V2"))) # test of list(M) for empty-rowname'd matrix input +test(2330.8, as.data.table(list(M), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3, V2=4:6)) diff --git a/man/as.data.table.Rd b/man/as.data.table.Rd index 6c4db54887..fbec798c81 100644 --- a/man/as.data.table.Rd +++ b/man/as.data.table.Rd @@ -31,7 +31,7 @@ is.data.table(x) } \arguments{ \item{x}{An R object.} - \item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead.} + \item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead. For lists and when calling \code{data.table()}, names from the first named vector are extracted and used as row names, similar to \code{data.frame()} behavior.} \item{key}{ Character vector of one or more column names which is passed to \code{\link{setkeyv}}. } \item{sorted}{logical used in \emph{array} method, default \code{TRUE} is overridden when \code{key} is provided. 
} \item{value.name}{character scalar used in \emph{array} method, default \code{"value"}.} From 6dbc4144c0e4a1d30f8db35bb9084b754aecc061 Mon Sep 17 00:00:00 2001 From: Mukul Kumar Date: Thu, 10 Jul 2025 18:07:27 +0530 Subject: [PATCH 38/38] remove unwanted changes --- R/groupingsets.R | 4 +-- R/merge.R | 2 +- R/setkey.R | 8 +++--- R/setops.R | 6 ++-- vignettes/datatable-programming.Rmd | 44 ----------------------------- 5 files changed, 10 insertions(+), 54 deletions(-) diff --git a/R/groupingsets.R b/R/groupingsets.R index f5fc2101f1..7112308323 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -4,7 +4,7 @@ rollup = function(x, ...) { rollup.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { # input data type basic validation if (!is.data.table(x)) - stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error") + stopf("Argument 'x' must be a data.table object") if (!is.character(by)) stopf("Argument 'by' must be a character vector of column names used in grouping.") if (!is.logical(id)) @@ -22,7 +22,7 @@ cube = function(x, ...) { cube.data.table = function(x, j, by, .SDcols, id = FALSE, label = NULL, ...) { # input data type basic validation if (!is.data.table(x)) - stopf("Argument 'x' must be a data.table object", class="dt_invalid_input_error") + stopf("Argument 'x' must be a data.table object") if (!is.character(by)) stopf("Argument 'by' must be a character vector of column names used in grouping.") if (!is.logical(id)) diff --git a/R/merge.R b/R/merge.R index 00cbedea17..c67f6e266a 100644 --- a/R/merge.R +++ b/R/merge.R @@ -34,7 +34,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL warningf("Supplied both `by` and `by.x`/`by.y`. 
`by` argument will be ignored.") if (!is.null(by.x)) { if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y)) - stopf("A non-empty vector of column names is required for `by.x` and `by.y`.", class="dt_invalid_input_error") + stopf("A non-empty vector of column names is required for `by.x` and `by.y`.") if (!all(idx <- by.x %chin% nm_x)) { stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx])) } diff --git a/R/setkey.R b/R/setkey.R index 4ba5be4d71..94ad3d4faf 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -43,7 +43,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU if (!all(nzchar(cols))) stopf("cols contains some blanks.") cols = gsub("`", "", cols, fixed = TRUE) miss = !(cols %chin% colnames(x)) - if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_column_error") + if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss])) if (physical && identical(head(key(x), length(cols)), cols)){ ## for !physical we need to compute groups as well #4387 ## key is present but x has a longer key. No sorting needed, only attribute is changed to shorter key. @@ -54,7 +54,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU if (".xi" %chin% names(x)) stopf("x contains a column called '.xi'. 
Conflicts with internal use by data.table.") for (i in cols) { .xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column - if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported as a key column type, currently.", i, typeof(.xi), class="dt_unsortable_type_error") + if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported as a key column type, currently.", i, typeof(.xi)) } if (!is.character(cols) || length(cols)<1L) internal_error("'cols' should be character at this point") # nocov @@ -266,11 +266,11 @@ setorderv = function(x, cols = colnames(x), order=1L, na.last=FALSE) # remove backticks from cols cols = gsub("`", "", cols, fixed = TRUE) miss = !(cols %chin% colnames(x)) - if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss]), class = "dt_missing_column_error") + if (any(miss)) stopf("some columns are not in the data.table: %s", brackify(cols[miss])) if (".xi" %chin% colnames(x)) stopf("x contains a column called '.xi'. 
Conflicts with internal use by data.table.") for (i in cols) { .xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column - if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported for ordering currently.", i, typeof(.xi), class="dt_unsortable_type_error") + if (!typeof(.xi) %chin% ORDERING_TYPES) stopf("Column '%s' is type '%s' which is not supported for ordering currently.", i, typeof(.xi)) } if (!is.character(cols) || length(cols)<1L) internal_error("'cols' should be character at this point") # nocov diff --git a/R/setops.R b/R/setops.R index 1d129c4a9a..9a0effd53e 100644 --- a/R/setops.R +++ b/R/setops.R @@ -14,11 +14,11 @@ setdiff_ = function(x, y, by.x=seq_along(x), by.y=seq_along(y), use.names=FALSE) icnam = names(y)[lc] xcnam = names(x)[rc] if ( is.character(x[[rc]]) && !(is.character(y[[lc]]) || is.factor(y[[lc]])) ) { - stopf("When x's column ('%s') is character, the corresponding column in y ('%s') should be factor or character, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_join_type_mismatch_error") + stopf("When x's column ('%s') is character, the corresponding column in y ('%s') should be factor or character, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]])) } else if ( is.factor(x[[rc]]) && !(is.character(y[[lc]]) || is.factor(y[[lc]])) ) { - stopf("When x's column ('%s') is factor, the corresponding column in y ('%s') should be character or factor, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_join_type_mismatch_error") + stopf("When x's column ('%s') is factor, the corresponding column in y ('%s') should be character or factor, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]])) } else if ( (is.integer(x[[rc]]) || is.double(x[[rc]])) && (is.logical(y[[lc]]) || is.character(y[[lc]])) ) { - stopf("When x's column ('%s') is integer or numeric, the corresponding column in y ('%s') can not be 
character or logical types, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]]), class="dt_join_type_mismatch_error") + stopf("When x's column ('%s') is integer or numeric, the corresponding column in y ('%s') can not be character or logical types, but found incompatible type '%s'.", xcnam, icnam, typeof(y[[lc]])) } } ux = unique(shallow(x, by.x)) diff --git a/vignettes/datatable-programming.Rmd b/vignettes/datatable-programming.Rmd index 833dedf6cc..34c6d77fda 100644 --- a/vignettes/datatable-programming.Rmd +++ b/vignettes/datatable-programming.Rmd @@ -456,50 +456,6 @@ DT[, eval(cl)] DT[, cl, env = list(cl = cl)] ``` -## Error handling with classed conditions - -Starting from version 1.17.0, `data.table` provides specific error classes for common operations, making it easier to handle errors programmatically. This is particularly useful when writing robust code or packages that use `data.table`. - -### Available error classes - -`data.table` now provides four specific error classes: - -- `dt_missing_column_error`: When referencing columns that don't exist -- `dt_invalid_input_error`: When providing invalid input types or empty required arguments -- `dt_unsortable_type_error`: When trying to sort/key unsupported types -- `dt_join_type_mismatch_error`: When column types are incompatible in joins/set operations - -### Usage examples - -```{r error_handling, error=TRUE} -DT <- data.table(a = 1:3, b = 4:6) - -# Handle missing column errors specifically -tryCatch({ - setkey(DT, "nonexistent_col") -}, dt_missing_column_error = function(e) { - cat("Missing column detected:", conditionMessage(e), "\n") -}, error = function(e) { - cat("Other error:", conditionMessage(e), "\n") -}) - -# Handle type mismatches in operations -DT1 <- data.table(id = 1:3, value = c("a", "b", "c")) -DT2 <- data.table(id = 1:3, value = 1:3) - -tryCatch({ - fintersect(DT1, DT2) -}, dt_join_type_mismatch_error = function(e) { - cat("Type mismatch in join:", conditionMessage(e), "\n") 
-}, error = function(e) { - cat("Other error:", conditionMessage(e), "\n") -}) -``` - -### Backward compatibility - -All error classes inherit from base R's condition system, so existing `tryCatch(..., error = ...)` code continues to work unchanged. The new classes simply provide more specific handling options when needed. - ```{r cleanup, echo=FALSE} options(.opts) registerS3method("print", "data.frame", base::print.data.frame)