From a2fa23f6b70cff8ac9c72041c6eeecf3115122eb Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Sun, 26 Jan 2025 12:19:47 +0530 Subject: [PATCH 01/25] Enhancing of error message --- po/data.table.pot | 2 +- po/es.po | 4 +-- po/fr.po | 4 +-- po/pt_BR.po | 4 +-- po/zh_CN.po | 4 +-- src/fmelt.c | 68 ++++++++++++++++++++++++++++++++++++++++------- 6 files changed, 67 insertions(+), 19 deletions(-) diff --git a/po/data.table.pot b/po/data.table.pot index e11d4c3da7..43e8850bda 100644 --- a/po/data.table.pot +++ b/po/data.table.pot @@ -1024,7 +1024,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid." +msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" msgstr "" #: fmelt.c:189 diff --git a/po/es.po b/po/es.po index ea5f2d9ce4..50a980298a 100644 --- a/po/es.po +++ b/po/es.po @@ -1281,8 +1281,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "Tipo 'measure.vars' desconocido %s en el índice %d de la lista" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid." -msgstr "Uno o más valores en 'measure.vars' no son válidos." +msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" +msgstr "Uno o más valores en 'measure.vars' no son válidos; por favor corrige eliminando: %s" #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." diff --git a/po/fr.po b/po/fr.po index e63209de3c..264163e6a3 100644 --- a/po/fr.po +++ b/po/fr.po @@ -1301,8 +1301,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "Type inconnu de 'measure.vars' %s à l'indice %d de la liste" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid." -msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides." +msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" +msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides; veuillez corriger en supprimant : %s" #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." diff --git a/po/pt_BR.po b/po/pt_BR.po index e214a2a3bd..4d54b6796c 100644 --- a/po/pt_BR.po +++ b/po/pt_BR.po @@ -1281,8 +1281,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "'measure.vars'com tipo desconhecido %s no índice %d da lista" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid." -msgstr "Um ou mais valores em 'measure.vars' são inválidos." +msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" +msgstr "Um ou mais valores em 'measure.vars' são inválidos; por favor, corrija removendo: %s" #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." diff --git a/po/zh_CN.po b/po/zh_CN.po index 493adf3767..f1bdb098b3 100644 --- a/po/zh_CN.po +++ b/po/zh_CN.po @@ -1154,8 +1154,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "未知'measure.vars'类型 %s,位于列表中 %d" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid." -msgstr "'measure.vars'里,一或多个数值无效" +msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" +msgstr "'measure.vars'里,一或多个数值无效;请通过删除以下数值来修复:%s" #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." diff --git a/src/fmelt.c b/src/fmelt.c index 59e82455b4..199e04a015 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -1,5 +1,7 @@ #include "data.table.h" #include + + // #include // the debugging machinery + breakpoint aidee // raise(SIGINT); @@ -176,33 +178,79 @@ bool is_default_measure(SEXP vec) { // maybe unlist, then unique, then set_diff. SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { + // Protect input list/vector, unlisting if necessary SEXP int_vec = PROTECT(isNewList(int_or_list) ? unlist_(int_or_list) : int_or_list); + + // Check for duplicated elements in the input vector SEXP is_duplicated = PROTECT(duplicated(int_vec, FALSE)); + int n_unique_cols = 0; - for (int i=0; i 0) { + // Buffer for concatenated invalid column messages + char buffer[4096] = ""; // Large enough to store the concatenated string + for (int i = 0; i < invalid_count; ++i) { + char temp[32]; + snprintf(temp, 32, "[%d]", invalid_col_ptr[i]); // Format the column number + + if (i > 0) { + strncat(buffer, ", ", sizeof(buffer) - strlen(buffer) - 1); // Add separator } - } else if (!LOGICAL(is_duplicated)[i]) n_unique_cols++; + strncat(buffer, temp, sizeof(buffer) - strlen(buffer) - 1); // Append to the buffer + } + + // Throw the error with the concatenated message + error(_("One or more values in '%s' are invalid; please fix by removing: %s"), + is_measure ? "measure.vars" : "id.vars", buffer); } + + // Proceed with collecting unique columns SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); int unique_i = 0; - for (int i=0; i Date: Sun, 26 Jan 2025 14:36:09 +0530 Subject: [PATCH 02/25] Add Suggested changes --- po/es.po | 2 +- po/fr.po | 2 +- po/pt_BR.po | 2 +- po/zh_CN.po | 2 +- src/fmelt.c | 29 +++++++---------------------- 5 files changed, 11 insertions(+), 26 deletions(-) diff --git a/po/es.po b/po/es.po index 50a980298a..92be106585 100644 --- a/po/es.po +++ b/po/es.po @@ -1282,7 +1282,7 @@ msgstr "Tipo 'measure.vars' desconocido %s en el índice %d de la lista" #: fmelt.c:187 msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" -msgstr "Uno o más valores en 'measure.vars' no son válidos; por favor corrige eliminando: %s" +msgstr "Uno o más valores en 'measure.vars' no son válidos." #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." diff --git a/po/fr.po b/po/fr.po index 264163e6a3..752b1afb54 100644 --- a/po/fr.po +++ b/po/fr.po @@ -1302,7 +1302,7 @@ msgstr "Type inconnu de 'measure.vars' %s à l'indice %d de la liste" #: fmelt.c:187 msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" -msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides; veuillez corriger en supprimant : %s" +msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides." #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." diff --git a/po/pt_BR.po b/po/pt_BR.po index 4d54b6796c..6eb6aa1b3a 100644 --- a/po/pt_BR.po +++ b/po/pt_BR.po @@ -1282,7 +1282,7 @@ msgstr "'measure.vars'com tipo desconhecido %s no índice %d da lista" #: fmelt.c:187 msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" -msgstr "Um ou mais valores em 'measure.vars' são inválidos; por favor, corrija removendo: %s" +msgstr "Um ou mais valores em 'measure.vars' são inválidos." #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." diff --git a/po/zh_CN.po b/po/zh_CN.po index f1bdb098b3..86f9a966fb 100644 --- a/po/zh_CN.po +++ b/po/zh_CN.po @@ -1155,7 +1155,7 @@ msgstr "未知'measure.vars'类型 %s,位于列表中 %d" #: fmelt.c:187 msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" -msgstr "'measure.vars'里,一或多个数值无效;请通过删除以下数值来修复:%s" +msgstr "'measure.vars'里,一或多个数值无效" #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." diff --git a/src/fmelt.c b/src/fmelt.c index 199e04a015..0bffa31dc0 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -1,7 +1,6 @@ #include "data.table.h" #include - // #include // the debugging machinery + breakpoint aidee // raise(SIGINT); @@ -178,30 +177,23 @@ bool is_default_measure(SEXP vec) { // maybe unlist, then unique, then set_diff. SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { - // Protect input list/vector, unlisting if necessary SEXP int_vec = PROTECT(isNewList(int_or_list) ? unlist_(int_or_list) : int_or_list); - // Check for duplicated elements in the input vector SEXP is_duplicated = PROTECT(duplicated(int_vec, FALSE)); int n_unique_cols = 0; - - // Allocate a vector to store invalid column indices (initially max size is length of int_vec) + SEXP invalid_columns = PROTECT(allocVector(INTSXP, length(int_vec))); int* invalid_col_ptr = INTEGER(invalid_columns); int invalid_count = 0; - // Iterate through the column numbers to identify invalid and unique columns for (int i = 0; i < length(int_vec); ++i) { int col_number = INTEGER(int_vec)[i]; - // Check if the column number is within valid range bool good_number = 0 < col_number && col_number <= ncol; - // Special check for 'measure' case (NA_INTEGER handling) if (is_measure) good_number |= (col_number == NA_INTEGER); - // Collect invalid columns if not valid or out of range if (!good_number || col_number == 0) { invalid_col_ptr[invalid_count++] = col_number; } else if (!LOGICAL(is_duplicated)[i]) { @@ -209,41 +201,34 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { } } - // If invalid columns are found, construct the error message if (invalid_count > 0) { - // Buffer for concatenated invalid column messages - char buffer[4096] = ""; // Large enough to store the concatenated string + char buffer[4096] = ""; for (int i = 0; i < invalid_count; ++i) { char temp[32]; - snprintf(temp, 32, "[%d]", invalid_col_ptr[i]); // Format the column number + snprintf(temp, 32, "[%d]", invalid_col_ptr[i]); if (i > 0) { - strncat(buffer, ", ", sizeof(buffer) - strlen(buffer) - 1); // Add separator + strncat(buffer, ", ", sizeof(buffer) - strlen(buffer) - 1); } - strncat(buffer, temp, sizeof(buffer) - strlen(buffer) - 1); // Append to the buffer + strncat(buffer, temp, sizeof(buffer) - strlen(buffer) - 1); } - // Throw the error with the concatenated message error(_("One or more values in '%s' are invalid; please fix by removing: %s"), is_measure ? "measure.vars" : "id.vars", buffer); } - - // Proceed with collecting unique columns + SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); int unique_i = 0; - // Populate the unique column numbers into the new vector for (int i = 0; i < length(is_duplicated); ++i) { if (!LOGICAL(is_duplicated)[i]) { INTEGER(unique_col_numbers)[unique_i++] = INTEGER(int_vec)[i]; } } - // Apply set difference to get final unique column indices SEXP out = set_diff(unique_col_numbers, ncol); - // Unprotect all allocated objects - UNPROTECT(4); // Unprotect input, duplication check, invalid columns, and unique columns + UNPROTECT(4); return out; } From e75bc7319ebf7161df640ef001de1eae3fd3c982 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Mon, 27 Jan 2025 19:17:42 +0530 Subject: [PATCH 03/25] Revert to initial changes --- po/data.table.pot | 2 +- po/es.po | 2 +- po/fr.po | 2 +- po/pt_BR.po | 2 +- po/zh_CN.po | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/po/data.table.pot b/po/data.table.pot index 43e8850bda..e11d4c3da7 100644 --- a/po/data.table.pot +++ b/po/data.table.pot @@ -1024,7 +1024,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" +msgid "One or more values in 'measure.vars' is invalid." msgstr "" #: fmelt.c:189 diff --git a/po/es.po b/po/es.po index 92be106585..ea5f2d9ce4 100644 --- a/po/es.po +++ b/po/es.po @@ -1281,7 +1281,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "Tipo 'measure.vars' desconocido %s en el índice %d de la lista" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" +msgid "One or more values in 'measure.vars' is invalid." msgstr "Uno o más valores en 'measure.vars' no son válidos." #: fmelt.c:189 diff --git a/po/fr.po b/po/fr.po index 752b1afb54..e63209de3c 100644 --- a/po/fr.po +++ b/po/fr.po @@ -1301,7 +1301,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "Type inconnu de 'measure.vars' %s à l'indice %d de la liste" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" +msgid "One or more values in 'measure.vars' is invalid." msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides." #: fmelt.c:189 diff --git a/po/pt_BR.po b/po/pt_BR.po index 6eb6aa1b3a..e214a2a3bd 100644 --- a/po/pt_BR.po +++ b/po/pt_BR.po @@ -1281,7 +1281,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "'measure.vars'com tipo desconhecido %s no índice %d da lista" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" +msgid "One or more values in 'measure.vars' is invalid." msgstr "Um ou mais valores em 'measure.vars' são inválidos." #: fmelt.c:189 diff --git a/po/zh_CN.po b/po/zh_CN.po index 86f9a966fb..493adf3767 100644 --- a/po/zh_CN.po +++ b/po/zh_CN.po @@ -1154,7 +1154,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "未知'measure.vars'类型 %s,位于列表中 %d" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" +msgid "One or more values in 'measure.vars' is invalid." msgstr "'measure.vars'里,一或多个数值无效" #: fmelt.c:189 From ea98c199df92ba4e1b3085ca37936c373ae5c641 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Tue, 18 Feb 2025 17:53:24 +0530 Subject: [PATCH 04/25] removal of empty lines --- src/fmelt.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/src/fmelt.c b/src/fmelt.c index 0bffa31dc0..0768900543 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -1,6 +1,5 @@ #include "data.table.h" #include - // #include // the debugging machinery + breakpoint aidee // raise(SIGINT); @@ -178,64 +177,46 @@ bool is_default_measure(SEXP vec) { // maybe unlist, then unique, then set_diff. SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { SEXP int_vec = PROTECT(isNewList(int_or_list) ? unlist_(int_or_list) : int_or_list); - SEXP is_duplicated = PROTECT(duplicated(int_vec, FALSE)); - int n_unique_cols = 0; - SEXP invalid_columns = PROTECT(allocVector(INTSXP, length(int_vec))); int* invalid_col_ptr = INTEGER(invalid_columns); int invalid_count = 0; - for (int i = 0; i < length(int_vec); ++i) { int col_number = INTEGER(int_vec)[i]; - bool good_number = 0 < col_number && col_number <= ncol; - if (is_measure) good_number |= (col_number == NA_INTEGER); - if (!good_number || col_number == 0) { invalid_col_ptr[invalid_count++] = col_number; } else if (!LOGICAL(is_duplicated)[i]) { n_unique_cols++; } } - if (invalid_count > 0) { char buffer[4096] = ""; for (int i = 0; i < invalid_count; ++i) { char temp[32]; snprintf(temp, 32, "[%d]", invalid_col_ptr[i]); - if (i > 0) { strncat(buffer, ", ", sizeof(buffer) - strlen(buffer) - 1); } strncat(buffer, temp, sizeof(buffer) - strlen(buffer) - 1); } - - error(_("One or more values in '%s' are invalid; please fix by removing: %s"), + error(_("One or more values in '%s' are invalid; please fix by removing: %s"), is_measure ? "measure.vars" : "id.vars", buffer); } - SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); int unique_i = 0; - for (int i = 0; i < length(is_duplicated); ++i) { if (!LOGICAL(is_duplicated)[i]) { INTEGER(unique_col_numbers)[unique_i++] = INTEGER(int_vec)[i]; } } - SEXP out = set_diff(unique_col_numbers, ncol); - UNPROTECT(4); - return out; } - - - SEXP cols_to_int_or_list(SEXP cols, SEXP dtnames, bool is_measure) { switch(TYPEOF(cols)) { case STRSXP : return chmatch(cols, dtnames, 0); From 822964ffcbc9a2052d31e92d207cd8ccbecb8439 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Wed, 19 Feb 2025 19:26:26 +0530 Subject: [PATCH 05/25] Apply changes from maintainer's commit dbcabb0 to avoid repeated strncat() calls --- src/fmelt.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/fmelt.c b/src/fmelt.c index 0768900543..b4d8c9baab 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -193,14 +193,13 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { } } if (invalid_count > 0) { - char buffer[4096] = ""; + char buffer[4096] = "", *nexti = buffer; + size_t remaining = sizeof buffer; for (int i = 0; i < invalid_count; ++i) { - char temp[32]; - snprintf(temp, 32, "[%d]", invalid_col_ptr[i]); - if (i > 0) { - strncat(buffer, ", ", sizeof(buffer) - strlen(buffer) - 1); - } - strncat(buffer, temp, sizeof(buffer) - strlen(buffer) - 1); + int offset = snprintf(nexti, remaining, "%s[%d]", i > 0 ? ", " : "", invalid_col_ptr[i]); + if (offset < 0 || (size_t)offset >= remaining) break; + nexti += offset; + remaining -= offset; } error(_("One or more values in '%s' are invalid; please fix by removing: %s"), is_measure ? "measure.vars" : "id.vars", buffer); From eff26ca2c3a5c3e494442fedb3ba59dc19a8c858 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Thu, 27 Feb 2025 01:46:23 +0530 Subject: [PATCH 06/25] add test case --- src/fmelt.c | 10 +++---- tests/testthat/test_melt.R | 55 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 tests/testthat/test_melt.R diff --git a/src/fmelt.c b/src/fmelt.c index e0afe4a5e2..9b203c7910 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -184,9 +184,9 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { int invalid_count = 0; for (int i = 0; i < length(int_vec); ++i) { int col_number = INTEGER(int_vec)[i]; - bool good_number = 0 < col_number && col_number <= ncol; - if (is_measure) good_number |= (col_number == NA_INTEGER); - if (!good_number || col_number == 0) { + bool good_number = (col_number > 0 && col_number <= ncol); + if (is_measure) {good_number |= (col_number == NA_INTEGER);} + if (!good_number) { invalid_col_ptr[invalid_count++] = col_number; } else if (!LOGICAL(is_duplicated)[i]) { n_unique_cols++; @@ -201,8 +201,8 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { nexti += offset; remaining -= offset; } - error(_("One or more values in '%s' are invalid; please fix by removing: %s"), - is_measure ? "measure.vars" : "id.vars", buffer); + error(_("One or more values in '%s' are invalid; please fix by removing: %s"), + is_measure ? "measure.vars" : "id.vars", buffer); } SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); int unique_i = 0; diff --git a/tests/testthat/test_melt.R b/tests/testthat/test_melt.R new file mode 100644 index 0000000000..66bb3ac3cd --- /dev/null +++ b/tests/testthat/test_melt.R @@ -0,0 +1,55 @@ +options(width = 200) +# Load compiled shared object +dyn.load("/home/yadav/dataTable/data.table/src/fmelt.so") + +# Ensure the function is loaded +stopifnot(is.loaded("uniq_diff")) + +# Test cases +test_uniq_diff <- function() { + cat("Running tests for uniq_diff...\n") + + # Load required R functions + library(data.table) + + # Test 1: Valid integer vector input + input_1 <- as.integer(c(1, 2, 3, 4, 5)) + result_1 <- .Call("uniq_diff", input_1, as.integer(5), FALSE) + expected_1 <- input_1 # Should return unique values + stopifnot(identical(result_1, expected_1)) + cat("Test 1 passed!\n") + + # Test 2: Input with duplicates + input_2 <- as.integer(c(1, 2, 2, 3, 4, 4, 5)) + result_2 <- .Call("uniq_diff", input_2, as.integer(5), FALSE) + expected_2 <- as.integer(c(1, 2, 3, 4, 5)) # Should remove duplicates + stopifnot(identical(result_2, expected_2)) + cat("Test 2 passed!\n") + + # Test 3: Invalid column numbers (out of range) + input_3 <- as.integer(c(-1, 0, 1, 6, 2, 3)) + tryCatch({ + result_3 <- .Call("uniq_diff", input_3, as.integer(5), FALSE) + cat("Test 3 failed: Expected an error but none occurred.\n") + }, error = function(e) { + cat("Test 3 passed! Caught expected error: ", e$message, "\n") + }) + + # Test 4: NA values in the input + input_4 <- as.integer(c(1, 2, NA, 3, 4)) + result_4 <- .Call("uniq_diff", input_4, as.integer(5), TRUE) + expected_4 <- as.integer(c(1, 2, NA, 3, 4)) # Should allow NA if is_measure is TRUE + stopifnot(identical(result_4, expected_4)) + cat("Test 4 passed!\n") + + # Test 5: Empty input + input_5 <- as.integer(integer(0)) + result_5 <- .Call("uniq_diff", input_5, as.integer(5), FALSE) + stopifnot(length(result_5) == 0) # Should return empty + cat("Test 5 passed!\n") + + cat("All tests completed successfully!\n") +} + +# Run the tests +test_uniq_diff() From 18d0180d1b8a188420b715fd98f0f86421455002 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Thu, 27 Feb 2025 01:50:11 +0530 Subject: [PATCH 07/25] changes added --- src/fmelt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fmelt.c b/src/fmelt.c index 9b203c7910..366c6faa75 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -206,7 +206,7 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { } SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); int unique_i = 0; - for (int i = 0; i < length(is_duplicated); ++i) { + for (int i=0; i Date: Fri, 28 Feb 2025 01:48:58 +0530 Subject: [PATCH 08/25] add test --- inst/tests/tests.Rraw | 15 +++++++++++ src/fmelt.c | 4 ++- tests/testthat/test_melt.R | 55 -------------------------------------- 3 files changed, 18 insertions(+), 56 deletions(-) delete mode 100644 tests/testthat/test_melt.R diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e4231b5fa8..43570a2620 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21042,6 +21042,7 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list( # the integer overflow in #6729 is only noticeable with UBSan test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE }) +<<<<<<< HEAD if (exists("sort_by", "package:base")) { # sort_by.data.table DT1 = data.table(a=c(1, 3, 2, NA, 3), b=4:0) @@ -21069,3 +21070,17 @@ if (exists("sort_by", "package:base")) { test(2306.23, DT1[, sort_by(.SD, ~a + b)], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) } +======= +#test for enhancing error message of invalid column #6512 +uniq_diff <- function(...) { + .Call("uniq_diff", ...) +} +capture_error_message <- function(expr) { + msg <- tryCatch( + { expr; NULL }, + error = function(e) e$message + ) + msg +} +test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg)) # Ensure -1 is reported}, TRUE) +>>>>>>> 1b4a51d3 (add test case) diff --git a/src/fmelt.c b/src/fmelt.c index 366c6faa75..8eb257ae2e 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -185,7 +185,9 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { for (int i = 0; i < length(int_vec); ++i) { int col_number = INTEGER(int_vec)[i]; bool good_number = (col_number > 0 && col_number <= ncol); - if (is_measure) {good_number |= (col_number == NA_INTEGER);} + if (is_measure) { + good_number |= (col_number == NA_INTEGER); + } if (!good_number) { invalid_col_ptr[invalid_count++] = col_number; } else if (!LOGICAL(is_duplicated)[i]) { diff --git a/tests/testthat/test_melt.R b/tests/testthat/test_melt.R deleted file mode 100644 index 66bb3ac3cd..0000000000 --- a/tests/testthat/test_melt.R +++ /dev/null @@ -1,55 +0,0 @@ -options(width = 200) -# Load compiled shared object -dyn.load("/home/yadav/dataTable/data.table/src/fmelt.so") - -# Ensure the function is loaded -stopifnot(is.loaded("uniq_diff")) - -# Test cases -test_uniq_diff <- function() { - cat("Running tests for uniq_diff...\n") - - # Load required R functions - library(data.table) - - # Test 1: Valid integer vector input - input_1 <- as.integer(c(1, 2, 3, 4, 5)) - result_1 <- .Call("uniq_diff", input_1, as.integer(5), FALSE) - expected_1 <- input_1 # Should return unique values - stopifnot(identical(result_1, expected_1)) - cat("Test 1 passed!\n") - - # Test 2: Input with duplicates - input_2 <- as.integer(c(1, 2, 2, 3, 4, 4, 5)) - result_2 <- .Call("uniq_diff", input_2, as.integer(5), FALSE) - expected_2 <- as.integer(c(1, 2, 3, 4, 5)) # Should remove duplicates - stopifnot(identical(result_2, expected_2)) - cat("Test 2 passed!\n") - - # Test 3: Invalid column numbers (out of range) - input_3 <- as.integer(c(-1, 0, 1, 6, 2, 3)) - tryCatch({ - result_3 <- .Call("uniq_diff", input_3, as.integer(5), FALSE) - cat("Test 3 failed: Expected an error but none occurred.\n") - }, error = function(e) { - cat("Test 3 passed! Caught expected error: ", e$message, "\n") - }) - - # Test 4: NA values in the input - input_4 <- as.integer(c(1, 2, NA, 3, 4)) - result_4 <- .Call("uniq_diff", input_4, as.integer(5), TRUE) - expected_4 <- as.integer(c(1, 2, NA, 3, 4)) # Should allow NA if is_measure is TRUE - stopifnot(identical(result_4, expected_4)) - cat("Test 4 passed!\n") - - # Test 5: Empty input - input_5 <- as.integer(integer(0)) - result_5 <- .Call("uniq_diff", input_5, as.integer(5), FALSE) - stopifnot(length(result_5) == 0) # Should return empty - cat("Test 5 passed!\n") - - cat("All tests completed successfully!\n") -} - -# Run the tests -test_uniq_diff() From 11345b98356475fe050582894c48ea9f643cf504 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 01:54:15 +0530 Subject: [PATCH 09/25] add test --- inst/tests/tests.Rraw | 313 +++++++++++++++++++----------------------- 1 file changed, 139 insertions(+), 174 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 43570a2620..1d31d232ce 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -159,6 +159,9 @@ TZnotUTC = !identical(tt,"") && !is_utc(tt) # (3) function factory for matching messages exactly by substituting anything between delimiters [delim, fmt=TRUE] # (4) function factory for matching messages exactly by substituting a generic string [fmt=string] get_msg = function(e, delim, fmt=FALSE) { + ufq = options(useFancyQuotes = FALSE) # otherwise we get angled quotes, hard to match robustly + on.exit(options(ufq)) + condition = tryCatch({e; NULL}, error=identity, warning=identity) if (is.null(condition)) return(condition) msg = condition$message @@ -1361,20 +1364,43 @@ if (test_bit64) { test(431.5, DT[5,1:=as.integer64(NA)], data.table(a=factor(c(NA,NA,NA,NA,NA), levels=LETTERS[1:3]), b=1:5)) } -# Test that unsetting datatable.alloccol is caught, #2014 -test(432.1, data.table(a=1:3), options=list(datatable.alloccol=NULL), error="Has getOption('datatable.alloccol') somehow become unset?") -test(432.2, data.table(a=1:3), options=c(datatable.alloccol="1024"), error="getOption('datatable.alloccol') should be a number, by default 1024. But its type is 'character'.") -test(432.3, data.table(a=1:3), options=list(datatable.alloccol=c(10L,20L)), error="is a numeric vector ok but its length is 2. Its length should be 1.") -test(432.4, data.table(a=1:3), options=c(datatable.alloccol=NA_integer_), error="It must be >=0 and not NA.") -test(432.5, data.table(a=1:3), options=c(datatable.alloccol=-1), error="It must be >=0 and not NA.") - +old = getOption("datatable.alloccol") # Test that unsetting datatable.alloccol is caught, #2014 +options(datatable.alloccol=NULL) # In this =NULL case, options() in R 3.0.0 returned TRUE rather than the old value. This R bug was fixed in R 3.1.1. + # This is why getOption is called first rather than just using the result of option() like elsewhere in this test file. + # TODO: simplify this test if/when R dependency >= 3.1.1 +err1 = try(data.table(a=1:3), silent=TRUE) +options(datatable.alloccol="1024") +err2 = try(data.table(a=1:3), silent=TRUE) +options(datatable.alloccol=c(10L,20L)) +err3 = try(data.table(a=1:3), silent=TRUE) +options(datatable.alloccol=NA_integer_) +err4 = try(data.table(a=1:3), silent=TRUE) +options(datatable.alloccol=-1) +err5 = try(data.table(a=1:3), silent=TRUE) +options(datatable.alloccol=1024L) # otherwise test() itself fails in its internals with the alloc.col error +test(432.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1)) +test(432.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2)) +test(432.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3)) +test(432.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4)) +test(432.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5)) # Repeat the tests but this time with subsetting, to ensure the validity check on option happens for those too DT = data.table(a=1:3, b=4:6) -test(433.1, DT[2,], options=list(datatable.alloccol=NULL), error="Has getOption('datatable.alloccol') somehow become unset?") -test(433.2, DT[,2], options=c(datatable.alloccol="1024"), error="getOption('datatable.alloccol') should be a number, by default 1024. But its type is 'character'.") -test(433.3, DT[a>1], options=list(datatable.alloccol=c(10L,20L)), error="is a numeric vector ok but its length is 2. Its length should be 1.") -test(433.4, DT[,"b"], options=c(datatable.alloccol=NA_integer_), error="It must be >=0 and not NA.") -test(433.5, DT[2,"b"], options=c(datatable.alloccol=-1), error="It must be >=0 and not NA.") +options(datatable.alloccol=NULL) +err1 = try(DT[2,], silent=TRUE) +options(datatable.alloccol="1024") +err2 = try(DT[,2], silent=TRUE) +options(datatable.alloccol=c(10L,20L)) +err3 = try(DT[a>1], silent=TRUE) +options(datatable.alloccol=NA_integer_) +err4 = try(DT[,"b"], silent=TRUE) +options(datatable.alloccol=-1) +err5 = try(DT[2,"b"], silent=TRUE) +options(datatable.alloccol=1024L) # otherwise test() itself fails in its internals with the alloc.col error +test(433.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1)) +test(433.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2)) +test(433.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3)) +test(433.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4)) +test(433.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5)) # simple realloc test DT = data.table(a=1:3,b=4:6) @@ -7040,7 +7066,7 @@ ee = new.env() ee$DT = data.frame(x=1L, y=1:3) setattr(ee$DT, 'class', c("data.table", "data.frame")) test(1482.1, truelength(ee$DT), 0L) # make sure that the simulated environment is right. -test(1482.2, ee$DT[, z := 3:1], data.table(x=1L, y=1:3, z=3:1), warning="A shallow copy of this data.table was taken") +test(1482.2, ee$DT[, z := 3:1], data.table(x=1L, y=1:3, z=3:1), warning="Invalid .internal.selfref detected and") test(1482.3, truelength(ee$DT), 1027L) test(1482.4, ee$DT[, za := 4:6], data.table(x=1L, y=1:3, z=3:1, za=4:6)) test(1482.5, truelength(ee$DT), 1027L) # should have used spare slot i.e. no increase in tl @@ -7865,7 +7891,7 @@ test(1551.5, fread(str), rhs = setDT(read.table(testDir("issue_1095_fread.txt.bz2"), sep=",", comment.char="", stringsAsFactors=FALSE, quote="", strip.white=TRUE)) if (test_R.utils) { test(1551.61, fread(testDir("issue_1095_fread.txt.bz2"), logical01=FALSE), rhs, warning=w) - rhs[, names(.SD) := lapply(.SD, function(x) x == "Y"), .SDcols = c("V16", "V17", "V45")] + rhs[, names(.SD) := lapply(.SD, \(x) x == "Y"), .SDcols = c("V16", "V17", "V45")] test(1551.62, fread(testDir("issue_1095_fread.txt.bz2"), logical01=FALSE, logicalYN=TRUE), rhs, warning=w) } @@ -8541,13 +8567,14 @@ DT1 = data.table(a=1) test(1601.1, merge(DT1, DT1, by="a"), data.table(a=1, key="a")) test(1601.2, merge(DT1, DT0, by="a"), warning="Input data.table 'y' has no columns.", - error="The following columns listed in `by` are missing from y: [a]") + error="Elements listed in `by`") test(1601.3, merge(DT0, DT1, by="a"), warning="Input data.table 'x' has no columns.", - error="The following columns listed in `by` are missing from x: [a]") + error="Elements listed in `by`") test(1601.4, merge(DT0, DT0, by="a"), warning="Neither of the input data.tables to join have columns.", - error="The following columns listed in `by` are missing from x: [a]") + error="Elements listed in `by`") + # fix for #1549 d1 <- data.table(v1=1:2,x=x) d2 <- data.table(v1=3:4) @@ -8689,17 +8716,17 @@ test(1613.21, all.equal(DT2, DT1, ignore.row.order = TRUE), "Dataset 'current' h # test attributes: key DT1 <- data.table(a = 1:4, b = letters[1:4], key = "a") DT2 <- data.table(a = 1:4, b = letters[1:4]) -test(1613.22, all.equal(DT1, DT2), output="Datasets have different keys. 'target': [a]. 'current': has no key.") +test(1613.22, all.equal(DT1, DT2), "Datasets have different keys. 'target': [a]. 'current': has no key.") test(1613.23, all.equal(DT1, DT2, check.attributes = FALSE), TRUE) test(1613.24, all.equal(DT1, setkeyv(DT2, "a"), check.attributes = TRUE), TRUE) # test attributes: index DT1 <- data.table(a = 1:4, b = letters[1:4]) DT2 <- data.table(a = 1:4, b = letters[1:4]) setindexv(DT1, "b") -test(1613.25, all.equal(DT1, DT2), output="Datasets have different indices. 'target': [b]. 'current': has no index.") +test(1613.25, all.equal(DT1, DT2), "Datasets have different indices. 'target': [b]. 'current': has no index.") test(1613.26, all.equal(DT1, DT2, check.attributes = FALSE), TRUE) -test(1613.27, all.equal(DT1, setindexv(DT2, "a")), output="Datasets have different indices. 'target': [b]. 'current': [a].") -test(1613.28, all.equal(DT1, setindexv(DT2, "b")), output="Datasets have different indices. 'target': [b]. 'current': [a, b].") +test(1613.27, all.equal(DT1, setindexv(DT2, "a")), "Datasets have different indices. 'target': [b]. 'current': [a].") +test(1613.28, all.equal(DT1, setindexv(DT2, "b")), "Datasets have different indices. 'target': [b]. 'current': [a, b].") test(1613.29, all.equal(DT1, setindexv(setindexv(DT2, NULL), "b")), TRUE) # test custom attribute DT1 <- data.table(a = 1:4, b = letters[1:4]) @@ -10968,8 +10995,7 @@ DT = data.table( D = as.POSIXct(dt<-paste(d,t), tz="UTC"), E = as.POSIXct(paste0(dt,c(".999",".0",".5",".111112",".123456",".023",".0",".999999",".99",".0009")), tz="UTC")) -test(1740.1, fwrite(DT,dateTimeAs="iso"), - error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv")) +test(1740.1, fwrite(DT,dateTimeAs="iso"), error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv")) test(1740.2, fwrite(DT,dateTimeAs=c("ISO","squash")), error=base_messages$match_arg_length) test(1740.3, capture.output(fwrite(DT,dateTimeAs="ISO")), c( "A,B,C,D,E", @@ -11080,7 +11106,7 @@ test(1743.123, fread("a,b\n1+3i,2015-01-01", colClasses=c(NA,"IDate")), data.tab test(1743.13, lapply(fread("a,b\n09/05/98,2015-01-01", colClasses = "Date"), class), y=list(a="character", b=c("IDate", "Date")), warning=base_messages$ambiguous_date_fmt) ## Just invalid -test(1743.14, +test(1743.14, options = c(useFancyQuotes = FALSE), sapply(fread("a,b\n2017-01-01,1", colClasses=c("foo", "integer")), class), c(a="character", b="integer"), warning=base_messages$missing_coerce_method) test(1743.15, sapply(fread("a,b\n2017-01-01,1", colClasses=c("foo", "integer")), class), c(a="character", b="integer"), warning="the column has been left as type .*character") @@ -11787,15 +11813,15 @@ test(1775.1, capture.output(print(DT1, print.keys = TRUE)), c("Key: ", " a", "1: 1", "2: 2", "3: 3")) DT2 <- data.table(a = 1:3, b = 4:6) setindexv(DT2, c("b","a")) -test(1775.2, print(DT2, print.keys = TRUE), - output=c("Index: ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) +test(1775.2, capture.output(print(DT2, print.keys = TRUE)), + c("Index: ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) setindexv(DT2, "b") -test(1775.3, print(DT2, print.keys = TRUE), - output=c("Indices: , ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) +test(1775.3, capture.output(print(DT2, print.keys = TRUE)), + c("Indices: , ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) setkey(DT2, a) setindexv(DT2, c("b","a")) -test(1775.4, print(DT2, print.keys = TRUE), - output=c("Key: ", "Indices: , ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) ## index 'b' is still good, so we keep it +test(1775.4, capture.output(print(DT2, print.keys = TRUE)), + c("Key: ", "Indices: , ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) ## index 'b' is still good, so we keep it # dev regression #2285 cat("A B C\n1 2 3\n4 5 6", file=f<-tempfile()) @@ -12119,7 +12145,8 @@ test(1831.4, fread(paste0("A\n", "1.", src2)), data.table(A=1.1234567890098766)) DT = as.data.table(matrix(5L, nrow=10, ncol=10)) test(1832.1, fwrite(DT, f<-tempfile(), verbose=TRUE), output="Column writers") DT = as.data.table(matrix(5L, nrow=10, ncol=60)) -test(1832.2, fwrite(DT, f, verbose=TRUE), output = "\nColumn writers.* [.][.][.] ") +# Using capture.output directly to look for the "..." because test(,output=) intercepts [] for convenience elsewhere +test(1832.2, any(grepl("^Column writers.* [.][.][.] ", capture.output(fwrite(DT, f, verbose=TRUE))))) unlink(f) # ensure explicitly setting select to default value doesn't error, #2007 @@ -13519,14 +13546,14 @@ test(1962.016, merge(DT1, DT2, by.x = 'a', by.y = c('a', 'V')), test(1962.017, merge(DT1, DT2, by = 'V', by.x = 'a', by.y = 'a'), data.table(a = 2:3, V.x = c("a", "a"), V.y = c("b", "b"), key = 'a'), warning = 'Supplied both.*argument will be ignored') -test(1962.018, merge(DT1, DT2, by.x='z', by.y='a'), - error="The following columns listed in `by.x` are missing from x: [z]") -test(1962.019, merge(DT1, DT2, by.x='a', by.y='z'), - error="The following columns listed in `by.y` are missing from y: [z]") +test(1962.018, merge(DT1, DT2, by.x = 'z', by.y = 'a'), + error = 'Elements listed in `by.x`') +test(1962.019, merge(DT1, DT2, by.x = 'a', by.y = 'z'), + error = 'Elements listed in `by.y`') test(1962.0201, merge(DT1, DT2, by=character(0L)), ans) # was error before PR#5183 test(1962.0202, merge(DT1, DT2, by=NULL), ans) # test explicit NULL too as missing() could be used inside merge() -test(1962.021, merge(DT1, DT2, by='z'), - error='The following columns listed in `by` are missing from x: [z]') +test(1962.021, merge(DT1, DT2, by = 'z'), + error = 'must be valid column names in x and y') ## frank.R x = c(1, 1, 2, 5, 4, 3, 4, NA, 6) @@ -14915,7 +14942,7 @@ test(2037.1, foo(DT), output='Please remember to always setDT()') # no assignment was made to DT test(2037.2, names(DT), 'a') # _selrefok() verbose message was duplicated -test(2037.3, foo(DT), output="data.table internal attributes", notOutput="data.table internal attributes.*data.table internal attributes") +test(2037.3, unname(table(unlist(strsplit(capture.output(foo(DT)), '\n|\\s+')))['ptr']), 1L) # `between` invalid args, and verbose #3516 test(2038.01, between(1:5, 2, 4, incbounds=423), error="incbounds must be TRUE or FALSE") @@ -15139,13 +15166,13 @@ test(2044.60, dt1[dt2, ..cols, on="int==doubleInt", verbose=TRUE], test(2044.61, dt1[dt2, ..cols, on="int==realDouble", verbose=TRUE], # this was wrong in v1.12.2 (the fractions were truncated and joined to next lowest int) data.table(x.bool=c(NA,FALSE,NA,FALSE,NA), x.int=INT(NA,1,NA,2,NA), x.doubleInt=c(NA,1,NA,2,NA), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]), - output="Coercing integer column x.int to type double to match type of i.realDouble .which contains fractions.") + output="Coercing integer column x.int to type double to match type of i.realDouble which contains fractions") test(2044.62, dt1[dt2, ..cols, on="doubleInt==int", verbose=TRUE], data.table(x.bool=FALSE, x.int=1:5, x.doubleInt=as.double(1:5), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]), - output="Coercing integer column i.int .for join. to type double to match type of x.doubleInt") + output="Coercing integer column i.int to type double for join to match type of x.doubleInt") test(2044.63, dt1[dt2, ..cols, on="realDouble==int", verbose=TRUE], data.table(x.bool=c(rep(FALSE,4),TRUE), x.int=INT(2,4,6,8,10), x.doubleInt=c(2,4,6,8,10), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]), - output="Coercing integer column i.int .for join. to type double to match type of x.realDouble") + output="Coercing integer column i.int to type double for join to match type of x.realDouble") cols = c("x.int","x.char","x.fact","i.int","i.char","i.char") test(2044.64, dt1[dt2, ..cols, on="char==fact", verbose=TRUE], ans<-data.table(x.int=1:5, x.char=letters[1:5], x.fact=factor(letters[1:5]), i.int=1:5, i.char=letters[1:5], i.char=letters[1:5]), @@ -15180,15 +15207,15 @@ if (test_bit64) { dt1 = data.table(a=1, b=NA_character_) dt2 = data.table(a=2L, b=NA) test(2044.80, dt1[dt2, on="a==b", verbose=TRUE], data.table(a=NA, b=NA_character_, i.a=2L), - output=msg<-"Coercing logical column i.b .all-NA. to type double to match type of x.a") + output=msg<-"Coercing all-NA logical column i.b to type double to match type of x.a") test(2044.81, dt1[dt2, on="a==b", nomatch=0L, verbose=TRUE], data.table(a=logical(), b=character(), i.a=integer()), output=msg) test(2044.82, dt1[dt2, on="b==b", verbose=TRUE], data.table(a=1, b=NA, i.a=2L), - output=msg<-"Coercing logical column i.b .all-NA. to type character to match type of x.b") + output=msg<-"Coercing all-NA logical column i.b to type character to match type of x.b") test(2044.83, dt1[dt2, on="b==b", nomatch=0L, verbose=TRUE], data.table(a=1, b=NA, i.a=2L), output=msg) test(2044.84, dt1[dt2, on="b==a", verbose=TRUE], data.table(a=NA_real_, b=2L, i.b=NA), - output=msg<-"Coercing character column x.b .all-NA. to type integer to match type of i.a") + output=msg<-"Coercing all-NA character column x.b to type integer to match type of i.a") test(2044.85, dt1[dt2, on="b==a", nomatch=0L, verbose=TRUE], data.table(a=double(), b=integer(), i.b=logical()), output=msg) @@ -15420,7 +15447,7 @@ L = list(1:3, NULL, 4:6) test(2058.18, length(L), 3L) test(2058.19, as.data.table(L), data.table(V1=1:3, V2=4:6)) # V2 not V3 # no DT = data.table(a=1:3, b=c(4,5,6)) -test(2058.20, DT[,b:=list(NULL)], data.table(a=1:3)) # no +test(2058.20, DT[,b:=list(NULL)], data.table(a=1:3, b=list(NULL))) # no # rbindlist improved error message, #3638 DT = data.table(a=1) @@ -15615,7 +15642,7 @@ i = data.table(date = dbl_date, key = 'date') test(2064.1, x[i, class(date), verbose=TRUE], 'Date', output="Coercing double column i.date (which contains no fractions) to type integer to match type of x.date") test(2064.2, i[x, class(date), verbose=TRUE], 'Date', - output="Coercing integer column i.date .for join. to type double to match type of x.date") + output="Coercing integer column i.date to type double for join to match type of x.date") # complex values in grouping, #3639 set.seed(42) @@ -16544,69 +16571,69 @@ DT = data.table(a = vector("integer", 102L), b = "bbbbbbbbbbbbb", c = "ccccccccccccc", d = c("ddddddddddddd", "d")) -test(2125.02, print(DT, trunc.cols=TRUE), - output=c(" a b c", - " 1: 0 bbbbbbbbbbbbb ccccccccccccc", - " 2: 0 bbbbbbbbbbbbb ccccccccccccc", - " 3: 0 bbbbbbbbbbbbb ccccccccccccc", - " 4: 0 bbbbbbbbbbbbb ccccccccccccc", - " 5: 0 bbbbbbbbbbbbb ccccccccccccc", - " --- ", - " 98: 0 bbbbbbbbbbbbb ccccccccccccc", - " 99: 0 bbbbbbbbbbbbb ccccccccccccc", - "100: 0 bbbbbbbbbbbbb ccccccccccccc", - "101: 0 bbbbbbbbbbbbb ccccccccccccc", - "102: 0 bbbbbbbbbbbbb ccccccccccccc", - "1 variable not shown: [d]")) -test(2125.03, print(DT, trunc.cols=TRUE, row.names=FALSE), - output=c(" a b c", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " --- --- ---", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - "1 variable not shown: [d]" )) +test(2125.02, capture.output(print(DT, trunc.cols=TRUE)), + c(" a b c", + " 1: 0 bbbbbbbbbbbbb ccccccccccccc", + " 2: 0 bbbbbbbbbbbbb ccccccccccccc", + " 3: 0 bbbbbbbbbbbbb ccccccccccccc", + " 4: 0 bbbbbbbbbbbbb ccccccccccccc", + " 5: 0 bbbbbbbbbbbbb ccccccccccccc", + " --- ", + " 98: 0 bbbbbbbbbbbbb ccccccccccccc", + " 99: 0 bbbbbbbbbbbbb ccccccccccccc", + "100: 0 bbbbbbbbbbbbb ccccccccccccc", + "101: 0 bbbbbbbbbbbbb ccccccccccccc", + "102: 0 bbbbbbbbbbbbb ccccccccccccc", + "1 variable not shown: [d]")) +test(2125.03, capture.output(print(DT, trunc.cols=TRUE, row.names=FALSE)), + c(" a b c", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " --- --- ---", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + "1 variable not shown: [d]" )) # also testing #4266 -- getting width of row #s register right # TODO: understand why 2 variables truncated here. a,b,c combined have width # _exactly_ 40, but still wraps. If we set options(width=41) it won't truncate. # seems to be an issue with print.default. -test(2125.04, print(DT, trunc.cols=TRUE, class=TRUE), - output="2 variables not shown: [c , d ]") -test(2125.05, print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE), - output=c("^ a b c", ".*", - "1 variable not shown: \\[d \\]")) -test(2125.06, print(DT, trunc.cols=TRUE, col.names="none"), - output=c("^ 1: 0 bbbbbbbbbbbbb ccccccccccccc", ".*", - "1 variable not shown: \\[d\\]", "")) -test(2125.07, print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"), - output=c("^ 1: 0 bbbbbbbbbbbbb", ".*", - "2 variables not shown: \\[c, d\\]", ""), +test(2125.04, capture.output(print(DT, trunc.cols=TRUE, class=TRUE))[14L], + "2 variables not shown: [c , d ]") +test(2125.05, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE))[c(1,14)], + c(" a b c", + "1 variable not shown: [d ]" )) +test(2125.06, capture.output(print(DT, trunc.cols=TRUE, col.names="none"))[c(1,12)], + c(" 1: 0 bbbbbbbbbbbbb ccccccccccccc", + "1 variable not shown: [d]" )) +test(2125.07, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"))[c(1,13)], + c(" 1: 0 bbbbbbbbbbbbb", + "2 variables not shown: [c, d]" ), warning = "Column classes will be suppressed when col.names is 'none'") options("width" = 20) DT = data.table(a = vector("integer", 2), b = "bbbbbbbbbbbbb", c = "ccccccccccccc", d = "ddddddddddddd") -test(2125.08, print(DT, trunc.cols=TRUE), - output=c(" a b", - "1: 0 bbbbbbbbbbbbb", - "2: 0 bbbbbbbbbbbbb", - "2 variables not shown: [c, d]")) +test(2125.08, capture.output(print(DT, trunc.cols=TRUE)), + c(" a b", + "1: 0 bbbbbbbbbbbbb", + "2: 0 bbbbbbbbbbbbb", + "2 variables not shown: [c, d]")) options("width" = 10) DT = data.table(a = "aaaaaaaaaaaaa", b = "bbbbbbbbbbbbb", c = "ccccccccccccc", d = "ddddddddddddd") -test(2125.09, print(DT, trunc.cols=TRUE), - output="4 variables not shown: [a, b, c, d]") -test(2125.10, print(DT, trunc.cols=TRUE, class=TRUE), - output="4 variables not shown: [a , b , c , d ]") +test(2125.09, capture.output(print(DT, trunc.cols=TRUE)), + "4 variables not shown: [a, b, c, d]") +test(2125.10, capture.output(print(DT, trunc.cols=TRUE, class=TRUE)), + "4 variables not shown: [a , b , c , d ]") options(old_width) # segfault when i is NULL or zero-column, #4060 @@ -17987,7 +18014,7 @@ test(2230.4, setDF(merge(DT, y, by="k2", incomparables=c(1, NA, 4, 5))), merge(x test(2230.5, setDF(merge(DT, y, by="k2", incomparables=c(NA, 3, 4, 5))), merge(x, y, by="k2", incomparables=c(NA,3,4,5))) test(2230.6, merge(DT, y, by="k2", unk=1), merge(DT, y, by="k2"), warning="Unknown argument 'unk' has been passed.") test(2230.7, merge(DT, y, by="k2", NULL, NULL, FALSE, FALSE, FALSE, TRUE, c(".x", ".y"), TRUE, getOption("datatable.allow.cartesian"), NULL, 1L), - merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments.")) + merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x/by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments.")) # weighted.mean GForce optimized, #3977 old = options(datatable.optimize=1L) @@ -18501,9 +18528,7 @@ rm(.datatable.aware) # tests for trunc.char handling wide characters # 5096 local({ lc_ctype = Sys.getlocale('LC_CTYPE') - # Japanese multibyte characters require utf8. As of 2025, we're likely to be already running in a UTF-8 locale, but if not, try this setlocale() call as a last chance. - # Unfortunately, there is no guaranteed, portable way of switching to UTF-8 US English. - if (!l10n_info()$`UTF-8`) Sys.setlocale('LC_CTYPE', "en_US.UTF-8") + Sys.setlocale('LC_CTYPE', "en_US.UTF-8") # Japanese multibyte characters require utf8 on.exit(Sys.setlocale('LC_CTYPE', lc_ctype)) accented_a = "\u0061\u0301" ja_ichi = "\u4E00" @@ -20625,14 +20650,13 @@ test(2294.72, label = list(character = "C3", VCharA = "Total", integer = 2L))), warning = "For the following variables, the 'label' value was already in the data: [VCharB (label: C3), VIntA (label: 2)]") -# tests 1-3 disabled -- fix for #4784 causes various breaking changes, at least partially covered by 2295.4+. # setDT no longer leaks class modification to origin copy, #4784 -# d1 = data.frame(a=1, row.names='b') -# d2 = d1 -# setDT(d2) -# test(2295.1, !is.data.table(d1)) -# test(2295.2, rownames(d1), 'b') -# test(2295.3, is.data.table(d2)) +d1 = data.frame(a=1, row.names='b') +d2 = d1 +setDT(d2) +test(2295.1, !is.data.table(d1)) +test(2295.2, rownames(d1), 'b') +test(2295.3, is.data.table(d2)) # Ensure against regression noted in #6725 x = data.frame(a=1) e = environment() @@ -20645,18 +20669,6 @@ e = new.env(parent=topenv()) e$x = data.frame(a=1) foo('x', e) test(2295.5, is.data.table(e$x)) -# More regressions noted in #6735 -baz = function(x) setDT(x) -foo = function(x) { - bar = function() baz(x) - x = data.frame(a=1) - bar() - is.data.table(x) -} -test(2295.6, foo()) -x = data.frame(a=1) -baz(x) -test(2295.7, is.data.table(x)) # #6588: .checkTypos used to give arbitrary strings to stopf as the first argument test(2296, d2[x %no such operator% 1], error = '%no such operator%') @@ -20664,8 +20676,8 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%') # fix coercing integer/double for joins on multiple columns, #6602 x = data.table(a=1L) y = data.table(c=1L, d=1) -test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double") -test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double") +test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double") +test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double") x = data.table(a=1) y = data.table(c=1, d=1L) test(2297.03, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer") @@ -20757,33 +20769,29 @@ test(2303.2, DT[, .(N=1L), by=.(b=rev(a))], data.table(b=2:1, N=1L)) # ensure no DT = data.table(a=2:3, b=1:0, key=c('a', 'b')) test(2303.3, DT[, .N, by=.(ab=a^b, d=c(1L, 1L))], data.table(ab=c(2, 1), d=1L, N=1L)) -# NB: these tests have been edited in light of #6740 to be regression tests -# preventing existing behavior from breaking in 1.17.0 while we decide -# whether a breaking change is warranted & how to proceed. The specific tests -# with different behavior under #5558 001,002,005,006,011,012,015,016. # tests for new consistent replacement of list columns with list(NULL), #5558 # replacement of a list column with list(NULL) in a single-row data.table, using different assignment methods DT = data.table(L=list("A"), i=1L) ans = data.table(L=list(NULL), i=1L) # test using replacement with $ operator DT$L = list(NULL) -test(2304.001, DT, within(ans, rm('L'))) +test(2304.001, DT, ans) DT = data.table(L=list("A"), i=1L) # standard form with := operator -test(2304.002, copy(DT)[, L := list(NULL)], within(ans, rm('L'))) +test(2304.002, copy(DT)[, L := list(NULL)], ans) # functional form with := operator test(2304.003, copy(DT)[, `:=`(L=list(NULL))], ans) # functional form with 'let' alias test(2304.004, copy(DT)[, let(L=list(NULL))], ans) # using set() -test(2304.005, set(copy(DT), j="L", value=list(NULL)), within(ans, rm('L'))) +test(2304.005, set(copy(DT), j="L", value=list(NULL)), ans) # replacement of multiple list columns with list(NULL) in a single-row data.table, using different assignment methods DT = data.table(L1=list("A"), L2=list("B"), i=1L) ans = data.table(L1=list(NULL), L2=list(NULL), i=1L) DT$L1 = list(NULL) DT$L2 = list(NULL) -test(2304.006, DT, within(ans, rm('L1', 'L2'))) +test(2304.006, DT, ans) DT = data.table(L1=list("A"), L2=list("B"), i=1L) # standard form with := operator test(2304.007, copy(DT)[, c("L1", "L2") := list(list(NULL), list(NULL))], ans) @@ -20799,23 +20807,23 @@ DT = data.table(L=list("A", "B"), i=1L) ans = data.table(L=list(NULL, NULL), i=1L) # test using replacement with $ operator DT$L = list(NULL) -test(2304.011, DT, within(ans, rm('L'))) +test(2304.011, DT, ans) DT = data.table(L=list("A", "B"), i=1L) # standard form with := operator -test(2304.012, copy(DT)[, L := list(NULL)], within(ans, rm('L'))) +test(2304.012, copy(DT)[, L := list(NULL)], ans) # functional form with := operator test(2304.013, copy(DT)[, `:=`(L=list(NULL))], ans) # functional form with 'let' alias test(2304.014, copy(DT)[, let(L=list(NULL))], ans) # using set() -test(2304.015, set(copy(DT), j="L", value=list(NULL)), within(ans, rm('L'))) +test(2304.015, set(copy(DT), j="L", value=list(NULL)), ans) # replacement of multiple list columns with list(NULL) in a multi-row data.table, using different assignment methods DT = data.table(L1=list("A", "B"), L2=list("B", "C"), i=1L) ans = data.table(L1=list(NULL, NULL), L2=list(NULL, NULL), i=1L) DT$L1 = list(NULL) DT$L2 = list(NULL) -test(2304.016, DT, within(ans, rm('L1', 'L2'))) +test(2304.016, DT, ans) DT = data.table(L1=list("A", "B"), L2=list("B", "C"), i=1L) # standard form with := operator test(2304.017, copy(DT)[, c("L1", "L2") := list(list(NULL), list(NULL))], ans) @@ -21041,46 +21049,3 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list( # the integer overflow in #6729 is only noticeable with UBSan test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE }) - -<<<<<<< HEAD -if (exists("sort_by", "package:base")) { - # sort_by.data.table - DT1 = data.table(a=c(1, 3, 2, NA, 3), b=4:0) - DT2 = data.table(a=c("c", "a", "B")) # data.table uses C-locale and should sort_by if cedta() - DT3 = data.table(a=c(1, 2, 3), b=list(c("a", "b", "", NA), c(1, 3, 2, 0), c(TRUE, TRUE, FALSE, NA))) # list column - - # sort_by.data.table: basics - test(2306.01, sort_by(DT1, ~a + b), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) - test(2306.02, sort_by(DT1, ~I(a + b)), data.table(a=c(3, 2, 1, 3, NA), b=c(0L, 2L, 4L, 3L, 1L))) - test(2306.03, sort_by(DT2, ~a), data.table(a=c("B", "a", "c"))) - - # sort_by.data.table: list columns. - # NOTE 1: .formula2varlist works well with list columns. - # NOTE 2: 4 elem in DT of 3 row because forderv takes a list column as a DT. - test(2306.04, sort_by(DT3, ~b), DT3[order(b)]) # should be consistent. - - # sort_by.data.table: additional C-locale sorting - test(2306.10, DT2[, sort_by(.SD, a)], data.table(a=c("B", "a", "c"))) - test(2306.11, DT2[, sort_by(.SD, ~a)], data.table(a=c("B", "a", "c"))) - - # sort_by.data.table: various working interfaces - test(2306.20, sort_by(DT1, list(DT1$a, DT1$b)), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) - test(2306.21, sort_by(DT1, DT1[, .(a, b)]), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) - test(2306.22, DT1[, sort_by(.SD, .(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) - test(2306.23, DT1[, sort_by(.SD, ~a + b)], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) - test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) -} -======= -#test for enhancing error message of invalid column #6512 -uniq_diff <- function(...) { - .Call("uniq_diff", ...) -} -capture_error_message <- function(expr) { - msg <- tryCatch( - { expr; NULL }, - error = function(e) e$message - ) - msg -} -test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg)) # Ensure -1 is reported}, TRUE) ->>>>>>> 1b4a51d3 (add test case) From a26924cd6ecbf08c79b19f0e686b593f8232308f Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 01:55:43 +0530 Subject: [PATCH 10/25] add test --- inst/tests/tests.Rraw | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1d31d232ce..c443f26a63 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21049,3 +21049,16 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list( # the integer overflow in #6729 is only noticeable with UBSan test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE }) + +#test for enhancing error message of invalid column #6512 +uniq_diff <- function(...) { + .Call("uniq_diff", ...) +} +capture_error_message <- function(expr) { + msg <- tryCatch( + { expr; NULL }, + error = function(e) e$message + ) + msg +} +test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) From 7b74601648bb3c336a2d79234e5317454f4a4972 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 02:13:53 +0530 Subject: [PATCH 11/25] add test result --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c443f26a63..2a9346744a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21050,7 +21050,7 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list( # the integer overflow in #6729 is only noticeable with UBSan test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE }) -#test for enhancing error message of invalid column #6512 +# test for enhancing error message of invalid column #6512 uniq_diff <- function(...) { .Call("uniq_diff", ...) } From ec3ea650a904a44f036d630c2d3ea9486b0f8718 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 03:01:53 +0530 Subject: [PATCH 12/25] test added 1 --- inst/tests/tests.Rraw | 13 +++++++++++++ src/fmelt.c | 4 +--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2a9346744a..5acbc61b24 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21062,3 +21062,16 @@ capture_error_message <- function(expr) { msg } test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) + +# test for enhancing error message of invalid column #6512 +uniq_diff <- function(...) { + .Call("uniq_diff", ...) +} +capture_error_message <- function(expr) { + msg <- tryCatch( + { expr; NULL }, + error = function(e) e$message + ) + msg +} +test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) \ No newline at end of file diff --git a/src/fmelt.c b/src/fmelt.c index 8eb257ae2e..f031cc350f 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -185,9 +185,7 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { for (int i = 0; i < length(int_vec); ++i) { int col_number = INTEGER(int_vec)[i]; bool good_number = (col_number > 0 && col_number <= ncol); - if (is_measure) { - good_number |= (col_number == NA_INTEGER); - } + if (is_measure) good_number |= (col_number == NA_INTEGER); if (!good_number) { invalid_col_ptr[invalid_count++] = col_number; } else if (!LOGICAL(is_duplicated)[i]) { From ba8131c021aac5c55600491f05ce48883897e786 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 03:15:43 +0530 Subject: [PATCH 13/25] Revert "add test" This reverts commit 11345b98356475fe050582894c48ea9f643cf504. --- inst/tests/tests.Rraw | 313 +++++++++++++++++++++++------------------- 1 file changed, 174 insertions(+), 139 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 5acbc61b24..a59563829f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -159,9 +159,6 @@ TZnotUTC = !identical(tt,"") && !is_utc(tt) # (3) function factory for matching messages exactly by substituting anything between delimiters [delim, fmt=TRUE] # (4) function factory for matching messages exactly by substituting a generic string [fmt=string] get_msg = function(e, delim, fmt=FALSE) { - ufq = options(useFancyQuotes = FALSE) # otherwise we get angled quotes, hard to match robustly - on.exit(options(ufq)) - condition = tryCatch({e; NULL}, error=identity, warning=identity) if (is.null(condition)) return(condition) msg = condition$message @@ -1364,43 +1361,20 @@ if (test_bit64) { test(431.5, DT[5,1:=as.integer64(NA)], data.table(a=factor(c(NA,NA,NA,NA,NA), levels=LETTERS[1:3]), b=1:5)) } -old = getOption("datatable.alloccol") # Test that unsetting datatable.alloccol is caught, #2014 -options(datatable.alloccol=NULL) # In this =NULL case, options() in R 3.0.0 returned TRUE rather than the old value. This R bug was fixed in R 3.1.1. - # This is why getOption is called first rather than just using the result of option() like elsewhere in this test file. - # TODO: simplify this test if/when R dependency >= 3.1.1 -err1 = try(data.table(a=1:3), silent=TRUE) -options(datatable.alloccol="1024") -err2 = try(data.table(a=1:3), silent=TRUE) -options(datatable.alloccol=c(10L,20L)) -err3 = try(data.table(a=1:3), silent=TRUE) -options(datatable.alloccol=NA_integer_) -err4 = try(data.table(a=1:3), silent=TRUE) -options(datatable.alloccol=-1) -err5 = try(data.table(a=1:3), silent=TRUE) -options(datatable.alloccol=1024L) # otherwise test() itself fails in its internals with the alloc.col error -test(432.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1)) -test(432.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2)) -test(432.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3)) -test(432.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4)) -test(432.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5)) +# Test that unsetting datatable.alloccol is caught, #2014 +test(432.1, data.table(a=1:3), options=list(datatable.alloccol=NULL), error="Has getOption('datatable.alloccol') somehow become unset?") +test(432.2, data.table(a=1:3), options=c(datatable.alloccol="1024"), error="getOption('datatable.alloccol') should be a number, by default 1024. But its type is 'character'.") +test(432.3, data.table(a=1:3), options=list(datatable.alloccol=c(10L,20L)), error="is a numeric vector ok but its length is 2. Its length should be 1.") +test(432.4, data.table(a=1:3), options=c(datatable.alloccol=NA_integer_), error="It must be >=0 and not NA.") +test(432.5, data.table(a=1:3), options=c(datatable.alloccol=-1), error="It must be >=0 and not NA.") + # Repeat the tests but this time with subsetting, to ensure the validity check on option happens for those too DT = data.table(a=1:3, b=4:6) -options(datatable.alloccol=NULL) -err1 = try(DT[2,], silent=TRUE) -options(datatable.alloccol="1024") -err2 = try(DT[,2], silent=TRUE) -options(datatable.alloccol=c(10L,20L)) -err3 = try(DT[a>1], silent=TRUE) -options(datatable.alloccol=NA_integer_) -err4 = try(DT[,"b"], silent=TRUE) -options(datatable.alloccol=-1) -err5 = try(DT[2,"b"], silent=TRUE) -options(datatable.alloccol=1024L) # otherwise test() itself fails in its internals with the alloc.col error -test(433.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1)) -test(433.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2)) -test(433.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3)) -test(433.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4)) -test(433.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5)) +test(433.1, DT[2,], options=list(datatable.alloccol=NULL), error="Has getOption('datatable.alloccol') somehow become unset?") +test(433.2, DT[,2], options=c(datatable.alloccol="1024"), error="getOption('datatable.alloccol') should be a number, by default 1024. But its type is 'character'.") +test(433.3, DT[a>1], options=list(datatable.alloccol=c(10L,20L)), error="is a numeric vector ok but its length is 2. Its length should be 1.") +test(433.4, DT[,"b"], options=c(datatable.alloccol=NA_integer_), error="It must be >=0 and not NA.") +test(433.5, DT[2,"b"], options=c(datatable.alloccol=-1), error="It must be >=0 and not NA.") # simple realloc test DT = data.table(a=1:3,b=4:6) @@ -7066,7 +7040,7 @@ ee = new.env() ee$DT = data.frame(x=1L, y=1:3) setattr(ee$DT, 'class', c("data.table", "data.frame")) test(1482.1, truelength(ee$DT), 0L) # make sure that the simulated environment is right. -test(1482.2, ee$DT[, z := 3:1], data.table(x=1L, y=1:3, z=3:1), warning="Invalid .internal.selfref detected and") +test(1482.2, ee$DT[, z := 3:1], data.table(x=1L, y=1:3, z=3:1), warning="A shallow copy of this data.table was taken") test(1482.3, truelength(ee$DT), 1027L) test(1482.4, ee$DT[, za := 4:6], data.table(x=1L, y=1:3, z=3:1, za=4:6)) test(1482.5, truelength(ee$DT), 1027L) # should have used spare slot i.e. no increase in tl @@ -7891,7 +7865,7 @@ test(1551.5, fread(str), rhs = setDT(read.table(testDir("issue_1095_fread.txt.bz2"), sep=",", comment.char="", stringsAsFactors=FALSE, quote="", strip.white=TRUE)) if (test_R.utils) { test(1551.61, fread(testDir("issue_1095_fread.txt.bz2"), logical01=FALSE), rhs, warning=w) - rhs[, names(.SD) := lapply(.SD, \(x) x == "Y"), .SDcols = c("V16", "V17", "V45")] + rhs[, names(.SD) := lapply(.SD, function(x) x == "Y"), .SDcols = c("V16", "V17", "V45")] test(1551.62, fread(testDir("issue_1095_fread.txt.bz2"), logical01=FALSE, logicalYN=TRUE), rhs, warning=w) } @@ -8567,14 +8541,13 @@ DT1 = data.table(a=1) test(1601.1, merge(DT1, DT1, by="a"), data.table(a=1, key="a")) test(1601.2, merge(DT1, DT0, by="a"), warning="Input data.table 'y' has no columns.", - error="Elements listed in `by`") + error="The following columns listed in `by` are missing from y: [a]") test(1601.3, merge(DT0, DT1, by="a"), warning="Input data.table 'x' has no columns.", - error="Elements listed in `by`") + error="The following columns listed in `by` are missing from x: [a]") test(1601.4, merge(DT0, DT0, by="a"), warning="Neither of the input data.tables to join have columns.", - error="Elements listed in `by`") - + error="The following columns listed in `by` are missing from x: [a]") # fix for #1549 d1 <- data.table(v1=1:2,x=x) d2 <- data.table(v1=3:4) @@ -8716,17 +8689,17 @@ test(1613.21, all.equal(DT2, DT1, ignore.row.order = TRUE), "Dataset 'current' h # test attributes: key DT1 <- data.table(a = 1:4, b = letters[1:4], key = "a") DT2 <- data.table(a = 1:4, b = letters[1:4]) -test(1613.22, all.equal(DT1, DT2), "Datasets have different keys. 'target': [a]. 'current': has no key.") +test(1613.22, all.equal(DT1, DT2), output="Datasets have different keys. 'target': [a]. 'current': has no key.") test(1613.23, all.equal(DT1, DT2, check.attributes = FALSE), TRUE) test(1613.24, all.equal(DT1, setkeyv(DT2, "a"), check.attributes = TRUE), TRUE) # test attributes: index DT1 <- data.table(a = 1:4, b = letters[1:4]) DT2 <- data.table(a = 1:4, b = letters[1:4]) setindexv(DT1, "b") -test(1613.25, all.equal(DT1, DT2), "Datasets have different indices. 'target': [b]. 'current': has no index.") +test(1613.25, all.equal(DT1, DT2), output="Datasets have different indices. 'target': [b]. 'current': has no index.") test(1613.26, all.equal(DT1, DT2, check.attributes = FALSE), TRUE) -test(1613.27, all.equal(DT1, setindexv(DT2, "a")), "Datasets have different indices. 'target': [b]. 'current': [a].") -test(1613.28, all.equal(DT1, setindexv(DT2, "b")), "Datasets have different indices. 'target': [b]. 'current': [a, b].") +test(1613.27, all.equal(DT1, setindexv(DT2, "a")), output="Datasets have different indices. 'target': [b]. 'current': [a].") +test(1613.28, all.equal(DT1, setindexv(DT2, "b")), output="Datasets have different indices. 'target': [b]. 'current': [a, b].") test(1613.29, all.equal(DT1, setindexv(setindexv(DT2, NULL), "b")), TRUE) # test custom attribute DT1 <- data.table(a = 1:4, b = letters[1:4]) @@ -10995,7 +10968,8 @@ DT = data.table( D = as.POSIXct(dt<-paste(d,t), tz="UTC"), E = as.POSIXct(paste0(dt,c(".999",".0",".5",".111112",".123456",".023",".0",".999999",".99",".0009")), tz="UTC")) -test(1740.1, fwrite(DT,dateTimeAs="iso"), error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv")) +test(1740.1, fwrite(DT,dateTimeAs="iso"), + error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv")) test(1740.2, fwrite(DT,dateTimeAs=c("ISO","squash")), error=base_messages$match_arg_length) test(1740.3, capture.output(fwrite(DT,dateTimeAs="ISO")), c( "A,B,C,D,E", @@ -11106,7 +11080,7 @@ test(1743.123, fread("a,b\n1+3i,2015-01-01", colClasses=c(NA,"IDate")), data.tab test(1743.13, lapply(fread("a,b\n09/05/98,2015-01-01", colClasses = "Date"), class), y=list(a="character", b=c("IDate", "Date")), warning=base_messages$ambiguous_date_fmt) ## Just invalid -test(1743.14, options = c(useFancyQuotes = FALSE), +test(1743.14, sapply(fread("a,b\n2017-01-01,1", colClasses=c("foo", "integer")), class), c(a="character", b="integer"), warning=base_messages$missing_coerce_method) test(1743.15, sapply(fread("a,b\n2017-01-01,1", colClasses=c("foo", "integer")), class), c(a="character", b="integer"), warning="the column has been left as type .*character") @@ -11813,15 +11787,15 @@ test(1775.1, capture.output(print(DT1, print.keys = TRUE)), c("Key: ", " a", "1: 1", "2: 2", "3: 3")) DT2 <- data.table(a = 1:3, b = 4:6) setindexv(DT2, c("b","a")) -test(1775.2, capture.output(print(DT2, print.keys = TRUE)), - c("Index: ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) +test(1775.2, print(DT2, print.keys = TRUE), + output=c("Index: ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) setindexv(DT2, "b") -test(1775.3, capture.output(print(DT2, print.keys = TRUE)), - c("Indices: , ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) +test(1775.3, print(DT2, print.keys = TRUE), + output=c("Indices: , ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) setkey(DT2, a) setindexv(DT2, c("b","a")) -test(1775.4, capture.output(print(DT2, print.keys = TRUE)), - c("Key: ", "Indices: , ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) ## index 'b' is still good, so we keep it +test(1775.4, print(DT2, print.keys = TRUE), + output=c("Key: ", "Indices: , ", " a b", "1: 1 4", "2: 2 5", "3: 3 6")) ## index 'b' is still good, so we keep it # dev regression #2285 cat("A B C\n1 2 3\n4 5 6", file=f<-tempfile()) @@ -12145,8 +12119,7 @@ test(1831.4, fread(paste0("A\n", "1.", src2)), data.table(A=1.1234567890098766)) DT = as.data.table(matrix(5L, nrow=10, ncol=10)) test(1832.1, fwrite(DT, f<-tempfile(), verbose=TRUE), output="Column writers") DT = as.data.table(matrix(5L, nrow=10, ncol=60)) -# Using capture.output directly to look for the "..." because test(,output=) intercepts [] for convenience elsewhere -test(1832.2, any(grepl("^Column writers.* [.][.][.] ", capture.output(fwrite(DT, f, verbose=TRUE))))) +test(1832.2, fwrite(DT, f, verbose=TRUE), output = "\nColumn writers.* [.][.][.] ") unlink(f) # ensure explicitly setting select to default value doesn't error, #2007 @@ -13546,14 +13519,14 @@ test(1962.016, merge(DT1, DT2, by.x = 'a', by.y = c('a', 'V')), test(1962.017, merge(DT1, DT2, by = 'V', by.x = 'a', by.y = 'a'), data.table(a = 2:3, V.x = c("a", "a"), V.y = c("b", "b"), key = 'a'), warning = 'Supplied both.*argument will be ignored') -test(1962.018, merge(DT1, DT2, by.x = 'z', by.y = 'a'), - error = 'Elements listed in `by.x`') -test(1962.019, merge(DT1, DT2, by.x = 'a', by.y = 'z'), - error = 'Elements listed in `by.y`') +test(1962.018, merge(DT1, DT2, by.x='z', by.y='a'), + error="The following columns listed in `by.x` are missing from x: [z]") +test(1962.019, merge(DT1, DT2, by.x='a', by.y='z'), + error="The following columns listed in `by.y` are missing from y: [z]") test(1962.0201, merge(DT1, DT2, by=character(0L)), ans) # was error before PR#5183 test(1962.0202, merge(DT1, DT2, by=NULL), ans) # test explicit NULL too as missing() could be used inside merge() -test(1962.021, merge(DT1, DT2, by = 'z'), - error = 'must be valid column names in x and y') +test(1962.021, merge(DT1, DT2, by='z'), + error='The following columns listed in `by` are missing from x: [z]') ## frank.R x = c(1, 1, 2, 5, 4, 3, 4, NA, 6) @@ -14942,7 +14915,7 @@ test(2037.1, foo(DT), output='Please remember to always setDT()') # no assignment was made to DT test(2037.2, names(DT), 'a') # _selrefok() verbose message was duplicated -test(2037.3, unname(table(unlist(strsplit(capture.output(foo(DT)), '\n|\\s+')))['ptr']), 1L) +test(2037.3, foo(DT), output="data.table internal attributes", notOutput="data.table internal attributes.*data.table internal attributes") # `between` invalid args, and verbose #3516 test(2038.01, between(1:5, 2, 4, incbounds=423), error="incbounds must be TRUE or FALSE") @@ -15166,13 +15139,13 @@ test(2044.60, dt1[dt2, ..cols, on="int==doubleInt", verbose=TRUE], test(2044.61, dt1[dt2, ..cols, on="int==realDouble", verbose=TRUE], # this was wrong in v1.12.2 (the fractions were truncated and joined to next lowest int) data.table(x.bool=c(NA,FALSE,NA,FALSE,NA), x.int=INT(NA,1,NA,2,NA), x.doubleInt=c(NA,1,NA,2,NA), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]), - output="Coercing integer column x.int to type double to match type of i.realDouble which contains fractions") + output="Coercing integer column x.int to type double to match type of i.realDouble .which contains fractions.") test(2044.62, dt1[dt2, ..cols, on="doubleInt==int", verbose=TRUE], data.table(x.bool=FALSE, x.int=1:5, x.doubleInt=as.double(1:5), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]), - output="Coercing integer column i.int to type double for join to match type of x.doubleInt") + output="Coercing integer column i.int .for join. to type double to match type of x.doubleInt") test(2044.63, dt1[dt2, ..cols, on="realDouble==int", verbose=TRUE], data.table(x.bool=c(rep(FALSE,4),TRUE), x.int=INT(2,4,6,8,10), x.doubleInt=c(2,4,6,8,10), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]), - output="Coercing integer column i.int to type double for join to match type of x.realDouble") + output="Coercing integer column i.int .for join. to type double to match type of x.realDouble") cols = c("x.int","x.char","x.fact","i.int","i.char","i.char") test(2044.64, dt1[dt2, ..cols, on="char==fact", verbose=TRUE], ans<-data.table(x.int=1:5, x.char=letters[1:5], x.fact=factor(letters[1:5]), i.int=1:5, i.char=letters[1:5], i.char=letters[1:5]), @@ -15207,15 +15180,15 @@ if (test_bit64) { dt1 = data.table(a=1, b=NA_character_) dt2 = data.table(a=2L, b=NA) test(2044.80, dt1[dt2, on="a==b", verbose=TRUE], data.table(a=NA, b=NA_character_, i.a=2L), - output=msg<-"Coercing all-NA logical column i.b to type double to match type of x.a") + output=msg<-"Coercing logical column i.b .all-NA. to type double to match type of x.a") test(2044.81, dt1[dt2, on="a==b", nomatch=0L, verbose=TRUE], data.table(a=logical(), b=character(), i.a=integer()), output=msg) test(2044.82, dt1[dt2, on="b==b", verbose=TRUE], data.table(a=1, b=NA, i.a=2L), - output=msg<-"Coercing all-NA logical column i.b to type character to match type of x.b") + output=msg<-"Coercing logical column i.b .all-NA. to type character to match type of x.b") test(2044.83, dt1[dt2, on="b==b", nomatch=0L, verbose=TRUE], data.table(a=1, b=NA, i.a=2L), output=msg) test(2044.84, dt1[dt2, on="b==a", verbose=TRUE], data.table(a=NA_real_, b=2L, i.b=NA), - output=msg<-"Coercing all-NA character column x.b to type integer to match type of i.a") + output=msg<-"Coercing character column x.b .all-NA. to type integer to match type of i.a") test(2044.85, dt1[dt2, on="b==a", nomatch=0L, verbose=TRUE], data.table(a=double(), b=integer(), i.b=logical()), output=msg) @@ -15447,7 +15420,7 @@ L = list(1:3, NULL, 4:6) test(2058.18, length(L), 3L) test(2058.19, as.data.table(L), data.table(V1=1:3, V2=4:6)) # V2 not V3 # no DT = data.table(a=1:3, b=c(4,5,6)) -test(2058.20, DT[,b:=list(NULL)], data.table(a=1:3, b=list(NULL))) # no +test(2058.20, DT[,b:=list(NULL)], data.table(a=1:3)) # no # rbindlist improved error message, #3638 DT = data.table(a=1) @@ -15642,7 +15615,7 @@ i = data.table(date = dbl_date, key = 'date') test(2064.1, x[i, class(date), verbose=TRUE], 'Date', output="Coercing double column i.date (which contains no fractions) to type integer to match type of x.date") test(2064.2, i[x, class(date), verbose=TRUE], 'Date', - output="Coercing integer column i.date to type double for join to match type of x.date") + output="Coercing integer column i.date .for join. to type double to match type of x.date") # complex values in grouping, #3639 set.seed(42) @@ -16571,69 +16544,69 @@ DT = data.table(a = vector("integer", 102L), b = "bbbbbbbbbbbbb", c = "ccccccccccccc", d = c("ddddddddddddd", "d")) -test(2125.02, capture.output(print(DT, trunc.cols=TRUE)), - c(" a b c", - " 1: 0 bbbbbbbbbbbbb ccccccccccccc", - " 2: 0 bbbbbbbbbbbbb ccccccccccccc", - " 3: 0 bbbbbbbbbbbbb ccccccccccccc", - " 4: 0 bbbbbbbbbbbbb ccccccccccccc", - " 5: 0 bbbbbbbbbbbbb ccccccccccccc", - " --- ", - " 98: 0 bbbbbbbbbbbbb ccccccccccccc", - " 99: 0 bbbbbbbbbbbbb ccccccccccccc", - "100: 0 bbbbbbbbbbbbb ccccccccccccc", - "101: 0 bbbbbbbbbbbbb ccccccccccccc", - "102: 0 bbbbbbbbbbbbb ccccccccccccc", - "1 variable not shown: [d]")) -test(2125.03, capture.output(print(DT, trunc.cols=TRUE, row.names=FALSE)), - c(" a b c", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " --- --- ---", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - " 0 bbbbbbbbbbbbb ccccccccccccc", - "1 variable not shown: [d]" )) +test(2125.02, print(DT, trunc.cols=TRUE), + output=c(" a b c", + " 1: 0 bbbbbbbbbbbbb ccccccccccccc", + " 2: 0 bbbbbbbbbbbbb ccccccccccccc", + " 3: 0 bbbbbbbbbbbbb ccccccccccccc", + " 4: 0 bbbbbbbbbbbbb ccccccccccccc", + " 5: 0 bbbbbbbbbbbbb ccccccccccccc", + " --- ", + " 98: 0 bbbbbbbbbbbbb ccccccccccccc", + " 99: 0 bbbbbbbbbbbbb ccccccccccccc", + "100: 0 bbbbbbbbbbbbb ccccccccccccc", + "101: 0 bbbbbbbbbbbbb ccccccccccccc", + "102: 0 bbbbbbbbbbbbb ccccccccccccc", + "1 variable not shown: [d]")) +test(2125.03, print(DT, trunc.cols=TRUE, row.names=FALSE), + output=c(" a b c", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " --- --- ---", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + " 0 bbbbbbbbbbbbb ccccccccccccc", + "1 variable not shown: [d]" )) # also testing #4266 -- getting width of row #s register right # TODO: understand why 2 variables truncated here. a,b,c combined have width # _exactly_ 40, but still wraps. If we set options(width=41) it won't truncate. # seems to be an issue with print.default. -test(2125.04, capture.output(print(DT, trunc.cols=TRUE, class=TRUE))[14L], - "2 variables not shown: [c , d ]") -test(2125.05, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE))[c(1,14)], - c(" a b c", - "1 variable not shown: [d ]" )) -test(2125.06, capture.output(print(DT, trunc.cols=TRUE, col.names="none"))[c(1,12)], - c(" 1: 0 bbbbbbbbbbbbb ccccccccccccc", - "1 variable not shown: [d]" )) -test(2125.07, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"))[c(1,13)], - c(" 1: 0 bbbbbbbbbbbbb", - "2 variables not shown: [c, d]" ), +test(2125.04, print(DT, trunc.cols=TRUE, class=TRUE), + output="2 variables not shown: [c , d ]") +test(2125.05, print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE), + output=c("^ a b c", ".*", + "1 variable not shown: \\[d \\]")) +test(2125.06, print(DT, trunc.cols=TRUE, col.names="none"), + output=c("^ 1: 0 bbbbbbbbbbbbb ccccccccccccc", ".*", + "1 variable not shown: \\[d\\]", "")) +test(2125.07, print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"), + output=c("^ 1: 0 bbbbbbbbbbbbb", ".*", + "2 variables not shown: \\[c, d\\]", ""), warning = "Column classes will be suppressed when col.names is 'none'") options("width" = 20) DT = data.table(a = vector("integer", 2), b = "bbbbbbbbbbbbb", c = "ccccccccccccc", d = "ddddddddddddd") -test(2125.08, capture.output(print(DT, trunc.cols=TRUE)), - c(" a b", - "1: 0 bbbbbbbbbbbbb", - "2: 0 bbbbbbbbbbbbb", - "2 variables not shown: [c, d]")) +test(2125.08, print(DT, trunc.cols=TRUE), + output=c(" a b", + "1: 0 bbbbbbbbbbbbb", + "2: 0 bbbbbbbbbbbbb", + "2 variables not shown: [c, d]")) options("width" = 10) DT = data.table(a = "aaaaaaaaaaaaa", b = "bbbbbbbbbbbbb", c = "ccccccccccccc", d = "ddddddddddddd") -test(2125.09, capture.output(print(DT, trunc.cols=TRUE)), - "4 variables not shown: [a, b, c, d]") -test(2125.10, capture.output(print(DT, trunc.cols=TRUE, class=TRUE)), - "4 variables not shown: [a , b , c , d ]") +test(2125.09, print(DT, trunc.cols=TRUE), + output="4 variables not shown: [a, b, c, d]") +test(2125.10, print(DT, trunc.cols=TRUE, class=TRUE), + output="4 variables not shown: [a , b , c , d ]") options(old_width) # segfault when i is NULL or zero-column, #4060 @@ -18014,7 +17987,7 @@ test(2230.4, setDF(merge(DT, y, by="k2", incomparables=c(1, NA, 4, 5))), merge(x test(2230.5, setDF(merge(DT, y, by="k2", incomparables=c(NA, 3, 4, 5))), merge(x, y, by="k2", incomparables=c(NA,3,4,5))) test(2230.6, merge(DT, y, by="k2", unk=1), merge(DT, y, by="k2"), warning="Unknown argument 'unk' has been passed.") test(2230.7, merge(DT, y, by="k2", NULL, NULL, FALSE, FALSE, FALSE, TRUE, c(".x", ".y"), TRUE, getOption("datatable.allow.cartesian"), NULL, 1L), - merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x/by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments.")) + merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments.")) # weighted.mean GForce optimized, #3977 old = options(datatable.optimize=1L) @@ -18528,7 +18501,9 @@ rm(.datatable.aware) # tests for trunc.char handling wide characters # 5096 local({ lc_ctype = Sys.getlocale('LC_CTYPE') - Sys.setlocale('LC_CTYPE', "en_US.UTF-8") # Japanese multibyte characters require utf8 + # Japanese multibyte characters require utf8. As of 2025, we're likely to be already running in a UTF-8 locale, but if not, try this setlocale() call as a last chance. + # Unfortunately, there is no guaranteed, portable way of switching to UTF-8 US English. + if (!l10n_info()$`UTF-8`) Sys.setlocale('LC_CTYPE', "en_US.UTF-8") on.exit(Sys.setlocale('LC_CTYPE', lc_ctype)) accented_a = "\u0061\u0301" ja_ichi = "\u4E00" @@ -20650,13 +20625,14 @@ test(2294.72, label = list(character = "C3", VCharA = "Total", integer = 2L))), warning = "For the following variables, the 'label' value was already in the data: [VCharB (label: C3), VIntA (label: 2)]") +# tests 1-3 disabled -- fix for #4784 causes various breaking changes, at least partially covered by 2295.4+. # setDT no longer leaks class modification to origin copy, #4784 -d1 = data.frame(a=1, row.names='b') -d2 = d1 -setDT(d2) -test(2295.1, !is.data.table(d1)) -test(2295.2, rownames(d1), 'b') -test(2295.3, is.data.table(d2)) +# d1 = data.frame(a=1, row.names='b') +# d2 = d1 +# setDT(d2) +# test(2295.1, !is.data.table(d1)) +# test(2295.2, rownames(d1), 'b') +# test(2295.3, is.data.table(d2)) # Ensure against regression noted in #6725 x = data.frame(a=1) e = environment() @@ -20669,6 +20645,18 @@ e = new.env(parent=topenv()) e$x = data.frame(a=1) foo('x', e) test(2295.5, is.data.table(e$x)) +# More regressions noted in #6735 +baz = function(x) setDT(x) +foo = function(x) { + bar = function() baz(x) + x = data.frame(a=1) + bar() + is.data.table(x) +} +test(2295.6, foo()) +x = data.frame(a=1) +baz(x) +test(2295.7, is.data.table(x)) # #6588: .checkTypos used to give arbitrary strings to stopf as the first argument test(2296, d2[x %no such operator% 1], error = '%no such operator%') @@ -20676,8 +20664,8 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%') # fix coercing integer/double for joins on multiple columns, #6602 x = data.table(a=1L) y = data.table(c=1L, d=1) -test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double") -test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double") +test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double") +test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double") x = data.table(a=1) y = data.table(c=1, d=1L) test(2297.03, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer") @@ -20769,29 +20757,33 @@ test(2303.2, DT[, .(N=1L), by=.(b=rev(a))], data.table(b=2:1, N=1L)) # ensure no DT = data.table(a=2:3, b=1:0, key=c('a', 'b')) test(2303.3, DT[, .N, by=.(ab=a^b, d=c(1L, 1L))], data.table(ab=c(2, 1), d=1L, N=1L)) +# NB: these tests have been edited in light of #6740 to be regression tests +# preventing existing behavior from breaking in 1.17.0 while we decide +# whether a breaking change is warranted & how to proceed. The specific tests +# with different behavior under #5558 001,002,005,006,011,012,015,016. # tests for new consistent replacement of list columns with list(NULL), #5558 # replacement of a list column with list(NULL) in a single-row data.table, using different assignment methods DT = data.table(L=list("A"), i=1L) ans = data.table(L=list(NULL), i=1L) # test using replacement with $ operator DT$L = list(NULL) -test(2304.001, DT, ans) +test(2304.001, DT, within(ans, rm('L'))) DT = data.table(L=list("A"), i=1L) # standard form with := operator -test(2304.002, copy(DT)[, L := list(NULL)], ans) +test(2304.002, copy(DT)[, L := list(NULL)], within(ans, rm('L'))) # functional form with := operator test(2304.003, copy(DT)[, `:=`(L=list(NULL))], ans) # functional form with 'let' alias test(2304.004, copy(DT)[, let(L=list(NULL))], ans) # using set() -test(2304.005, set(copy(DT), j="L", value=list(NULL)), ans) +test(2304.005, set(copy(DT), j="L", value=list(NULL)), within(ans, rm('L'))) # replacement of multiple list columns with list(NULL) in a single-row data.table, using different assignment methods DT = data.table(L1=list("A"), L2=list("B"), i=1L) ans = data.table(L1=list(NULL), L2=list(NULL), i=1L) DT$L1 = list(NULL) DT$L2 = list(NULL) -test(2304.006, DT, ans) +test(2304.006, DT, within(ans, rm('L1', 'L2'))) DT = data.table(L1=list("A"), L2=list("B"), i=1L) # standard form with := operator test(2304.007, copy(DT)[, c("L1", "L2") := list(list(NULL), list(NULL))], ans) @@ -20807,23 +20799,23 @@ DT = data.table(L=list("A", "B"), i=1L) ans = data.table(L=list(NULL, NULL), i=1L) # test using replacement with $ operator DT$L = list(NULL) -test(2304.011, DT, ans) +test(2304.011, DT, within(ans, rm('L'))) DT = data.table(L=list("A", "B"), i=1L) # standard form with := operator -test(2304.012, copy(DT)[, L := list(NULL)], ans) +test(2304.012, copy(DT)[, L := list(NULL)], within(ans, rm('L'))) # functional form with := operator test(2304.013, copy(DT)[, `:=`(L=list(NULL))], ans) # functional form with 'let' alias test(2304.014, copy(DT)[, let(L=list(NULL))], ans) # using set() -test(2304.015, set(copy(DT), j="L", value=list(NULL)), ans) +test(2304.015, set(copy(DT), j="L", value=list(NULL)), within(ans, rm('L'))) # replacement of multiple list columns with list(NULL) in a multi-row data.table, using different assignment methods DT = data.table(L1=list("A", "B"), L2=list("B", "C"), i=1L) ans = data.table(L1=list(NULL, NULL), L2=list(NULL, NULL), i=1L) DT$L1 = list(NULL) DT$L2 = list(NULL) -test(2304.016, DT, ans) +test(2304.016, DT, within(ans, rm('L1', 'L2'))) DT = data.table(L1=list("A", "B"), L2=list("B", "C"), i=1L) # standard form with := operator test(2304.017, copy(DT)[, c("L1", "L2") := list(list(NULL), list(NULL))], ans) @@ -21050,6 +21042,49 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list( # the integer overflow in #6729 is only noticeable with UBSan test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE }) +<<<<<<< HEAD +if (exists("sort_by", "package:base")) { + # sort_by.data.table + DT1 = data.table(a=c(1, 3, 2, NA, 3), b=4:0) + DT2 = data.table(a=c("c", "a", "B")) # data.table uses C-locale and should sort_by if cedta() + DT3 = data.table(a=c(1, 2, 3), b=list(c("a", "b", "", NA), c(1, 3, 2, 0), c(TRUE, TRUE, FALSE, NA))) # list column + + # sort_by.data.table: basics + test(2306.01, sort_by(DT1, ~a + b), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) + test(2306.02, sort_by(DT1, ~I(a + b)), data.table(a=c(3, 2, 1, 3, NA), b=c(0L, 2L, 4L, 3L, 1L))) + test(2306.03, sort_by(DT2, ~a), data.table(a=c("B", "a", "c"))) + + # sort_by.data.table: list columns. + # NOTE 1: .formula2varlist works well with list columns. + # NOTE 2: 4 elem in DT of 3 row because forderv takes a list column as a DT. + test(2306.04, sort_by(DT3, ~b), DT3[order(b)]) # should be consistent. + + # sort_by.data.table: additional C-locale sorting + test(2306.10, DT2[, sort_by(.SD, a)], data.table(a=c("B", "a", "c"))) + test(2306.11, DT2[, sort_by(.SD, ~a)], data.table(a=c("B", "a", "c"))) + + # sort_by.data.table: various working interfaces + test(2306.20, sort_by(DT1, list(DT1$a, DT1$b)), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) + test(2306.21, sort_by(DT1, DT1[, .(a, b)]), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) + test(2306.22, DT1[, sort_by(.SD, .(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) + test(2306.23, DT1[, sort_by(.SD, ~a + b)], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) + test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) +} +======= +#test for enhancing error message of invalid column #6512 +uniq_diff <- function(...) { + .Call("uniq_diff", ...) +} +capture_error_message <- function(expr) { + msg <- tryCatch( + { expr; NULL }, + error = function(e) e$message + ) + msg +} +test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg)) # Ensure -1 is reported}, TRUE) +>>>>>>> 1b4a51d3 (add test case) + # test for enhancing error message of invalid column #6512 uniq_diff <- function(...) { .Call("uniq_diff", ...) From 2493cb51758ad6d9a79e4b97775bb17fa2e5629a Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 03:20:33 +0530 Subject: [PATCH 14/25] add test3 --- inst/tests/tests.Rraw | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a59563829f..43570a2620 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21084,29 +21084,3 @@ capture_error_message <- function(expr) { } test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg)) # Ensure -1 is reported}, TRUE) >>>>>>> 1b4a51d3 (add test case) - -# test for enhancing error message of invalid column #6512 -uniq_diff <- function(...) { - .Call("uniq_diff", ...) -} -capture_error_message <- function(expr) { - msg <- tryCatch( - { expr; NULL }, - error = function(e) e$message - ) - msg -} -test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) - -# test for enhancing error message of invalid column #6512 -uniq_diff <- function(...) { - .Call("uniq_diff", ...) -} -capture_error_message <- function(expr) { - msg <- tryCatch( - { expr; NULL }, - error = function(e) e$message - ) - msg -} -test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) \ No newline at end of file From e66596fa7259875308e43cb01fefcc43a8ee2f30 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 03:48:12 +0530 Subject: [PATCH 15/25] resolved merge conflict --- inst/tests/tests.Rraw | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 43570a2620..e4231b5fa8 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21042,7 +21042,6 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list( # the integer overflow in #6729 is only noticeable with UBSan test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE }) -<<<<<<< HEAD if (exists("sort_by", "package:base")) { # sort_by.data.table DT1 = data.table(a=c(1, 3, 2, NA, 3), b=4:0) @@ -21070,17 +21069,3 @@ if (exists("sort_by", "package:base")) { test(2306.23, DT1[, sort_by(.SD, ~a + b)], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) } -======= -#test for enhancing error message of invalid column #6512 -uniq_diff <- function(...) { - .Call("uniq_diff", ...) -} -capture_error_message <- function(expr) { - msg <- tryCatch( - { expr; NULL }, - error = function(e) e$message - ) - msg -} -test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg)) # Ensure -1 is reported}, TRUE) ->>>>>>> 1b4a51d3 (add test case) From 89bcb2cdd363e32c191e7f2df897b14529be7792 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 03:51:35 +0530 Subject: [PATCH 16/25] test added for enhancing error message --- inst/tests/tests.Rraw | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e4231b5fa8..ce89efd5fd 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21069,3 +21069,16 @@ if (exists("sort_by", "package:base")) { test(2306.23, DT1[, sort_by(.SD, ~a + b)], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) } + +# test for enhancing error message of invalid column #6512 +uniq_diff <- function(...) { + .Call("uniq_diff", ...) +} +capture_error_message <- function(expr) { + msg <- tryCatch( + { expr; NULL }, + error = function(e) e$message + ) + msg +} +test(2307, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) From bf867b018c376bc8640260cb3b2564e1ce317a82 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Sun, 26 Jan 2025 12:19:47 +0530 Subject: [PATCH 17/25] Enhancing of error message --- po/fr.po | 4 ++-- po/zh_CN.po | 4 ++-- src/fmelt.c | 15 ++++++++++++++- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/po/fr.po b/po/fr.po index e63209de3c..264163e6a3 100644 --- a/po/fr.po +++ b/po/fr.po @@ -1301,8 +1301,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "Type inconnu de 'measure.vars' %s à l'indice %d de la liste" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid." -msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides." +msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" +msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides; veuillez corriger en supprimant : %s" #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." diff --git a/po/zh_CN.po b/po/zh_CN.po index 493adf3767..f1bdb098b3 100644 --- a/po/zh_CN.po +++ b/po/zh_CN.po @@ -1154,8 +1154,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "未知'measure.vars'类型 %s,位于列表中 %d" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid." -msgstr "'measure.vars'里,一或多个数值无效" +msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" +msgstr "'measure.vars'里,一或多个数值无效;请通过删除以下数值来修复:%s" #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." diff --git a/src/fmelt.c b/src/fmelt.c index f031cc350f..8a13fcd945 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -1,5 +1,7 @@ #include "data.table.h" #include + + // #include // the debugging machinery + breakpoint aidee // raise(SIGINT); @@ -176,8 +178,12 @@ bool is_default_measure(SEXP vec) { // maybe unlist, then unique, then set_diff. SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { + // Protect input list/vector, unlisting if necessary SEXP int_vec = PROTECT(isNewList(int_or_list) ? unlist_(int_or_list) : int_or_list); + + // Check for duplicated elements in the input vector SEXP is_duplicated = PROTECT(duplicated(int_vec, FALSE)); + int n_unique_cols = 0; SEXP invalid_columns = PROTECT(allocVector(INTSXP, length(int_vec))); int* invalid_col_ptr = INTEGER(invalid_columns); @@ -206,16 +212,23 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { } SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); int unique_i = 0; - for (int i=0; i Date: Mon, 27 Jan 2025 19:17:42 +0530 Subject: [PATCH 18/25] Revert to initial changes --- po/fr.po | 4 ++-- po/zh_CN.po | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/po/fr.po b/po/fr.po index 264163e6a3..e63209de3c 100644 --- a/po/fr.po +++ b/po/fr.po @@ -1301,8 +1301,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "Type inconnu de 'measure.vars' %s à l'indice %d de la liste" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" -msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides; veuillez corriger en supprimant : %s" +msgid "One or more values in 'measure.vars' is invalid." +msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides." #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." diff --git a/po/zh_CN.po b/po/zh_CN.po index f1bdb098b3..493adf3767 100644 --- a/po/zh_CN.po +++ b/po/zh_CN.po @@ -1154,8 +1154,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list" msgstr "未知'measure.vars'类型 %s,位于列表中 %d" #: fmelt.c:187 -msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s" -msgstr "'measure.vars'里,一或多个数值无效;请通过删除以下数值来修复:%s" +msgid "One or more values in 'measure.vars' is invalid." +msgstr "'measure.vars'里,一或多个数值无效" #: fmelt.c:189 msgid "One or more values in 'id.vars' is invalid." From 599ff52cfff0f4b94ff664ba51724e273bc656dd Mon Sep 17 00:00:00 2001 From: aitap Date: Wed, 26 Feb 2025 15:07:53 +0000 Subject: [PATCH 19/25] Provide a .formula2varlist implementation (#6842) Since base::.formula2varlist is not an API and it is now needed in two places, provide our own implementation. --- R/data.table.R | 4 ++-- R/utils.R | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index d1f6798100..99e908f63e 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2454,7 +2454,7 @@ split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TR # same as split.data.frame - handling all exceptions, factor orders etc, in a single stream of processing was a nightmare in factor and drop consistency # evaluate formula mirroring split.data.frame #5392. Mimics base::.formula2varlist. if (inherits(f, "formula")) - f = eval(attr(terms(f), "variables"), x, environment(f)) + f = formula_vars(f, x) # be sure to use x[ind, , drop = FALSE], not x[ind], in case downstream methods don't follow the same subsetting semantics (#5365) return(lapply(split(x = seq_len(nrow(x)), f = f, drop = drop, ...), function(ind) x[ind, , drop = FALSE])) } @@ -2530,7 +2530,7 @@ sort_by.data.table <- function(x, y, ...) { if (!cedta()) return(NextMethod()) # nocov if (inherits(y, "formula")) - y <- .formula2varlist(y, x) + y <- formula_vars(y, x) if (!is.list(y)) y <- list(y) # use forder instead of base 'order' diff --git a/R/utils.R b/R/utils.R index 50b165629d..cc4d933ff8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -212,3 +212,11 @@ rss = function() { #5515 #5517 round(ans / 1024.0, 1L) # return MB # nocov end } + +formula_vars = function(f, x) { # .formula2varlist is not API and seems to have appeared after R-4.2, #6841 + terms <- terms(f) + setNames( + eval(attr(terms, "variables"), x, environment(f)), + attr(terms, "term.labels") + ) +} From fcd1cabdb881f4e7063b5a67074c26dae90e3cec Mon Sep 17 00:00:00 2001 From: Mukulyadav2004 <145585624+Mukulyadav2004@users.noreply.github.com> Date: Thu, 27 Feb 2025 15:41:39 +0530 Subject: [PATCH 20/25] Fix index printing by adding index info to header (#6816) Produce the class header from `toprint` instead of just `x`. Fixes: #6806 Co-authored-by: Ivan K --- R/print.data.table.R | 2 +- inst/tests/tests.Rraw | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/R/print.data.table.R b/R/print.data.table.R index 7517a4f128..a37020f502 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -86,6 +86,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), if (show.indices) toprint = cbind(toprint, index_dt) } require_bit64_if_needed(x) + classes = classes1(toprint) toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, ...) # na.encode=FALSE so that NA in character cols print as # FR #353 - add row.names = logical argument to print.data.table @@ -100,7 +101,6 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), factor = "", POSIXct = "", logical = "", IDate = "", integer64 = "", raw = "", expression = "", ordered = "") - classes = classes1(x) abbs = unname(class_abb[classes]) if ( length(idx <- which(is.na(abbs))) ) abbs[idx] = paste0("<", classes[idx], ">") toprint = rbind(abbs, toprint) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ce89efd5fd..da41f8b9f8 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21070,6 +21070,12 @@ if (exists("sort_by", "package:base")) { test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L))) } +DT <- data.table(a = 1:2, b = 2:1) +setindex(DT, b) +# make sure that print(DT) doesn't warn due to the header missing index column types, #6806 +# can't use output= here because the print() call is outside withCallingHandlers(...) +test(2307, { capture.output(print(DT, class = TRUE, show.indices = TRUE)); TRUE }) + # test for enhancing error message of invalid column #6512 uniq_diff <- function(...) { .Call("uniq_diff", ...) @@ -21081,4 +21087,4 @@ capture_error_message <- function(expr) { ) msg } -test(2307, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) +test(2307, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) \ No newline at end of file From 7823be777f5e3da138b1ee0f8503fcd82dd3008b Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 13:15:54 +0530 Subject: [PATCH 21/25] xyz --- inst/tests/tests.Rraw | 2 +- src/fmelt.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index da41f8b9f8..0004e63390 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21087,4 +21087,4 @@ capture_error_message <- function(expr) { ) msg } -test(2307, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) \ No newline at end of file +test(2308, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) \ No newline at end of file diff --git a/src/fmelt.c b/src/fmelt.c index 8a13fcd945..c1866bdc94 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -1,7 +1,5 @@ #include "data.table.h" #include - - // #include // the debugging machinery + breakpoint aidee // raise(SIGINT); From 3f5bed96d78a3f26c7fb2f402f80de181fcb32a1 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 15:29:45 +0530 Subject: [PATCH 22/25] modify test --- inst/tests/tests.Rraw | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 47a52f3331..ffa600c61d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21077,14 +21077,4 @@ setindex(DT, b) test(2307, { capture.output(print(DT, class = TRUE, show.indices = TRUE)); TRUE }) # test for enhancing error message of invalid column #6512 -uniq_diff <- function(...) { - .Call("uniq_diff", ...) -} -capture_error_message <- function(expr) { - msg <- tryCatch( - { expr; NULL }, - error = function(e) e$message - ) - msg -} -test(2308, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) +test(2308, {msg <- tryCatch({ .Call("uniq_diff", as.integer(c(1, 2, -1, 4)), 4, FALSE); NULL }, error = function(e) e$message)print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) From c56a711a568fe75e664098f84f31564d48d3f414 Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 15:56:34 +0530 Subject: [PATCH 23/25] test case added --- inst/tests/tests.Rraw | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ffa600c61d..4c10144585 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21077,4 +21077,8 @@ setindex(DT, b) test(2307, { capture.output(print(DT, class = TRUE, show.indices = TRUE)); TRUE }) # test for enhancing error message of invalid column #6512 -test(2308, {msg <- tryCatch({ .Call("uniq_diff", as.integer(c(1, 2, -1, 4)), 4, FALSE); NULL }, error = function(e) e$message)print(msg)return(grepl("\\[-1\\]", msg))}, TRUE) +test(2308, { + msg <- tryCatch({ .Call("uniq_diff", as.integer(c(1, 2, -1, 4)), 4, FALSE); NULL }, error = function(e) e$message) + print(msg) + return(grepl("\\[-1\\]", msg)) +}, TRUE) From 778f20ba4bb01524b90e2b3a386affce82a9ea9b Mon Sep 17 00:00:00 2001 From: Divendra2006 Date: Fri, 28 Feb 2025 16:05:57 +0530 Subject: [PATCH 24/25] issues resolved --- src/fmelt.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/fmelt.c b/src/fmelt.c index c1866bdc94..e17536eacb 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -176,12 +176,8 @@ bool is_default_measure(SEXP vec) { // maybe unlist, then unique, then set_diff. SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { - // Protect input list/vector, unlisting if necessary SEXP int_vec = PROTECT(isNewList(int_or_list) ? unlist_(int_or_list) : int_or_list); - - // Check for duplicated elements in the input vector SEXP is_duplicated = PROTECT(duplicated(int_vec, FALSE)); - int n_unique_cols = 0; SEXP invalid_columns = PROTECT(allocVector(INTSXP, length(int_vec))); int* invalid_col_ptr = INTEGER(invalid_columns); @@ -189,7 +185,7 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { for (int i = 0; i < length(int_vec); ++i) { int col_number = INTEGER(int_vec)[i]; bool good_number = (col_number > 0 && col_number <= ncol); - if (is_measure) good_number |= (col_number == NA_INTEGER); + if (is_measure) good_number |= (col_number==NA_INTEGER); if (!good_number) { invalid_col_ptr[invalid_count++] = col_number; } else if (!LOGICAL(is_duplicated)[i]) { @@ -210,23 +206,16 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) { } SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); int unique_i = 0; - - // Populate the unique column numbers into the new vector - for (int i = 0; i < length(is_duplicated); ++i) { + for (int i=0; i Date: Fri, 28 Feb 2025 21:17:34 +0530 Subject: [PATCH 25/25] update test case --- inst/tests/tests.Rraw | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4c10144585..0e5dee8e2a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21078,7 +21078,5 @@ test(2307, { capture.output(print(DT, class = TRUE, show.indices = TRUE)); TRUE # test for enhancing error message of invalid column #6512 test(2308, { - msg <- tryCatch({ .Call("uniq_diff", as.integer(c(1, 2, -1, 4)), 4, FALSE); NULL }, error = function(e) e$message) - print(msg) - return(grepl("\\[-1\\]", msg)) -}, TRUE) + melt(data.table(A = 1:5, B = 6:10), id.vars = c("A", "-1")) +}, error = "One or more values in 'id.vars' are invalid")