From a2fa23f6b70cff8ac9c72041c6eeecf3115122eb Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Sun, 26 Jan 2025 12:19:47 +0530
Subject: [PATCH 01/25] Enhance error message for invalid measure.vars/id.vars

---
 po/data.table.pot |  2 +-
 po/es.po          |  4 +--
 po/fr.po          |  4 +--
 po/pt_BR.po       |  4 +--
 po/zh_CN.po       |  4 +--
 src/fmelt.c       | 68 ++++++++++++++++++++++++++++++++++++++++-------
 6 files changed, 67 insertions(+), 19 deletions(-)

diff --git a/po/data.table.pot b/po/data.table.pot
index e11d4c3da7..43e8850bda 100644
--- a/po/data.table.pot
+++ b/po/data.table.pot
@@ -1024,7 +1024,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr ""
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid."
+msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
 msgstr ""
 
 #: fmelt.c:189
diff --git a/po/es.po b/po/es.po
index ea5f2d9ce4..50a980298a 100644
--- a/po/es.po
+++ b/po/es.po
@@ -1281,8 +1281,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "Tipo 'measure.vars' desconocido %s en el índice %d de la lista"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid."
-msgstr "Uno o más valores en 'measure.vars' no son válidos."
+msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
+msgstr "Uno o más valores en 'measure.vars' no son válidos; por favor corrige eliminando: %s"
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."
diff --git a/po/fr.po b/po/fr.po
index e63209de3c..264163e6a3 100644
--- a/po/fr.po
+++ b/po/fr.po
@@ -1301,8 +1301,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "Type inconnu de 'measure.vars' %s à l'indice %d de la liste"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid."
-msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides."
+msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
+msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides; veuillez corriger en supprimant : %s"
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."
diff --git a/po/pt_BR.po b/po/pt_BR.po
index e214a2a3bd..4d54b6796c 100644
--- a/po/pt_BR.po
+++ b/po/pt_BR.po
@@ -1281,8 +1281,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "'measure.vars'com tipo desconhecido %s no índice %d da lista"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid."
-msgstr "Um ou mais valores em 'measure.vars' são inválidos."
+msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
+msgstr "Um ou mais valores em 'measure.vars' são inválidos; por favor, corrija removendo: %s"
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."
diff --git a/po/zh_CN.po b/po/zh_CN.po
index 493adf3767..f1bdb098b3 100644
--- a/po/zh_CN.po
+++ b/po/zh_CN.po
@@ -1154,8 +1154,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "未知'measure.vars'类型 %s，位于列表中 %d"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid."
-msgstr "'measure.vars'里，一或多个数值无效"
+msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
+msgstr "'measure.vars'里，一或多个数值无效；请通过删除以下数值来修复：%s"
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."
diff --git a/src/fmelt.c b/src/fmelt.c
index 59e82455b4..199e04a015 100644
--- a/src/fmelt.c
+++ b/src/fmelt.c
@@ -1,5 +1,7 @@
 #include "data.table.h"
 #include <Rdefines.h>
+
+
 // #include <signal.h> // the debugging machinery + breakpoint aidee
 // raise(SIGINT);
 
@@ -176,33 +178,79 @@ bool is_default_measure(SEXP vec) {
 
 // maybe unlist, then unique, then set_diff.
 SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
+  // Protect input list/vector, unlisting if necessary
   SEXP int_vec = PROTECT(isNewList(int_or_list) ? unlist_(int_or_list) : int_or_list);
+  
+  // Check for duplicated elements in the input vector
   SEXP is_duplicated = PROTECT(duplicated(int_vec, FALSE)); 
+  
   int n_unique_cols = 0;
-  for (int i=0; i<length(int_vec); ++i) {
+  
+  // Allocate a vector to store invalid column indices (initially max size is length of int_vec)
+  SEXP invalid_columns = PROTECT(allocVector(INTSXP, length(int_vec)));
+  int* invalid_col_ptr = INTEGER(invalid_columns);
+  int invalid_count = 0;
+  
+  // Iterate through the column numbers to identify invalid and unique columns
+  for (int i = 0; i < length(int_vec); ++i) {
     int col_number = INTEGER(int_vec)[i];
+    
+    // Check if the column number is within valid range
     bool good_number = 0 < col_number && col_number <= ncol;
-    if (is_measure) good_number |= (col_number==NA_INTEGER);
-    if (!good_number) {
-      if (is_measure) {
-        error(_("One or more values in 'measure.vars' is invalid."));
-      } else {
-        error(_("One or more values in 'id.vars' is invalid."));
+    
+    // Special check for 'measure' case (NA_INTEGER handling)
+    if (is_measure) good_number |= (col_number == NA_INTEGER);
+    
+    // Collect invalid columns if not valid or out of range
+    if (!good_number || col_number == 0) {
+      invalid_col_ptr[invalid_count++] = col_number;
+    } else if (!LOGICAL(is_duplicated)[i]) {
+      n_unique_cols++;
+    }
+  }
+  
+  // If invalid columns are found, construct the error message
+  if (invalid_count > 0) {
+    // Buffer for concatenated invalid column messages
+    char buffer[4096] = ""; // Large enough to store the concatenated string
+    for (int i = 0; i < invalid_count; ++i) {
+      char temp[32];
+      snprintf(temp, 32, "[%d]", invalid_col_ptr[i]); // Format the column number
+
+      if (i > 0) {
+        strncat(buffer, ", ", sizeof(buffer) - strlen(buffer) - 1); // Add separator
       }
-    } else if (!LOGICAL(is_duplicated)[i]) n_unique_cols++;
+      strncat(buffer, temp, sizeof(buffer) - strlen(buffer) - 1); // Append to the buffer
+    }
+
+    // Throw the error with the concatenated message
+    error(_("One or more values in '%s' are invalid; please fix by removing: %s"), 
+          is_measure ? "measure.vars" : "id.vars", buffer);
   }
+  
+  // Proceed with collecting unique columns
   SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); 
   int unique_i = 0;
-  for (int i=0; i<length(is_duplicated); ++i) {
+  
+  // Populate the unique column numbers into the new vector
+  for (int i = 0; i < length(is_duplicated); ++i) {
     if (!LOGICAL(is_duplicated)[i]) {
       INTEGER(unique_col_numbers)[unique_i++] = INTEGER(int_vec)[i];
     }
   }
+  
+  // Apply set difference to get final unique column indices
   SEXP out = set_diff(unique_col_numbers, ncol);
-  UNPROTECT(3);
+  
+  // Unprotect all allocated objects
+  UNPROTECT(4); // Unprotect input, duplication check, invalid columns, and unique columns
+  
   return out;
 }
 
+
+
+
 SEXP cols_to_int_or_list(SEXP cols, SEXP dtnames, bool is_measure) {
   switch(TYPEOF(cols)) {
   case STRSXP  : return chmatch(cols, dtnames, 0); 

From 3c1a7c629139970c0477470d48e379473b8cef6a Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Sun, 26 Jan 2025 14:36:09 +0530
Subject: [PATCH 02/25] Add Suggested changes

---
 po/es.po    |  2 +-
 po/fr.po    |  2 +-
 po/pt_BR.po |  2 +-
 po/zh_CN.po |  2 +-
 src/fmelt.c | 29 +++++++----------------------
 5 files changed, 11 insertions(+), 26 deletions(-)

diff --git a/po/es.po b/po/es.po
index 50a980298a..92be106585 100644
--- a/po/es.po
+++ b/po/es.po
@@ -1282,7 +1282,7 @@ msgstr "Tipo 'measure.vars' desconocido %s en el índice %d de la lista"
 
 #: fmelt.c:187
 msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
-msgstr "Uno o más valores en 'measure.vars' no son válidos; por favor corrige eliminando: %s"
+msgstr "Uno o más valores en 'measure.vars' no son válidos."
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."
diff --git a/po/fr.po b/po/fr.po
index 264163e6a3..752b1afb54 100644
--- a/po/fr.po
+++ b/po/fr.po
@@ -1302,7 +1302,7 @@ msgstr "Type inconnu de 'measure.vars' %s à l'indice %d de la liste"
 
 #: fmelt.c:187
 msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
-msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides; veuillez corriger en supprimant : %s"
+msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides."
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."
diff --git a/po/pt_BR.po b/po/pt_BR.po
index 4d54b6796c..6eb6aa1b3a 100644
--- a/po/pt_BR.po
+++ b/po/pt_BR.po
@@ -1282,7 +1282,7 @@ msgstr "'measure.vars'com tipo desconhecido %s no índice %d da lista"
 
 #: fmelt.c:187
 msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
-msgstr "Um ou mais valores em 'measure.vars' são inválidos; por favor, corrija removendo: %s"
+msgstr "Um ou mais valores em 'measure.vars' são inválidos."
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."
diff --git a/po/zh_CN.po b/po/zh_CN.po
index f1bdb098b3..86f9a966fb 100644
--- a/po/zh_CN.po
+++ b/po/zh_CN.po
@@ -1155,7 +1155,7 @@ msgstr "未知'measure.vars'类型 %s，位于列表中 %d"
 
 #: fmelt.c:187
 msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
-msgstr "'measure.vars'里，一或多个数值无效；请通过删除以下数值来修复：%s"
+msgstr "'measure.vars'里，一或多个数值无效"
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."
diff --git a/src/fmelt.c b/src/fmelt.c
index 199e04a015..0bffa31dc0 100644
--- a/src/fmelt.c
+++ b/src/fmelt.c
@@ -1,7 +1,6 @@
 #include "data.table.h"
 #include <Rdefines.h>
 
-
 // #include <signal.h> // the debugging machinery + breakpoint aidee
 // raise(SIGINT);
 
@@ -178,30 +177,23 @@ bool is_default_measure(SEXP vec) {
 
 // maybe unlist, then unique, then set_diff.
 SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
-  // Protect input list/vector, unlisting if necessary
   SEXP int_vec = PROTECT(isNewList(int_or_list) ? unlist_(int_or_list) : int_or_list);
   
-  // Check for duplicated elements in the input vector
   SEXP is_duplicated = PROTECT(duplicated(int_vec, FALSE)); 
   
   int n_unique_cols = 0;
-  
-  // Allocate a vector to store invalid column indices (initially max size is length of int_vec)
+
   SEXP invalid_columns = PROTECT(allocVector(INTSXP, length(int_vec)));
   int* invalid_col_ptr = INTEGER(invalid_columns);
   int invalid_count = 0;
   
-  // Iterate through the column numbers to identify invalid and unique columns
   for (int i = 0; i < length(int_vec); ++i) {
     int col_number = INTEGER(int_vec)[i];
     
-    // Check if the column number is within valid range
     bool good_number = 0 < col_number && col_number <= ncol;
     
-    // Special check for 'measure' case (NA_INTEGER handling)
     if (is_measure) good_number |= (col_number == NA_INTEGER);
     
-    // Collect invalid columns if not valid or out of range
     if (!good_number || col_number == 0) {
       invalid_col_ptr[invalid_count++] = col_number;
     } else if (!LOGICAL(is_duplicated)[i]) {
@@ -209,41 +201,34 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
     }
   }
   
-  // If invalid columns are found, construct the error message
   if (invalid_count > 0) {
-    // Buffer for concatenated invalid column messages
-    char buffer[4096] = ""; // Large enough to store the concatenated string
+    char buffer[4096] = ""; 
     for (int i = 0; i < invalid_count; ++i) {
       char temp[32];
-      snprintf(temp, 32, "[%d]", invalid_col_ptr[i]); // Format the column number
+      snprintf(temp, 32, "[%d]", invalid_col_ptr[i]); 
 
       if (i > 0) {
-        strncat(buffer, ", ", sizeof(buffer) - strlen(buffer) - 1); // Add separator
+        strncat(buffer, ", ", sizeof(buffer) - strlen(buffer) - 1); 
       }
-      strncat(buffer, temp, sizeof(buffer) - strlen(buffer) - 1); // Append to the buffer
+      strncat(buffer, temp, sizeof(buffer) - strlen(buffer) - 1); 
     }
 
-    // Throw the error with the concatenated message
     error(_("One or more values in '%s' are invalid; please fix by removing: %s"), 
           is_measure ? "measure.vars" : "id.vars", buffer);
   }
-  
-  // Proceed with collecting unique columns
+ 
   SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); 
   int unique_i = 0;
   
-  // Populate the unique column numbers into the new vector
   for (int i = 0; i < length(is_duplicated); ++i) {
     if (!LOGICAL(is_duplicated)[i]) {
       INTEGER(unique_col_numbers)[unique_i++] = INTEGER(int_vec)[i];
     }
   }
   
-  // Apply set difference to get final unique column indices
   SEXP out = set_diff(unique_col_numbers, ncol);
   
-  // Unprotect all allocated objects
-  UNPROTECT(4); // Unprotect input, duplication check, invalid columns, and unique columns
+  UNPROTECT(4);
   
   return out;
 }

From e75bc7319ebf7161df640ef001de1eae3fd3c982 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Mon, 27 Jan 2025 19:17:42 +0530
Subject: [PATCH 03/25] Revert msgid changes in po files

---
 po/data.table.pot | 2 +-
 po/es.po          | 2 +-
 po/fr.po          | 2 +-
 po/pt_BR.po       | 2 +-
 po/zh_CN.po       | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/po/data.table.pot b/po/data.table.pot
index 43e8850bda..e11d4c3da7 100644
--- a/po/data.table.pot
+++ b/po/data.table.pot
@@ -1024,7 +1024,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr ""
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
+msgid "One or more values in 'measure.vars' is invalid."
 msgstr ""
 
 #: fmelt.c:189
diff --git a/po/es.po b/po/es.po
index 92be106585..ea5f2d9ce4 100644
--- a/po/es.po
+++ b/po/es.po
@@ -1281,7 +1281,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "Tipo 'measure.vars' desconocido %s en el índice %d de la lista"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
+msgid "One or more values in 'measure.vars' is invalid."
 msgstr "Uno o más valores en 'measure.vars' no son válidos."
 
 #: fmelt.c:189
diff --git a/po/fr.po b/po/fr.po
index 752b1afb54..e63209de3c 100644
--- a/po/fr.po
+++ b/po/fr.po
@@ -1301,7 +1301,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "Type inconnu de 'measure.vars' %s à l'indice %d de la liste"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
+msgid "One or more values in 'measure.vars' is invalid."
 msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides."
 
 #: fmelt.c:189
diff --git a/po/pt_BR.po b/po/pt_BR.po
index 6eb6aa1b3a..e214a2a3bd 100644
--- a/po/pt_BR.po
+++ b/po/pt_BR.po
@@ -1281,7 +1281,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "'measure.vars'com tipo desconhecido %s no índice %d da lista"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
+msgid "One or more values in 'measure.vars' is invalid."
 msgstr "Um ou mais valores em 'measure.vars' são inválidos."
 
 #: fmelt.c:189
diff --git a/po/zh_CN.po b/po/zh_CN.po
index 86f9a966fb..493adf3767 100644
--- a/po/zh_CN.po
+++ b/po/zh_CN.po
@@ -1154,7 +1154,7 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "未知'measure.vars'类型 %s，位于列表中 %d"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
+msgid "One or more values in 'measure.vars' is invalid."
 msgstr "'measure.vars'里，一或多个数值无效"
 
 #: fmelt.c:189

From ea98c199df92ba4e1b3085ca37936c373ae5c641 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Tue, 18 Feb 2025 17:53:24 +0530
Subject: [PATCH 04/25] Remove empty lines in fmelt.c

---
 src/fmelt.c | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/src/fmelt.c b/src/fmelt.c
index 0bffa31dc0..0768900543 100644
--- a/src/fmelt.c
+++ b/src/fmelt.c
@@ -1,6 +1,5 @@
 #include "data.table.h"
 #include <Rdefines.h>
-
 // #include <signal.h> // the debugging machinery + breakpoint aidee
 // raise(SIGINT);
 
@@ -178,64 +177,46 @@ bool is_default_measure(SEXP vec) {
 // maybe unlist, then unique, then set_diff.
 SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
   SEXP int_vec = PROTECT(isNewList(int_or_list) ? unlist_(int_or_list) : int_or_list);
-  
   SEXP is_duplicated = PROTECT(duplicated(int_vec, FALSE)); 
-  
   int n_unique_cols = 0;
-
   SEXP invalid_columns = PROTECT(allocVector(INTSXP, length(int_vec)));
   int* invalid_col_ptr = INTEGER(invalid_columns);
   int invalid_count = 0;
-  
   for (int i = 0; i < length(int_vec); ++i) {
     int col_number = INTEGER(int_vec)[i];
-    
     bool good_number = 0 < col_number && col_number <= ncol;
-    
     if (is_measure) good_number |= (col_number == NA_INTEGER);
-    
     if (!good_number || col_number == 0) {
       invalid_col_ptr[invalid_count++] = col_number;
     } else if (!LOGICAL(is_duplicated)[i]) {
       n_unique_cols++;
     }
   }
-  
   if (invalid_count > 0) {
     char buffer[4096] = ""; 
     for (int i = 0; i < invalid_count; ++i) {
       char temp[32];
       snprintf(temp, 32, "[%d]", invalid_col_ptr[i]); 
-
       if (i > 0) {
         strncat(buffer, ", ", sizeof(buffer) - strlen(buffer) - 1); 
       }
       strncat(buffer, temp, sizeof(buffer) - strlen(buffer) - 1); 
     }
-
-    error(_("One or more values in '%s' are invalid; please fix by removing: %s"), 
+      error(_("One or more values in '%s' are invalid; please fix by removing: %s"), 
           is_measure ? "measure.vars" : "id.vars", buffer);
   }
- 
   SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); 
   int unique_i = 0;
-  
   for (int i = 0; i < length(is_duplicated); ++i) {
     if (!LOGICAL(is_duplicated)[i]) {
       INTEGER(unique_col_numbers)[unique_i++] = INTEGER(int_vec)[i];
     }
   }
-  
   SEXP out = set_diff(unique_col_numbers, ncol);
-  
   UNPROTECT(4);
-  
   return out;
 }
 
-
-
-
 SEXP cols_to_int_or_list(SEXP cols, SEXP dtnames, bool is_measure) {
   switch(TYPEOF(cols)) {
   case STRSXP  : return chmatch(cols, dtnames, 0); 

From 822964ffcbc9a2052d31e92d207cd8ccbecb8439 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Wed, 19 Feb 2025 19:26:26 +0530
Subject: [PATCH 05/25] Apply changes from maintainer's commit dbcabb0 to avoid
 repeated strncat() calls

---
 src/fmelt.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/fmelt.c b/src/fmelt.c
index 0768900543..b4d8c9baab 100644
--- a/src/fmelt.c
+++ b/src/fmelt.c
@@ -193,14 +193,13 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
     }
   }
   if (invalid_count > 0) {
-    char buffer[4096] = ""; 
+    char buffer[4096] = "", *nexti = buffer;
+    size_t remaining = sizeof buffer;
     for (int i = 0; i < invalid_count; ++i) {
-      char temp[32];
-      snprintf(temp, 32, "[%d]", invalid_col_ptr[i]); 
-      if (i > 0) {
-        strncat(buffer, ", ", sizeof(buffer) - strlen(buffer) - 1); 
-      }
-      strncat(buffer, temp, sizeof(buffer) - strlen(buffer) - 1); 
+      int offset = snprintf(nexti, remaining, "%s[%d]", i > 0 ? ", " : "", invalid_col_ptr[i]);
+      if (offset < 0 || (size_t)offset >= remaining) break;
+      nexti += offset;
+      remaining -= offset;
     }
       error(_("One or more values in '%s' are invalid; please fix by removing: %s"), 
           is_measure ? "measure.vars" : "id.vars", buffer);

From eff26ca2c3a5c3e494442fedb3ba59dc19a8c858 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Thu, 27 Feb 2025 01:46:23 +0530
Subject: [PATCH 06/25] add test case

---
 src/fmelt.c                | 10 +++----
 tests/testthat/test_melt.R | 55 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 5 deletions(-)
 create mode 100644 tests/testthat/test_melt.R

diff --git a/src/fmelt.c b/src/fmelt.c
index e0afe4a5e2..9b203c7910 100644
--- a/src/fmelt.c
+++ b/src/fmelt.c
@@ -184,9 +184,9 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
   int invalid_count = 0;
   for (int i = 0; i < length(int_vec); ++i) {
     int col_number = INTEGER(int_vec)[i];
-    bool good_number = 0 < col_number && col_number <= ncol;
-    if (is_measure) good_number |= (col_number == NA_INTEGER);
-    if (!good_number || col_number == 0) {
+    bool good_number = (col_number > 0 && col_number <= ncol);
+    if (is_measure) {good_number |= (col_number == NA_INTEGER);}
+    if (!good_number) {
       invalid_col_ptr[invalid_count++] = col_number;
     } else if (!LOGICAL(is_duplicated)[i]) {
       n_unique_cols++;
@@ -201,8 +201,8 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
       nexti += offset;
       remaining -= offset;
     }
-      error(_("One or more values in '%s' are invalid; please fix by removing: %s"), 
-          is_measure ? "measure.vars" : "id.vars", buffer);
+    error(_("One or more values in '%s' are invalid; please fix by removing: %s"), 
+    is_measure ? "measure.vars" : "id.vars", buffer);
   }
   SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); 
   int unique_i = 0;
diff --git a/tests/testthat/test_melt.R b/tests/testthat/test_melt.R
new file mode 100644
index 0000000000..66bb3ac3cd
--- /dev/null
+++ b/tests/testthat/test_melt.R
@@ -0,0 +1,55 @@
+options(width = 200)
+# Load compiled shared object
+dyn.load("/home/yadav/dataTable/data.table/src/fmelt.so")
+
+# Ensure the function is loaded
+stopifnot(is.loaded("uniq_diff"))
+
+# Test cases
+test_uniq_diff <- function() {
+  cat("Running tests for uniq_diff...\n")
+  
+  # Load required R functions
+  library(data.table)
+  
+  # Test 1: Valid integer vector input
+  input_1 <- as.integer(c(1, 2, 3, 4, 5))
+  result_1 <- .Call("uniq_diff", input_1, as.integer(5), FALSE)
+  expected_1 <- input_1  # Should return unique values
+  stopifnot(identical(result_1, expected_1))
+  cat("Test 1 passed!\n")
+  
+  # Test 2: Input with duplicates
+  input_2 <- as.integer(c(1, 2, 2, 3, 4, 4, 5))
+  result_2 <- .Call("uniq_diff", input_2, as.integer(5), FALSE)
+  expected_2 <- as.integer(c(1, 2, 3, 4, 5))  # Should remove duplicates
+  stopifnot(identical(result_2, expected_2))
+  cat("Test 2 passed!\n")
+  
+  # Test 3: Invalid column numbers (out of range)
+  input_3 <- as.integer(c(-1, 0, 1, 6, 2, 3))
+  tryCatch({
+    result_3 <- .Call("uniq_diff", input_3, as.integer(5), FALSE)
+    cat("Test 3 failed: Expected an error but none occurred.\n")
+  }, error = function(e) {
+    cat("Test 3 passed! Caught expected error: ", e$message, "\n")
+  })
+  
+  # Test 4: NA values in the input
+  input_4 <- as.integer(c(1, 2, NA, 3, 4))
+  result_4 <- .Call("uniq_diff", input_4, as.integer(5), TRUE)
+  expected_4 <- as.integer(c(1, 2, NA, 3, 4))  # Should allow NA if is_measure is TRUE
+  stopifnot(identical(result_4, expected_4))
+  cat("Test 4 passed!\n")
+  
+  # Test 5: Empty input
+  input_5 <- as.integer(integer(0))
+  result_5 <- .Call("uniq_diff", input_5, as.integer(5), FALSE)
+  stopifnot(length(result_5) == 0)  # Should return empty
+  cat("Test 5 passed!\n")
+  
+  cat("All tests completed successfully!\n")
+}
+
+# Run the tests
+test_uniq_diff()

From 18d0180d1b8a188420b715fd98f0f86421455002 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Thu, 27 Feb 2025 01:50:11 +0530
Subject: [PATCH 07/25] Restore original for-loop spacing in uniq_diff

---
 src/fmelt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fmelt.c b/src/fmelt.c
index 9b203c7910..366c6faa75 100644
--- a/src/fmelt.c
+++ b/src/fmelt.c
@@ -206,7 +206,7 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
   }
   SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); 
   int unique_i = 0;
-  for (int i = 0; i < length(is_duplicated); ++i) {
+  for (int i=0; i<length(is_duplicated); ++i) {
     if (!LOGICAL(is_duplicated)[i]) {
       INTEGER(unique_col_numbers)[unique_i++] = INTEGER(int_vec)[i];
     }

From 94173d1173a20154a15aa0499243fad0fd5c4c78 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 01:48:58 +0530
Subject: [PATCH 08/25] Move uniq_diff test into inst/tests/tests.Rraw

---
 inst/tests/tests.Rraw      | 15 +++++++++++
 src/fmelt.c                |  4 ++-
 tests/testthat/test_melt.R | 55 --------------------------------------
 3 files changed, 18 insertions(+), 56 deletions(-)
 delete mode 100644 tests/testthat/test_melt.R

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e4231b5fa8..43570a2620 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21042,6 +21042,7 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list(
 # the integer overflow in #6729 is only noticeable with UBSan
 test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE })
 
+<<<<<<< HEAD
 if (exists("sort_by", "package:base")) {
   # sort_by.data.table
   DT1 = data.table(a=c(1, 3, 2, NA, 3), b=4:0)
@@ -21069,3 +21070,17 @@ if (exists("sort_by", "package:base")) {
   test(2306.23, DT1[, sort_by(.SD, ~a + b)], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
   test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
 }
+=======
+#test for enhancing error message of invalid column #6512
+uniq_diff <- function(...) {
+  .Call("uniq_diff", ...)
+}
+capture_error_message <- function(expr) {
+  msg <- tryCatch(
+    { expr; NULL }, 
+    error = function(e) e$message
+  )
+  msg
+}
+test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))  # Ensure -1 is reported}, TRUE)
+>>>>>>> 1b4a51d3 (add test case)
diff --git a/src/fmelt.c b/src/fmelt.c
index 366c6faa75..8eb257ae2e 100644
--- a/src/fmelt.c
+++ b/src/fmelt.c
@@ -185,7 +185,9 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
   for (int i = 0; i < length(int_vec); ++i) {
     int col_number = INTEGER(int_vec)[i];
     bool good_number = (col_number > 0 && col_number <= ncol);
-    if (is_measure) {good_number |= (col_number == NA_INTEGER);}
+    if (is_measure) {
+      good_number |= (col_number == NA_INTEGER);
+    }
     if (!good_number) {
       invalid_col_ptr[invalid_count++] = col_number;
     } else if (!LOGICAL(is_duplicated)[i]) {
diff --git a/tests/testthat/test_melt.R b/tests/testthat/test_melt.R
deleted file mode 100644
index 66bb3ac3cd..0000000000
--- a/tests/testthat/test_melt.R
+++ /dev/null
@@ -1,55 +0,0 @@
-options(width = 200)
-# Load compiled shared object
-dyn.load("/home/yadav/dataTable/data.table/src/fmelt.so")
-
-# Ensure the function is loaded
-stopifnot(is.loaded("uniq_diff"))
-
-# Test cases
-test_uniq_diff <- function() {
-  cat("Running tests for uniq_diff...\n")
-  
-  # Load required R functions
-  library(data.table)
-  
-  # Test 1: Valid integer vector input
-  input_1 <- as.integer(c(1, 2, 3, 4, 5))
-  result_1 <- .Call("uniq_diff", input_1, as.integer(5), FALSE)
-  expected_1 <- input_1  # Should return unique values
-  stopifnot(identical(result_1, expected_1))
-  cat("Test 1 passed!\n")
-  
-  # Test 2: Input with duplicates
-  input_2 <- as.integer(c(1, 2, 2, 3, 4, 4, 5))
-  result_2 <- .Call("uniq_diff", input_2, as.integer(5), FALSE)
-  expected_2 <- as.integer(c(1, 2, 3, 4, 5))  # Should remove duplicates
-  stopifnot(identical(result_2, expected_2))
-  cat("Test 2 passed!\n")
-  
-  # Test 3: Invalid column numbers (out of range)
-  input_3 <- as.integer(c(-1, 0, 1, 6, 2, 3))
-  tryCatch({
-    result_3 <- .Call("uniq_diff", input_3, as.integer(5), FALSE)
-    cat("Test 3 failed: Expected an error but none occurred.\n")
-  }, error = function(e) {
-    cat("Test 3 passed! Caught expected error: ", e$message, "\n")
-  })
-  
-  # Test 4: NA values in the input
-  input_4 <- as.integer(c(1, 2, NA, 3, 4))
-  result_4 <- .Call("uniq_diff", input_4, as.integer(5), TRUE)
-  expected_4 <- as.integer(c(1, 2, NA, 3, 4))  # Should allow NA if is_measure is TRUE
-  stopifnot(identical(result_4, expected_4))
-  cat("Test 4 passed!\n")
-  
-  # Test 5: Empty input
-  input_5 <- as.integer(integer(0))
-  result_5 <- .Call("uniq_diff", input_5, as.integer(5), FALSE)
-  stopifnot(length(result_5) == 0)  # Should return empty
-  cat("Test 5 passed!\n")
-  
-  cat("All tests completed successfully!\n")
-}
-
-# Run the tests
-test_uniq_diff()

From 11345b98356475fe050582894c48ea9f643cf504 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 01:54:15 +0530
Subject: [PATCH 09/25] add test

---
 inst/tests/tests.Rraw | 313 +++++++++++++++++++-----------------------
 1 file changed, 139 insertions(+), 174 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 43570a2620..1d31d232ce 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -159,6 +159,9 @@ TZnotUTC = !identical(tt,"") && !is_utc(tt)
 #   (3) function factory for matching messages exactly by substituting anything between delimiters [delim, fmt=TRUE]
 #   (4) function factory for matching messages exactly by substituting a generic string [fmt=string]
 get_msg = function(e, delim, fmt=FALSE) {
+  ufq = options(useFancyQuotes = FALSE) # otherwise we get angled quotes, hard to match robustly
+  on.exit(options(ufq))
+
   condition = tryCatch({e; NULL}, error=identity, warning=identity)
   if (is.null(condition)) return(condition)
   msg = condition$message
@@ -1361,20 +1364,43 @@ if (test_bit64) {
   test(431.5, DT[5,1:=as.integer64(NA)], data.table(a=factor(c(NA,NA,NA,NA,NA), levels=LETTERS[1:3]), b=1:5))
 }
 
-# Test that unsetting datatable.alloccol is caught, #2014
-test(432.1, data.table(a=1:3), options=list(datatable.alloccol=NULL), error="Has getOption('datatable.alloccol') somehow become unset?")
-test(432.2, data.table(a=1:3), options=c(datatable.alloccol="1024"), error="getOption('datatable.alloccol') should be a number, by default 1024. But its type is 'character'.")
-test(432.3, data.table(a=1:3), options=list(datatable.alloccol=c(10L,20L)), error="is a numeric vector ok but its length is 2. Its length should be 1.")
-test(432.4, data.table(a=1:3), options=c(datatable.alloccol=NA_integer_), error="It must be >=0 and not NA.")
-test(432.5, data.table(a=1:3), options=c(datatable.alloccol=-1), error="It must be >=0 and not NA.")
-
+old = getOption("datatable.alloccol")  # Test that unsetting datatable.alloccol is caught, #2014
+options(datatable.alloccol=NULL) # In this =NULL case, options() in R 3.0.0 returned TRUE rather than the old value. This R bug was fixed in R 3.1.1.
+                                 # This is why getOption is called first rather than just using the result of option() like elsewhere in this test file.
+                                 # TODO: simplify this test if/when R dependency >= 3.1.1
+err1 = try(data.table(a=1:3), silent=TRUE)
+options(datatable.alloccol="1024")
+err2 = try(data.table(a=1:3), silent=TRUE)
+options(datatable.alloccol=c(10L,20L))
+err3 = try(data.table(a=1:3), silent=TRUE)
+options(datatable.alloccol=NA_integer_)
+err4 = try(data.table(a=1:3), silent=TRUE)
+options(datatable.alloccol=-1)
+err5 = try(data.table(a=1:3), silent=TRUE)
+options(datatable.alloccol=1024L)   # otherwise test() itself fails in its internals with the alloc.col error
+test(432.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1))
+test(432.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2))
+test(432.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3))
+test(432.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4))
+test(432.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5))
 # Repeat the tests but this time with subsetting, to ensure the validity check on option happens for those too
 DT = data.table(a=1:3, b=4:6)
-test(433.1, DT[2,], options=list(datatable.alloccol=NULL), error="Has getOption('datatable.alloccol') somehow become unset?")
-test(433.2, DT[,2], options=c(datatable.alloccol="1024"), error="getOption('datatable.alloccol') should be a number, by default 1024. But its type is 'character'.")
-test(433.3, DT[a>1], options=list(datatable.alloccol=c(10L,20L)), error="is a numeric vector ok but its length is 2. Its length should be 1.")
-test(433.4, DT[,"b"], options=c(datatable.alloccol=NA_integer_), error="It must be >=0 and not NA.")
-test(433.5, DT[2,"b"], options=c(datatable.alloccol=-1), error="It must be >=0 and not NA.")
+options(datatable.alloccol=NULL)
+err1 = try(DT[2,], silent=TRUE)
+options(datatable.alloccol="1024")
+err2 = try(DT[,2], silent=TRUE)
+options(datatable.alloccol=c(10L,20L))
+err3 = try(DT[a>1], silent=TRUE)
+options(datatable.alloccol=NA_integer_)
+err4 = try(DT[,"b"], silent=TRUE)
+options(datatable.alloccol=-1)
+err5 = try(DT[2,"b"], silent=TRUE)
+options(datatable.alloccol=1024L)   # otherwise test() itself fails in its internals with the alloc.col error
+test(433.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1))
+test(433.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2))
+test(433.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3))
+test(433.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4))
+test(433.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5))
 
 # simple realloc test
 DT = data.table(a=1:3,b=4:6)
@@ -7040,7 +7066,7 @@ ee = new.env()
 ee$DT = data.frame(x=1L, y=1:3)
 setattr(ee$DT, 'class', c("data.table", "data.frame"))
 test(1482.1, truelength(ee$DT), 0L) # make sure that the simulated environment is right.
-test(1482.2, ee$DT[, z := 3:1], data.table(x=1L, y=1:3, z=3:1), warning="A shallow copy of this data.table was taken")
+test(1482.2, ee$DT[, z := 3:1], data.table(x=1L, y=1:3, z=3:1), warning="Invalid .internal.selfref detected and")
 test(1482.3, truelength(ee$DT), 1027L)
 test(1482.4, ee$DT[, za := 4:6], data.table(x=1L, y=1:3, z=3:1, za=4:6))
 test(1482.5, truelength(ee$DT), 1027L)   # should have used spare slot i.e. no increase in tl
@@ -7865,7 +7891,7 @@ test(1551.5, fread(str),
 rhs = setDT(read.table(testDir("issue_1095_fread.txt.bz2"), sep=",", comment.char="", stringsAsFactors=FALSE, quote="", strip.white=TRUE))
 if (test_R.utils) {
   test(1551.61, fread(testDir("issue_1095_fread.txt.bz2"), logical01=FALSE), rhs, warning=w)
-  rhs[, names(.SD) := lapply(.SD, function(x) x == "Y"), .SDcols = c("V16", "V17", "V45")]
+  rhs[, names(.SD) := lapply(.SD, \(x) x == "Y"), .SDcols = c("V16", "V17", "V45")]
   test(1551.62, fread(testDir("issue_1095_fread.txt.bz2"), logical01=FALSE, logicalYN=TRUE), rhs, warning=w)
 }
 
@@ -8541,13 +8567,14 @@ DT1 = data.table(a=1)
 test(1601.1, merge(DT1, DT1, by="a"), data.table(a=1, key="a"))
 test(1601.2, merge(DT1, DT0, by="a"),
      warning="Input data.table 'y' has no columns.",
-     error="The following columns listed in `by` are missing from y: [a]")
+     error="Elements listed in `by`")
 test(1601.3, merge(DT0, DT1, by="a"),
      warning="Input data.table 'x' has no columns.",
-     error="The following columns listed in `by` are missing from x: [a]")
+     error="Elements listed in `by`")
 test(1601.4, merge(DT0, DT0, by="a"),
      warning="Neither of the input data.tables to join have columns.",
-     error="The following columns listed in `by` are missing from x: [a]")
+     error="Elements listed in `by`")
+
 # fix for #1549
 d1 <- data.table(v1=1:2,x=x)
 d2 <- data.table(v1=3:4)
@@ -8689,17 +8716,17 @@ test(1613.21, all.equal(DT2, DT1, ignore.row.order = TRUE), "Dataset 'current' h
 # test attributes: key
 DT1 <- data.table(a = 1:4, b = letters[1:4], key = "a")
 DT2 <- data.table(a = 1:4, b = letters[1:4])
-test(1613.22, all.equal(DT1, DT2), output="Datasets have different keys. 'target': [a]. 'current': has no key.")
+test(1613.22, all.equal(DT1, DT2), "Datasets have different keys. 'target': [a]. 'current': has no key.")
 test(1613.23, all.equal(DT1, DT2, check.attributes = FALSE), TRUE)
 test(1613.24, all.equal(DT1, setkeyv(DT2, "a"), check.attributes = TRUE), TRUE)
 # test attributes: index
 DT1 <- data.table(a = 1:4, b = letters[1:4])
 DT2 <- data.table(a = 1:4, b = letters[1:4])
 setindexv(DT1, "b")
-test(1613.25, all.equal(DT1, DT2), output="Datasets have different indices. 'target': [b]. 'current': has no index.")
+test(1613.25, all.equal(DT1, DT2), "Datasets have different indices. 'target': [b]. 'current': has no index.")
 test(1613.26, all.equal(DT1, DT2, check.attributes = FALSE), TRUE)
-test(1613.27, all.equal(DT1, setindexv(DT2, "a")), output="Datasets have different indices. 'target': [b]. 'current': [a].")
-test(1613.28, all.equal(DT1, setindexv(DT2, "b")), output="Datasets have different indices. 'target': [b]. 'current': [a, b].")
+test(1613.27, all.equal(DT1, setindexv(DT2, "a")), "Datasets have different indices. 'target': [b]. 'current': [a].")
+test(1613.28, all.equal(DT1, setindexv(DT2, "b")), "Datasets have different indices. 'target': [b]. 'current': [a, b].")
 test(1613.29, all.equal(DT1, setindexv(setindexv(DT2, NULL), "b")), TRUE)
 # test custom attribute
 DT1 <- data.table(a = 1:4, b = letters[1:4])
@@ -10968,8 +10995,7 @@ DT = data.table(
   D = as.POSIXct(dt<-paste(d,t), tz="UTC"),
   E = as.POSIXct(paste0(dt,c(".999",".0",".5",".111112",".123456",".023",".0",".999999",".99",".0009")), tz="UTC"))
 
-test(1740.1, fwrite(DT,dateTimeAs="iso"),
-     error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv"))
+test(1740.1, fwrite(DT,dateTimeAs="iso"), error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv"))
 test(1740.2, fwrite(DT,dateTimeAs=c("ISO","squash")), error=base_messages$match_arg_length)
 test(1740.3, capture.output(fwrite(DT,dateTimeAs="ISO")), c(
 "A,B,C,D,E",
@@ -11080,7 +11106,7 @@ test(1743.123, fread("a,b\n1+3i,2015-01-01", colClasses=c(NA,"IDate")), data.tab
 test(1743.13, lapply(fread("a,b\n09/05/98,2015-01-01", colClasses = "Date"), class), y=list(a="character", b=c("IDate", "Date")), warning=base_messages$ambiguous_date_fmt)
 
 ## Just invalid
-test(1743.14,
+test(1743.14, options = c(useFancyQuotes = FALSE),
      sapply(fread("a,b\n2017-01-01,1", colClasses=c("foo", "integer")), class), c(a="character", b="integer"),
      warning=base_messages$missing_coerce_method)
 test(1743.15, sapply(fread("a,b\n2017-01-01,1", colClasses=c("foo", "integer")), class), c(a="character", b="integer"), warning="the column has been left as type .*character")
@@ -11787,15 +11813,15 @@ test(1775.1, capture.output(print(DT1, print.keys = TRUE)),
      c("Key: <a>", "   a", "1: 1", "2: 2", "3: 3"))
 DT2 <- data.table(a = 1:3, b = 4:6)
 setindexv(DT2, c("b","a"))
-test(1775.2, print(DT2, print.keys = TRUE),
-     output=c("Index: <b__a>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6"))
+test(1775.2, capture.output(print(DT2, print.keys = TRUE)),
+     c("Index: <b__a>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6"))
 setindexv(DT2, "b")
-test(1775.3, print(DT2, print.keys = TRUE),
-     output=c("Indices: <b__a>, <b>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6"))
+test(1775.3, capture.output(print(DT2, print.keys = TRUE)),
+     c("Indices: <b__a>, <b>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6"))
 setkey(DT2, a)
 setindexv(DT2, c("b","a"))
-test(1775.4, print(DT2, print.keys = TRUE),
-     output=c("Key: <a>", "Indices: <b__a>, <b>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6")) ## index 'b' is still good, so we keep it
+test(1775.4, capture.output(print(DT2, print.keys = TRUE)),
+     c("Key: <a>", "Indices: <b__a>, <b>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6")) ## index 'b' is still good, so we keep it
 
 # dev regression #2285
 cat("A  B  C\n1  2  3\n4  5  6", file=f<-tempfile())
@@ -12119,7 +12145,8 @@ test(1831.4, fread(paste0("A\n", "1.", src2)), data.table(A=1.1234567890098766))
 DT = as.data.table(matrix(5L, nrow=10, ncol=10))
 test(1832.1, fwrite(DT, f<-tempfile(), verbose=TRUE), output="Column writers")
 DT = as.data.table(matrix(5L, nrow=10, ncol=60))
-test(1832.2, fwrite(DT, f, verbose=TRUE), output = "\nColumn writers.* [.][.][.] ")
+# Using capture.output directly to look for the "..." because test(,output=) intercepts [] for convenience elsewhere
+test(1832.2, any(grepl("^Column writers.* [.][.][.] ", capture.output(fwrite(DT, f, verbose=TRUE)))))
 unlink(f)
 
 # ensure explicitly setting select to default value doesn't error, #2007
@@ -13519,14 +13546,14 @@ test(1962.016, merge(DT1, DT2, by.x = 'a', by.y = c('a', 'V')),
 test(1962.017, merge(DT1, DT2, by = 'V', by.x = 'a', by.y = 'a'),
      data.table(a = 2:3, V.x = c("a", "a"), V.y = c("b", "b"), key = 'a'),
      warning = 'Supplied both.*argument will be ignored')
-test(1962.018, merge(DT1, DT2, by.x='z', by.y='a'),
-     error="The following columns listed in `by.x` are missing from x: [z]")
-test(1962.019, merge(DT1, DT2, by.x='a', by.y='z'),
-     error="The following columns listed in `by.y` are missing from y: [z]")
+test(1962.018, merge(DT1, DT2, by.x = 'z', by.y = 'a'),
+     error = 'Elements listed in `by.x`')
+test(1962.019, merge(DT1, DT2, by.x = 'a', by.y = 'z'),
+     error = 'Elements listed in `by.y`')
 test(1962.0201, merge(DT1, DT2, by=character(0L)), ans)  # was error before PR#5183
 test(1962.0202, merge(DT1, DT2, by=NULL),          ans)  # test explicit NULL too as missing() could be used inside merge()
-test(1962.021, merge(DT1, DT2, by='z'),
-     error='The following columns listed in `by` are missing from x: [z]')
+test(1962.021, merge(DT1, DT2, by = 'z'),
+     error = 'must be valid column names in x and y')
 
 ## frank.R
 x = c(1, 1, 2, 5, 4, 3, 4, NA, 6)
@@ -14915,7 +14942,7 @@ test(2037.1, foo(DT), output='Please remember to always setDT()')
 # no assignment was made to DT
 test(2037.2, names(DT), 'a')
 # _selrefok() verbose message was duplicated
-test(2037.3, foo(DT), output="data.table internal attributes", notOutput="data.table internal attributes.*data.table internal attributes")
+test(2037.3, unname(table(unlist(strsplit(capture.output(foo(DT)), '\n|\\s+')))['ptr']), 1L)
 
 # `between` invalid args, and verbose #3516
 test(2038.01, between(1:5, 2, 4, incbounds=423), error="incbounds must be TRUE or FALSE")
@@ -15139,13 +15166,13 @@ test(2044.60, dt1[dt2, ..cols, on="int==doubleInt", verbose=TRUE],
 test(2044.61, dt1[dt2, ..cols, on="int==realDouble", verbose=TRUE],  # this was wrong in v1.12.2 (the fractions were truncated and joined to next lowest int)
               data.table(x.bool=c(NA,FALSE,NA,FALSE,NA), x.int=INT(NA,1,NA,2,NA), x.doubleInt=c(NA,1,NA,2,NA),
                          i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
-              output="Coercing integer column x.int to type double to match type of i.realDouble .which contains fractions.")
+              output="Coercing integer column x.int to type double to match type of i.realDouble which contains fractions")
 test(2044.62, dt1[dt2, ..cols, on="doubleInt==int", verbose=TRUE],
               data.table(x.bool=FALSE, x.int=1:5, x.doubleInt=as.double(1:5), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
-              output="Coercing integer column i.int .for join. to type double to match type of x.doubleInt")
+              output="Coercing integer column i.int to type double for join to match type of x.doubleInt")
 test(2044.63, dt1[dt2, ..cols, on="realDouble==int", verbose=TRUE],
               data.table(x.bool=c(rep(FALSE,4),TRUE), x.int=INT(2,4,6,8,10), x.doubleInt=c(2,4,6,8,10), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
-              output="Coercing integer column i.int .for join. to type double to match type of x.realDouble")
+              output="Coercing integer column i.int to type double for join to match type of x.realDouble")
 cols = c("x.int","x.char","x.fact","i.int","i.char","i.char")
 test(2044.64, dt1[dt2, ..cols, on="char==fact", verbose=TRUE],
               ans<-data.table(x.int=1:5, x.char=letters[1:5], x.fact=factor(letters[1:5]), i.int=1:5, i.char=letters[1:5], i.char=letters[1:5]),
@@ -15180,15 +15207,15 @@ if (test_bit64) {
 dt1 = data.table(a=1,  b=NA_character_)
 dt2 = data.table(a=2L, b=NA)
 test(2044.80, dt1[dt2, on="a==b",             verbose=TRUE], data.table(a=NA, b=NA_character_, i.a=2L),
-              output=msg<-"Coercing logical column i.b .all-NA. to type double to match type of x.a")
+              output=msg<-"Coercing all-NA logical column i.b to type double to match type of x.a")
 test(2044.81, dt1[dt2, on="a==b", nomatch=0L, verbose=TRUE], data.table(a=logical(), b=character(), i.a=integer()),
               output=msg)
 test(2044.82, dt1[dt2, on="b==b",             verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
-              output=msg<-"Coercing logical column i.b .all-NA. to type character to match type of x.b")
+              output=msg<-"Coercing all-NA logical column i.b to type character to match type of x.b")
 test(2044.83, dt1[dt2, on="b==b", nomatch=0L, verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
               output=msg)
 test(2044.84, dt1[dt2, on="b==a",             verbose=TRUE], data.table(a=NA_real_, b=2L, i.b=NA),
-              output=msg<-"Coercing character column x.b .all-NA. to type integer to match type of i.a")
+              output=msg<-"Coercing all-NA character column x.b to type integer to match type of i.a")
 test(2044.85, dt1[dt2, on="b==a", nomatch=0L, verbose=TRUE], data.table(a=double(), b=integer(), i.b=logical()),
               output=msg)
 
@@ -15420,7 +15447,7 @@ L = list(1:3, NULL, 4:6)
 test(2058.18, length(L), 3L)
 test(2058.19, as.data.table(L), data.table(V1=1:3, V2=4:6))  # V2 not V3        # no
 DT = data.table(a=1:3, b=c(4,5,6))
-test(2058.20, DT[,b:=list(NULL)], data.table(a=1:3))                            # no
+test(2058.20, DT[,b:=list(NULL)], data.table(a=1:3, b=list(NULL)))              # no
 
 # rbindlist improved error message, #3638
 DT = data.table(a=1)
@@ -15615,7 +15642,7 @@ i = data.table(date = dbl_date, key = 'date')
 test(2064.1, x[i, class(date), verbose=TRUE], 'Date',
              output="Coercing double column i.date (which contains no fractions) to type integer to match type of x.date")
 test(2064.2, i[x, class(date), verbose=TRUE], 'Date',
-             output="Coercing integer column i.date .for join. to type double to match type of x.date")
+             output="Coercing integer column i.date to type double for join to match type of x.date")
 
 # complex values in grouping, #3639
 set.seed(42)
@@ -16544,69 +16571,69 @@ DT = data.table(a = vector("integer", 102L),
                 b = "bbbbbbbbbbbbb",
                 c = "ccccccccccccc",
                 d = c("ddddddddddddd", "d"))
-test(2125.02, print(DT, trunc.cols=TRUE),
-     output=c("     a             b             c",
-              "  1: 0 bbbbbbbbbbbbb ccccccccccccc",
-              "  2: 0 bbbbbbbbbbbbb ccccccccccccc",
-              "  3: 0 bbbbbbbbbbbbb ccccccccccccc",
-              "  4: 0 bbbbbbbbbbbbb ccccccccccccc",
-              "  5: 0 bbbbbbbbbbbbb ccccccccccccc",
-              " ---                              ",
-              " 98: 0 bbbbbbbbbbbbb ccccccccccccc",
-              " 99: 0 bbbbbbbbbbbbb ccccccccccccc",
-              "100: 0 bbbbbbbbbbbbb ccccccccccccc",
-              "101: 0 bbbbbbbbbbbbb ccccccccccccc",
-              "102: 0 bbbbbbbbbbbbb ccccccccccccc",
-              "1 variable not shown: [d]"))
-test(2125.03, print(DT, trunc.cols=TRUE, row.names=FALSE),
-     output=c("   a             b             c",
-              "   0 bbbbbbbbbbbbb ccccccccccccc",
-              "   0 bbbbbbbbbbbbb ccccccccccccc",
-              "   0 bbbbbbbbbbbbb ccccccccccccc",
-              "   0 bbbbbbbbbbbbb ccccccccccccc",
-              "   0 bbbbbbbbbbbbb ccccccccccccc",
-              " ---           ---           ---",
-              "   0 bbbbbbbbbbbbb ccccccccccccc",
-              "   0 bbbbbbbbbbbbb ccccccccccccc",
-              "   0 bbbbbbbbbbbbb ccccccccccccc",
-              "   0 bbbbbbbbbbbbb ccccccccccccc",
-              "   0 bbbbbbbbbbbbb ccccccccccccc",
-              "1 variable not shown: [d]" ))
+test(2125.02, capture.output(print(DT, trunc.cols=TRUE)),
+     c("     a             b             c",
+       "  1: 0 bbbbbbbbbbbbb ccccccccccccc",
+       "  2: 0 bbbbbbbbbbbbb ccccccccccccc",
+       "  3: 0 bbbbbbbbbbbbb ccccccccccccc",
+       "  4: 0 bbbbbbbbbbbbb ccccccccccccc",
+       "  5: 0 bbbbbbbbbbbbb ccccccccccccc",
+       " ---                              ",
+       " 98: 0 bbbbbbbbbbbbb ccccccccccccc",
+       " 99: 0 bbbbbbbbbbbbb ccccccccccccc",
+       "100: 0 bbbbbbbbbbbbb ccccccccccccc",
+       "101: 0 bbbbbbbbbbbbb ccccccccccccc",
+       "102: 0 bbbbbbbbbbbbb ccccccccccccc",
+       "1 variable not shown: [d]"))
+test(2125.03, capture.output(print(DT, trunc.cols=TRUE, row.names=FALSE)),
+     c("   a             b             c",
+       "   0 bbbbbbbbbbbbb ccccccccccccc",
+       "   0 bbbbbbbbbbbbb ccccccccccccc",
+       "   0 bbbbbbbbbbbbb ccccccccccccc",
+       "   0 bbbbbbbbbbbbb ccccccccccccc",
+       "   0 bbbbbbbbbbbbb ccccccccccccc",
+       " ---           ---           ---",
+       "   0 bbbbbbbbbbbbb ccccccccccccc",
+       "   0 bbbbbbbbbbbbb ccccccccccccc",
+       "   0 bbbbbbbbbbbbb ccccccccccccc",
+       "   0 bbbbbbbbbbbbb ccccccccccccc",
+       "   0 bbbbbbbbbbbbb ccccccccccccc",
+       "1 variable not shown: [d]" ))
 # also testing #4266 -- getting width of row #s register right
 #   TODO: understand why 2 variables truncated here. a,b,c combined have width
 #     _exactly_ 40, but still wraps. If we set options(width=41) it won't truncate.
 #     seems to be an issue with print.default.
-test(2125.04, print(DT, trunc.cols=TRUE, class=TRUE),
-     output="2 variables not shown: [c <char>, d <char>]")
-test(2125.05, print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE),
-     output=c("^     a             b             c", ".*",
-              "1 variable not shown: \\[d <char>\\]"))
-test(2125.06, print(DT, trunc.cols=TRUE, col.names="none"),
-     output=c("^  1: 0 bbbbbbbbbbbbb ccccccccccccc", ".*",
-              "1 variable not shown: \\[d\\]", ""))
-test(2125.07, print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"),
-     output=c("^  1: 0 bbbbbbbbbbbbb", ".*",
-              "2 variables not shown: \\[c, d\\]", ""),
+test(2125.04, capture.output(print(DT, trunc.cols=TRUE, class=TRUE))[14L],
+     "2 variables not shown: [c <char>, d <char>]")
+test(2125.05, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE))[c(1,14)],
+     c("     a             b             c",
+       "1 variable not shown: [d <char>]" ))
+test(2125.06, capture.output(print(DT, trunc.cols=TRUE, col.names="none"))[c(1,12)],
+     c("  1: 0 bbbbbbbbbbbbb ccccccccccccc",
+       "1 variable not shown: [d]" ))
+test(2125.07, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"))[c(1,13)],
+     c("  1: 0 bbbbbbbbbbbbb",
+       "2 variables not shown: [c, d]" ),
      warning = "Column classes will be suppressed when col.names is 'none'")
 options("width" = 20)
 DT = data.table(a = vector("integer", 2),
                 b = "bbbbbbbbbbbbb",
                 c = "ccccccccccccc",
                 d = "ddddddddddddd")
-test(2125.08, print(DT, trunc.cols=TRUE),
-     output=c("   a             b",
-              "1: 0 bbbbbbbbbbbbb",
-              "2: 0 bbbbbbbbbbbbb",
-              "2 variables not shown: [c, d]"))
+test(2125.08, capture.output(print(DT, trunc.cols=TRUE)),
+     c("   a             b",
+       "1: 0 bbbbbbbbbbbbb",
+       "2: 0 bbbbbbbbbbbbb",
+       "2 variables not shown: [c, d]"))
 options("width" = 10)
 DT = data.table(a = "aaaaaaaaaaaaa",
                 b = "bbbbbbbbbbbbb",
                 c = "ccccccccccccc",
                 d = "ddddddddddddd")
-test(2125.09, print(DT, trunc.cols=TRUE),
-     output="4 variables not shown: [a, b, c, d]")
-test(2125.10, print(DT, trunc.cols=TRUE, class=TRUE),
-     output="4 variables not shown: [a <char>, b <char>, c <char>, d <char>]")
+test(2125.09, capture.output(print(DT, trunc.cols=TRUE)),
+     "4 variables not shown: [a, b, c, d]")
+test(2125.10, capture.output(print(DT, trunc.cols=TRUE, class=TRUE)),
+     "4 variables not shown: [a <char>, b <char>, c <char>, d <char>]")
 options(old_width)
 
 # segfault when i is NULL or zero-column, #4060
@@ -17987,7 +18014,7 @@ test(2230.4, setDF(merge(DT, y, by="k2", incomparables=c(1, NA, 4, 5))), merge(x
 test(2230.5, setDF(merge(DT, y, by="k2", incomparables=c(NA, 3, 4, 5))), merge(x, y, by="k2", incomparables=c(NA,3,4,5)))
 test(2230.6, merge(DT, y, by="k2", unk=1), merge(DT, y, by="k2"), warning="Unknown argument 'unk' has been passed.")
 test(2230.7, merge(DT, y, by="k2", NULL, NULL, FALSE, FALSE, FALSE, TRUE, c(".x", ".y"), TRUE, getOption("datatable.allow.cartesian"), NULL, 1L),
-             merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments."))
+             merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x/by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments."))
 
 # weighted.mean GForce optimized, #3977
 old = options(datatable.optimize=1L)
@@ -18501,9 +18528,7 @@ rm(.datatable.aware)
 # tests for trunc.char handling wide characters # 5096
 local({
   lc_ctype = Sys.getlocale('LC_CTYPE')
-  # Japanese multibyte characters require utf8. As of 2025, we're likely to be already running in a UTF-8 locale, but if not, try this setlocale() call as a last chance.
-  # Unfortunately, there is no guaranteed, portable way of switching to UTF-8 US English.
-  if (!l10n_info()$`UTF-8`) Sys.setlocale('LC_CTYPE', "en_US.UTF-8")
+  Sys.setlocale('LC_CTYPE', "en_US.UTF-8") # Japanese multibyte characters require utf8
   on.exit(Sys.setlocale('LC_CTYPE', lc_ctype))
   accented_a = "\u0061\u0301"
   ja_ichi = "\u4E00"
@@ -20625,14 +20650,13 @@ test(2294.72,
                     label = list(character = "C3", VCharA = "Total", integer = 2L))),
      warning = "For the following variables, the 'label' value was already in the data: [VCharB (label: C3), VIntA (label: 2)]")
 
-# tests 1-3 disabled -- fix for #4784 causes various breaking changes, at least partially covered by 2295.4+.
 # setDT no longer leaks class modification to origin copy, #4784
-# d1 = data.frame(a=1, row.names='b')
-# d2 = d1
-# setDT(d2)
-# test(2295.1, !is.data.table(d1))
-# test(2295.2, rownames(d1), 'b')
-# test(2295.3, is.data.table(d2))
+d1 = data.frame(a=1, row.names='b')
+d2 = d1
+setDT(d2)
+test(2295.1, !is.data.table(d1))
+test(2295.2, rownames(d1), 'b')
+test(2295.3, is.data.table(d2))
 # Ensure against regression noted in #6725
 x = data.frame(a=1)
 e = environment()
@@ -20645,18 +20669,6 @@ e = new.env(parent=topenv())
 e$x = data.frame(a=1)
 foo('x', e)
 test(2295.5, is.data.table(e$x))
-# More regressions noted in #6735
-baz = function(x) setDT(x)
-foo = function(x) {
-  bar = function() baz(x)
-  x = data.frame(a=1)
-  bar()
-  is.data.table(x)
-}
-test(2295.6, foo())
-x = data.frame(a=1)
-baz(x)
-test(2295.7, is.data.table(x))
 
 # #6588: .checkTypos used to give arbitrary strings to stopf as the first argument
 test(2296, d2[x %no such operator% 1], error = '%no such operator%')
@@ -20664,8 +20676,8 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%')
 # fix coercing integer/double for joins on multiple columns, #6602
 x = data.table(a=1L)
 y = data.table(c=1L, d=1)
-test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double")
-test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double")
+test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
+test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
 x = data.table(a=1)
 y = data.table(c=1, d=1L)
 test(2297.03, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer")
@@ -20757,33 +20769,29 @@ test(2303.2, DT[, .(N=1L), by=.(b=rev(a))], data.table(b=2:1, N=1L)) # ensure no
 DT = data.table(a=2:3, b=1:0, key=c('a', 'b'))
 test(2303.3, DT[, .N, by=.(ab=a^b, d=c(1L, 1L))], data.table(ab=c(2, 1), d=1L, N=1L))
 
-# NB: these tests have been edited in light of #6740 to be regression tests
-#   preventing existing behavior from breaking in 1.17.0 while we decide
-#   whether a breaking change is warranted & how to proceed. The specific tests
-#   with different behavior under #5558 001,002,005,006,011,012,015,016.
 # tests for new consistent replacement of list columns with list(NULL), #5558
 # replacement of a list column with list(NULL) in a single-row data.table, using different assignment methods
 DT = data.table(L=list("A"), i=1L)
 ans = data.table(L=list(NULL), i=1L)
 # test using replacement with $ operator
 DT$L = list(NULL)
-test(2304.001, DT, within(ans, rm('L')))
+test(2304.001, DT, ans)
 DT = data.table(L=list("A"), i=1L)
 # standard form with := operator
-test(2304.002, copy(DT)[, L := list(NULL)], within(ans, rm('L')))
+test(2304.002, copy(DT)[, L := list(NULL)], ans)
 # functional form with := operator
 test(2304.003, copy(DT)[, `:=`(L=list(NULL))], ans)
 # functional form with 'let' alias
 test(2304.004, copy(DT)[, let(L=list(NULL))], ans)
 # using set()
-test(2304.005, set(copy(DT), j="L", value=list(NULL)), within(ans, rm('L')))
+test(2304.005, set(copy(DT), j="L", value=list(NULL)), ans)
 
 # replacement of multiple list columns with list(NULL) in a single-row data.table, using different assignment methods
 DT = data.table(L1=list("A"), L2=list("B"), i=1L)
 ans = data.table(L1=list(NULL), L2=list(NULL), i=1L)
 DT$L1 = list(NULL)
 DT$L2 = list(NULL)
-test(2304.006, DT, within(ans, rm('L1', 'L2')))
+test(2304.006, DT, ans)
 DT = data.table(L1=list("A"), L2=list("B"), i=1L)
 # standard form with := operator
 test(2304.007, copy(DT)[, c("L1", "L2") := list(list(NULL), list(NULL))], ans)
@@ -20799,23 +20807,23 @@ DT = data.table(L=list("A", "B"), i=1L)
 ans = data.table(L=list(NULL, NULL), i=1L)
 # test using replacement with $ operator
 DT$L = list(NULL)
-test(2304.011, DT, within(ans, rm('L')))
+test(2304.011, DT, ans)
 DT = data.table(L=list("A", "B"), i=1L)
 # standard form with := operator
-test(2304.012, copy(DT)[, L := list(NULL)], within(ans, rm('L')))
+test(2304.012, copy(DT)[, L := list(NULL)], ans)
 # functional form with := operator
 test(2304.013, copy(DT)[, `:=`(L=list(NULL))], ans)
 # functional form with 'let' alias
 test(2304.014, copy(DT)[, let(L=list(NULL))], ans)
 # using set()
-test(2304.015, set(copy(DT), j="L", value=list(NULL)), within(ans, rm('L')))
+test(2304.015, set(copy(DT), j="L", value=list(NULL)), ans)
 
 # replacement of multiple list columns with list(NULL) in a multi-row data.table, using different assignment methods
 DT = data.table(L1=list("A", "B"), L2=list("B", "C"), i=1L)
 ans = data.table(L1=list(NULL, NULL), L2=list(NULL, NULL), i=1L)
 DT$L1 = list(NULL)
 DT$L2 = list(NULL)
-test(2304.016, DT, within(ans, rm('L1', 'L2')))
+test(2304.016, DT, ans)
 DT = data.table(L1=list("A", "B"), L2=list("B", "C"), i=1L)
 # standard form with := operator
 test(2304.017, copy(DT)[, c("L1", "L2") := list(list(NULL), list(NULL))], ans)
@@ -21041,46 +21049,3 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list(
 
 # the integer overflow in #6729 is only noticeable with UBSan
 test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE })
-
-<<<<<<< HEAD
-if (exists("sort_by", "package:base")) {
-  # sort_by.data.table
-  DT1 = data.table(a=c(1, 3, 2, NA, 3), b=4:0)
-  DT2 = data.table(a=c("c", "a", "B")) # data.table uses C-locale and should sort_by if cedta()
-  DT3 = data.table(a=c(1, 2, 3), b=list(c("a", "b", "", NA), c(1, 3, 2, 0), c(TRUE, TRUE, FALSE, NA))) # list column
-
-  # sort_by.data.table: basics
-  test(2306.01, sort_by(DT1, ~a + b), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
-  test(2306.02, sort_by(DT1, ~I(a + b)), data.table(a=c(3, 2, 1, 3, NA), b=c(0L, 2L, 4L, 3L, 1L)))
-  test(2306.03, sort_by(DT2, ~a), data.table(a=c("B", "a", "c")))
-
-  # sort_by.data.table: list columns.
-  # NOTE 1: .formula2varlist works well with list columns.
-  # NOTE 2: 4 elem in DT of 3 row because forderv takes a list column as a DT.
-  test(2306.04, sort_by(DT3, ~b), DT3[order(b)]) # should be consistent.
-
-  # sort_by.data.table: additional C-locale sorting
-  test(2306.10, DT2[, sort_by(.SD, a)], data.table(a=c("B", "a", "c")))
-  test(2306.11, DT2[, sort_by(.SD, ~a)], data.table(a=c("B", "a", "c")))
-
-  # sort_by.data.table: various working interfaces
-  test(2306.20, sort_by(DT1, list(DT1$a, DT1$b)), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
-  test(2306.21, sort_by(DT1, DT1[, .(a, b)]), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
-  test(2306.22, DT1[, sort_by(.SD, .(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
-  test(2306.23, DT1[, sort_by(.SD, ~a + b)], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
-  test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
-}
-=======
-#test for enhancing error message of invalid column #6512
-uniq_diff <- function(...) {
-  .Call("uniq_diff", ...)
-}
-capture_error_message <- function(expr) {
-  msg <- tryCatch(
-    { expr; NULL }, 
-    error = function(e) e$message
-  )
-  msg
-}
-test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))  # Ensure -1 is reported}, TRUE)
->>>>>>> 1b4a51d3 (add test case)

From a26924cd6ecbf08c79b19f0e686b593f8232308f Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 01:55:43 +0530
Subject: [PATCH 10/25] add test

---
 inst/tests/tests.Rraw | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 1d31d232ce..c443f26a63 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21049,3 +21049,16 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list(
 
 # the integer overflow in #6729 is only noticeable with UBSan
 test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE })
+
+#test for enhancing error message of invalid column #6512
+uniq_diff <- function(...) {
+  .Call("uniq_diff", ...)
+}
+capture_error_message <- function(expr) {
+  msg <- tryCatch(
+    { expr; NULL }, 
+    error = function(e) e$message
+  )
+  msg
+}
+test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)

From 7b74601648bb3c336a2d79234e5317454f4a4972 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 02:13:53 +0530
Subject: [PATCH 11/25] Add space after '#' in comment above #6512 test

---
 inst/tests/tests.Rraw | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index c443f26a63..2a9346744a 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21050,7 +21050,7 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list(
 # the integer overflow in #6729 is only noticeable with UBSan
 test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE })
 
-#test for enhancing error message of invalid column #6512
+# test for enhancing error message of invalid column #6512
 uniq_diff <- function(...) {
   .Call("uniq_diff", ...)
 }

From ec3ea650a904a44f036d630c2d3ea9486b0f8718 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 03:01:53 +0530
Subject: [PATCH 12/25] Append second copy of #6512 test; condense is_measure check in fmelt.c

---
 inst/tests/tests.Rraw | 13 +++++++++++++
 src/fmelt.c           |  4 +---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 2a9346744a..5acbc61b24 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21062,3 +21062,16 @@ capture_error_message <- function(expr) {
   msg
 }
 test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)
+
+# test for enhancing error message of invalid column #6512
+uniq_diff <- function(...) {
+  .Call("uniq_diff", ...)
+}
+capture_error_message <- function(expr) {
+  msg <- tryCatch(
+    { expr; NULL }, 
+    error = function(e) e$message
+  )
+  msg
+}
+test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)
\ No newline at end of file
diff --git a/src/fmelt.c b/src/fmelt.c
index 8eb257ae2e..f031cc350f 100644
--- a/src/fmelt.c
+++ b/src/fmelt.c
@@ -185,9 +185,7 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
   for (int i = 0; i < length(int_vec); ++i) {
     int col_number = INTEGER(int_vec)[i];
     bool good_number = (col_number > 0 && col_number <= ncol);
-    if (is_measure) {
-      good_number |= (col_number == NA_INTEGER);
-    }
+    if (is_measure) good_number |= (col_number == NA_INTEGER);
     if (!good_number) {
       invalid_col_ptr[invalid_count++] = col_number;
     } else if (!LOGICAL(is_duplicated)[i]) {

From ba8131c021aac5c55600491f05ce48883897e786 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 03:15:43 +0530
Subject: [PATCH 13/25] Revert "add test"

This reverts commit 11345b98356475fe050582894c48ea9f643cf504.
---
 inst/tests/tests.Rraw | 313 +++++++++++++++++++++++-------------------
 1 file changed, 174 insertions(+), 139 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 5acbc61b24..a59563829f 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -159,9 +159,6 @@ TZnotUTC = !identical(tt,"") && !is_utc(tt)
 #   (3) function factory for matching messages exactly by substituting anything between delimiters [delim, fmt=TRUE]
 #   (4) function factory for matching messages exactly by substituting a generic string [fmt=string]
 get_msg = function(e, delim, fmt=FALSE) {
-  ufq = options(useFancyQuotes = FALSE) # otherwise we get angled quotes, hard to match robustly
-  on.exit(options(ufq))
-
   condition = tryCatch({e; NULL}, error=identity, warning=identity)
   if (is.null(condition)) return(condition)
   msg = condition$message
@@ -1364,43 +1361,20 @@ if (test_bit64) {
   test(431.5, DT[5,1:=as.integer64(NA)], data.table(a=factor(c(NA,NA,NA,NA,NA), levels=LETTERS[1:3]), b=1:5))
 }
 
-old = getOption("datatable.alloccol")  # Test that unsetting datatable.alloccol is caught, #2014
-options(datatable.alloccol=NULL) # In this =NULL case, options() in R 3.0.0 returned TRUE rather than the old value. This R bug was fixed in R 3.1.1.
-                                 # This is why getOption is called first rather than just using the result of option() like elsewhere in this test file.
-                                 # TODO: simplify this test if/when R dependency >= 3.1.1
-err1 = try(data.table(a=1:3), silent=TRUE)
-options(datatable.alloccol="1024")
-err2 = try(data.table(a=1:3), silent=TRUE)
-options(datatable.alloccol=c(10L,20L))
-err3 = try(data.table(a=1:3), silent=TRUE)
-options(datatable.alloccol=NA_integer_)
-err4 = try(data.table(a=1:3), silent=TRUE)
-options(datatable.alloccol=-1)
-err5 = try(data.table(a=1:3), silent=TRUE)
-options(datatable.alloccol=1024L)   # otherwise test() itself fails in its internals with the alloc.col error
-test(432.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1))
-test(432.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2))
-test(432.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3))
-test(432.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4))
-test(432.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5))
+# Test that unsetting datatable.alloccol is caught, #2014
+test(432.1, data.table(a=1:3), options=list(datatable.alloccol=NULL), error="Has getOption('datatable.alloccol') somehow become unset?")
+test(432.2, data.table(a=1:3), options=c(datatable.alloccol="1024"), error="getOption('datatable.alloccol') should be a number, by default 1024. But its type is 'character'.")
+test(432.3, data.table(a=1:3), options=list(datatable.alloccol=c(10L,20L)), error="is a numeric vector ok but its length is 2. Its length should be 1.")
+test(432.4, data.table(a=1:3), options=c(datatable.alloccol=NA_integer_), error="It must be >=0 and not NA.")
+test(432.5, data.table(a=1:3), options=c(datatable.alloccol=-1), error="It must be >=0 and not NA.")
+
 # Repeat the tests but this time with subsetting, to ensure the validity check on option happens for those too
 DT = data.table(a=1:3, b=4:6)
-options(datatable.alloccol=NULL)
-err1 = try(DT[2,], silent=TRUE)
-options(datatable.alloccol="1024")
-err2 = try(DT[,2], silent=TRUE)
-options(datatable.alloccol=c(10L,20L))
-err3 = try(DT[a>1], silent=TRUE)
-options(datatable.alloccol=NA_integer_)
-err4 = try(DT[,"b"], silent=TRUE)
-options(datatable.alloccol=-1)
-err5 = try(DT[2,"b"], silent=TRUE)
-options(datatable.alloccol=1024L)   # otherwise test() itself fails in its internals with the alloc.col error
-test(433.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1))
-test(433.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2))
-test(433.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3))
-test(433.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4))
-test(433.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5))
+test(433.1, DT[2,], options=list(datatable.alloccol=NULL), error="Has getOption('datatable.alloccol') somehow become unset?")
+test(433.2, DT[,2], options=c(datatable.alloccol="1024"), error="getOption('datatable.alloccol') should be a number, by default 1024. But its type is 'character'.")
+test(433.3, DT[a>1], options=list(datatable.alloccol=c(10L,20L)), error="is a numeric vector ok but its length is 2. Its length should be 1.")
+test(433.4, DT[,"b"], options=c(datatable.alloccol=NA_integer_), error="It must be >=0 and not NA.")
+test(433.5, DT[2,"b"], options=c(datatable.alloccol=-1), error="It must be >=0 and not NA.")
 
 # simple realloc test
 DT = data.table(a=1:3,b=4:6)
@@ -7066,7 +7040,7 @@ ee = new.env()
 ee$DT = data.frame(x=1L, y=1:3)
 setattr(ee$DT, 'class', c("data.table", "data.frame"))
 test(1482.1, truelength(ee$DT), 0L) # make sure that the simulated environment is right.
-test(1482.2, ee$DT[, z := 3:1], data.table(x=1L, y=1:3, z=3:1), warning="Invalid .internal.selfref detected and")
+test(1482.2, ee$DT[, z := 3:1], data.table(x=1L, y=1:3, z=3:1), warning="A shallow copy of this data.table was taken")
 test(1482.3, truelength(ee$DT), 1027L)
 test(1482.4, ee$DT[, za := 4:6], data.table(x=1L, y=1:3, z=3:1, za=4:6))
 test(1482.5, truelength(ee$DT), 1027L)   # should have used spare slot i.e. no increase in tl
@@ -7891,7 +7865,7 @@ test(1551.5, fread(str),
 rhs = setDT(read.table(testDir("issue_1095_fread.txt.bz2"), sep=",", comment.char="", stringsAsFactors=FALSE, quote="", strip.white=TRUE))
 if (test_R.utils) {
   test(1551.61, fread(testDir("issue_1095_fread.txt.bz2"), logical01=FALSE), rhs, warning=w)
-  rhs[, names(.SD) := lapply(.SD, \(x) x == "Y"), .SDcols = c("V16", "V17", "V45")]
+  rhs[, names(.SD) := lapply(.SD, function(x) x == "Y"), .SDcols = c("V16", "V17", "V45")]
   test(1551.62, fread(testDir("issue_1095_fread.txt.bz2"), logical01=FALSE, logicalYN=TRUE), rhs, warning=w)
 }
 
@@ -8567,14 +8541,13 @@ DT1 = data.table(a=1)
 test(1601.1, merge(DT1, DT1, by="a"), data.table(a=1, key="a"))
 test(1601.2, merge(DT1, DT0, by="a"),
      warning="Input data.table 'y' has no columns.",
-     error="Elements listed in `by`")
+     error="The following columns listed in `by` are missing from y: [a]")
 test(1601.3, merge(DT0, DT1, by="a"),
      warning="Input data.table 'x' has no columns.",
-     error="Elements listed in `by`")
+     error="The following columns listed in `by` are missing from x: [a]")
 test(1601.4, merge(DT0, DT0, by="a"),
      warning="Neither of the input data.tables to join have columns.",
-     error="Elements listed in `by`")
-
+     error="The following columns listed in `by` are missing from x: [a]")
 # fix for #1549
 d1 <- data.table(v1=1:2,x=x)
 d2 <- data.table(v1=3:4)
@@ -8716,17 +8689,17 @@ test(1613.21, all.equal(DT2, DT1, ignore.row.order = TRUE), "Dataset 'current' h
 # test attributes: key
 DT1 <- data.table(a = 1:4, b = letters[1:4], key = "a")
 DT2 <- data.table(a = 1:4, b = letters[1:4])
-test(1613.22, all.equal(DT1, DT2), "Datasets have different keys. 'target': [a]. 'current': has no key.")
+test(1613.22, all.equal(DT1, DT2), output="Datasets have different keys. 'target': [a]. 'current': has no key.")
 test(1613.23, all.equal(DT1, DT2, check.attributes = FALSE), TRUE)
 test(1613.24, all.equal(DT1, setkeyv(DT2, "a"), check.attributes = TRUE), TRUE)
 # test attributes: index
 DT1 <- data.table(a = 1:4, b = letters[1:4])
 DT2 <- data.table(a = 1:4, b = letters[1:4])
 setindexv(DT1, "b")
-test(1613.25, all.equal(DT1, DT2), "Datasets have different indices. 'target': [b]. 'current': has no index.")
+test(1613.25, all.equal(DT1, DT2), output="Datasets have different indices. 'target': [b]. 'current': has no index.")
 test(1613.26, all.equal(DT1, DT2, check.attributes = FALSE), TRUE)
-test(1613.27, all.equal(DT1, setindexv(DT2, "a")), "Datasets have different indices. 'target': [b]. 'current': [a].")
-test(1613.28, all.equal(DT1, setindexv(DT2, "b")), "Datasets have different indices. 'target': [b]. 'current': [a, b].")
+test(1613.27, all.equal(DT1, setindexv(DT2, "a")), output="Datasets have different indices. 'target': [b]. 'current': [a].")
+test(1613.28, all.equal(DT1, setindexv(DT2, "b")), output="Datasets have different indices. 'target': [b]. 'current': [a, b].")
 test(1613.29, all.equal(DT1, setindexv(setindexv(DT2, NULL), "b")), TRUE)
 # test custom attribute
 DT1 <- data.table(a = 1:4, b = letters[1:4])
@@ -10995,7 +10968,8 @@ DT = data.table(
   D = as.POSIXct(dt<-paste(d,t), tz="UTC"),
   E = as.POSIXct(paste0(dt,c(".999",".0",".5",".111112",".123456",".023",".0",".999999",".99",".0009")), tz="UTC"))
 
-test(1740.1, fwrite(DT,dateTimeAs="iso"), error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv"))
+test(1740.1, fwrite(DT,dateTimeAs="iso"),
+     error=base_messages$match_arg_4_choices("ISO", "squash", "epoch", "write.csv"))
 test(1740.2, fwrite(DT,dateTimeAs=c("ISO","squash")), error=base_messages$match_arg_length)
 test(1740.3, capture.output(fwrite(DT,dateTimeAs="ISO")), c(
 "A,B,C,D,E",
@@ -11106,7 +11080,7 @@ test(1743.123, fread("a,b\n1+3i,2015-01-01", colClasses=c(NA,"IDate")), data.tab
 test(1743.13, lapply(fread("a,b\n09/05/98,2015-01-01", colClasses = "Date"), class), y=list(a="character", b=c("IDate", "Date")), warning=base_messages$ambiguous_date_fmt)
 
 ## Just invalid
-test(1743.14, options = c(useFancyQuotes = FALSE),
+test(1743.14,
      sapply(fread("a,b\n2017-01-01,1", colClasses=c("foo", "integer")), class), c(a="character", b="integer"),
      warning=base_messages$missing_coerce_method)
 test(1743.15, sapply(fread("a,b\n2017-01-01,1", colClasses=c("foo", "integer")), class), c(a="character", b="integer"), warning="the column has been left as type .*character")
@@ -11813,15 +11787,15 @@ test(1775.1, capture.output(print(DT1, print.keys = TRUE)),
      c("Key: <a>", "   a", "1: 1", "2: 2", "3: 3"))
 DT2 <- data.table(a = 1:3, b = 4:6)
 setindexv(DT2, c("b","a"))
-test(1775.2, capture.output(print(DT2, print.keys = TRUE)),
-     c("Index: <b__a>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6"))
+test(1775.2, print(DT2, print.keys = TRUE),
+     output=c("Index: <b__a>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6"))
 setindexv(DT2, "b")
-test(1775.3, capture.output(print(DT2, print.keys = TRUE)),
-     c("Indices: <b__a>, <b>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6"))
+test(1775.3, print(DT2, print.keys = TRUE),
+     output=c("Indices: <b__a>, <b>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6"))
 setkey(DT2, a)
 setindexv(DT2, c("b","a"))
-test(1775.4, capture.output(print(DT2, print.keys = TRUE)),
-     c("Key: <a>", "Indices: <b__a>, <b>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6")) ## index 'b' is still good, so we keep it
+test(1775.4, print(DT2, print.keys = TRUE),
+     output=c("Key: <a>", "Indices: <b__a>, <b>", "   a b", "1: 1 4", "2: 2 5", "3: 3 6")) ## index 'b' is still good, so we keep it
 
 # dev regression #2285
 cat("A  B  C\n1  2  3\n4  5  6", file=f<-tempfile())
@@ -12145,8 +12119,7 @@ test(1831.4, fread(paste0("A\n", "1.", src2)), data.table(A=1.1234567890098766))
 DT = as.data.table(matrix(5L, nrow=10, ncol=10))
 test(1832.1, fwrite(DT, f<-tempfile(), verbose=TRUE), output="Column writers")
 DT = as.data.table(matrix(5L, nrow=10, ncol=60))
-# Using capture.output directly to look for the "..." because test(,output=) intercepts [] for convenience elsewhere
-test(1832.2, any(grepl("^Column writers.* [.][.][.] ", capture.output(fwrite(DT, f, verbose=TRUE)))))
+test(1832.2, fwrite(DT, f, verbose=TRUE), output = "\nColumn writers.* [.][.][.] ")
 unlink(f)
 
 # ensure explicitly setting select to default value doesn't error, #2007
@@ -13546,14 +13519,14 @@ test(1962.016, merge(DT1, DT2, by.x = 'a', by.y = c('a', 'V')),
 test(1962.017, merge(DT1, DT2, by = 'V', by.x = 'a', by.y = 'a'),
      data.table(a = 2:3, V.x = c("a", "a"), V.y = c("b", "b"), key = 'a'),
      warning = 'Supplied both.*argument will be ignored')
-test(1962.018, merge(DT1, DT2, by.x = 'z', by.y = 'a'),
-     error = 'Elements listed in `by.x`')
-test(1962.019, merge(DT1, DT2, by.x = 'a', by.y = 'z'),
-     error = 'Elements listed in `by.y`')
+test(1962.018, merge(DT1, DT2, by.x='z', by.y='a'),
+     error="The following columns listed in `by.x` are missing from x: [z]")
+test(1962.019, merge(DT1, DT2, by.x='a', by.y='z'),
+     error="The following columns listed in `by.y` are missing from y: [z]")
 test(1962.0201, merge(DT1, DT2, by=character(0L)), ans)  # was error before PR#5183
 test(1962.0202, merge(DT1, DT2, by=NULL),          ans)  # test explicit NULL too as missing() could be used inside merge()
-test(1962.021, merge(DT1, DT2, by = 'z'),
-     error = 'must be valid column names in x and y')
+test(1962.021, merge(DT1, DT2, by='z'),
+     error='The following columns listed in `by` are missing from x: [z]')
 
 ## frank.R
 x = c(1, 1, 2, 5, 4, 3, 4, NA, 6)
@@ -14942,7 +14915,7 @@ test(2037.1, foo(DT), output='Please remember to always setDT()')
 # no assignment was made to DT
 test(2037.2, names(DT), 'a')
 # _selrefok() verbose message was duplicated
-test(2037.3, unname(table(unlist(strsplit(capture.output(foo(DT)), '\n|\\s+')))['ptr']), 1L)
+test(2037.3, foo(DT), output="data.table internal attributes", notOutput="data.table internal attributes.*data.table internal attributes")
 
 # `between` invalid args, and verbose #3516
 test(2038.01, between(1:5, 2, 4, incbounds=423), error="incbounds must be TRUE or FALSE")
@@ -15166,13 +15139,13 @@ test(2044.60, dt1[dt2, ..cols, on="int==doubleInt", verbose=TRUE],
 test(2044.61, dt1[dt2, ..cols, on="int==realDouble", verbose=TRUE],  # this was wrong in v1.12.2 (the fractions were truncated and joined to next lowest int)
               data.table(x.bool=c(NA,FALSE,NA,FALSE,NA), x.int=INT(NA,1,NA,2,NA), x.doubleInt=c(NA,1,NA,2,NA),
                          i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
-              output="Coercing integer column x.int to type double to match type of i.realDouble which contains fractions")
+              output="Coercing integer column x.int to type double to match type of i.realDouble .which contains fractions.")
 test(2044.62, dt1[dt2, ..cols, on="doubleInt==int", verbose=TRUE],
               data.table(x.bool=FALSE, x.int=1:5, x.doubleInt=as.double(1:5), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
-              output="Coercing integer column i.int to type double for join to match type of x.doubleInt")
+              output="Coercing integer column i.int .for join. to type double to match type of x.doubleInt")
 test(2044.63, dt1[dt2, ..cols, on="realDouble==int", verbose=TRUE],
               data.table(x.bool=c(rep(FALSE,4),TRUE), x.int=INT(2,4,6,8,10), x.doubleInt=c(2,4,6,8,10), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
-              output="Coercing integer column i.int to type double for join to match type of x.realDouble")
+              output="Coercing integer column i.int .for join. to type double to match type of x.realDouble")
 cols = c("x.int","x.char","x.fact","i.int","i.char","i.char")
 test(2044.64, dt1[dt2, ..cols, on="char==fact", verbose=TRUE],
               ans<-data.table(x.int=1:5, x.char=letters[1:5], x.fact=factor(letters[1:5]), i.int=1:5, i.char=letters[1:5], i.char=letters[1:5]),
@@ -15207,15 +15180,15 @@ if (test_bit64) {
 dt1 = data.table(a=1,  b=NA_character_)
 dt2 = data.table(a=2L, b=NA)
 test(2044.80, dt1[dt2, on="a==b",             verbose=TRUE], data.table(a=NA, b=NA_character_, i.a=2L),
-              output=msg<-"Coercing all-NA logical column i.b to type double to match type of x.a")
+              output=msg<-"Coercing logical column i.b .all-NA. to type double to match type of x.a")
 test(2044.81, dt1[dt2, on="a==b", nomatch=0L, verbose=TRUE], data.table(a=logical(), b=character(), i.a=integer()),
               output=msg)
 test(2044.82, dt1[dt2, on="b==b",             verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
-              output=msg<-"Coercing all-NA logical column i.b to type character to match type of x.b")
+              output=msg<-"Coercing logical column i.b .all-NA. to type character to match type of x.b")
 test(2044.83, dt1[dt2, on="b==b", nomatch=0L, verbose=TRUE], data.table(a=1, b=NA, i.a=2L),
               output=msg)
 test(2044.84, dt1[dt2, on="b==a",             verbose=TRUE], data.table(a=NA_real_, b=2L, i.b=NA),
-              output=msg<-"Coercing all-NA character column x.b to type integer to match type of i.a")
+              output=msg<-"Coercing character column x.b .all-NA. to type integer to match type of i.a")
 test(2044.85, dt1[dt2, on="b==a", nomatch=0L, verbose=TRUE], data.table(a=double(), b=integer(), i.b=logical()),
               output=msg)
 
@@ -15447,7 +15420,7 @@ L = list(1:3, NULL, 4:6)
 test(2058.18, length(L), 3L)
 test(2058.19, as.data.table(L), data.table(V1=1:3, V2=4:6))  # V2 not V3        # no
 DT = data.table(a=1:3, b=c(4,5,6))
-test(2058.20, DT[,b:=list(NULL)], data.table(a=1:3, b=list(NULL)))              # no
+test(2058.20, DT[,b:=list(NULL)], data.table(a=1:3))                            # no
 
 # rbindlist improved error message, #3638
 DT = data.table(a=1)
@@ -15642,7 +15615,7 @@ i = data.table(date = dbl_date, key = 'date')
 test(2064.1, x[i, class(date), verbose=TRUE], 'Date',
              output="Coercing double column i.date (which contains no fractions) to type integer to match type of x.date")
 test(2064.2, i[x, class(date), verbose=TRUE], 'Date',
-             output="Coercing integer column i.date to type double for join to match type of x.date")
+             output="Coercing integer column i.date .for join. to type double to match type of x.date")
 
 # complex values in grouping, #3639
 set.seed(42)
@@ -16571,69 +16544,69 @@ DT = data.table(a = vector("integer", 102L),
                 b = "bbbbbbbbbbbbb",
                 c = "ccccccccccccc",
                 d = c("ddddddddddddd", "d"))
-test(2125.02, capture.output(print(DT, trunc.cols=TRUE)),
-     c("     a             b             c",
-       "  1: 0 bbbbbbbbbbbbb ccccccccccccc",
-       "  2: 0 bbbbbbbbbbbbb ccccccccccccc",
-       "  3: 0 bbbbbbbbbbbbb ccccccccccccc",
-       "  4: 0 bbbbbbbbbbbbb ccccccccccccc",
-       "  5: 0 bbbbbbbbbbbbb ccccccccccccc",
-       " ---                              ",
-       " 98: 0 bbbbbbbbbbbbb ccccccccccccc",
-       " 99: 0 bbbbbbbbbbbbb ccccccccccccc",
-       "100: 0 bbbbbbbbbbbbb ccccccccccccc",
-       "101: 0 bbbbbbbbbbbbb ccccccccccccc",
-       "102: 0 bbbbbbbbbbbbb ccccccccccccc",
-       "1 variable not shown: [d]"))
-test(2125.03, capture.output(print(DT, trunc.cols=TRUE, row.names=FALSE)),
-     c("   a             b             c",
-       "   0 bbbbbbbbbbbbb ccccccccccccc",
-       "   0 bbbbbbbbbbbbb ccccccccccccc",
-       "   0 bbbbbbbbbbbbb ccccccccccccc",
-       "   0 bbbbbbbbbbbbb ccccccccccccc",
-       "   0 bbbbbbbbbbbbb ccccccccccccc",
-       " ---           ---           ---",
-       "   0 bbbbbbbbbbbbb ccccccccccccc",
-       "   0 bbbbbbbbbbbbb ccccccccccccc",
-       "   0 bbbbbbbbbbbbb ccccccccccccc",
-       "   0 bbbbbbbbbbbbb ccccccccccccc",
-       "   0 bbbbbbbbbbbbb ccccccccccccc",
-       "1 variable not shown: [d]" ))
+test(2125.02, print(DT, trunc.cols=TRUE),
+     output=c("     a             b             c",
+              "  1: 0 bbbbbbbbbbbbb ccccccccccccc",
+              "  2: 0 bbbbbbbbbbbbb ccccccccccccc",
+              "  3: 0 bbbbbbbbbbbbb ccccccccccccc",
+              "  4: 0 bbbbbbbbbbbbb ccccccccccccc",
+              "  5: 0 bbbbbbbbbbbbb ccccccccccccc",
+              " ---                              ",
+              " 98: 0 bbbbbbbbbbbbb ccccccccccccc",
+              " 99: 0 bbbbbbbbbbbbb ccccccccccccc",
+              "100: 0 bbbbbbbbbbbbb ccccccccccccc",
+              "101: 0 bbbbbbbbbbbbb ccccccccccccc",
+              "102: 0 bbbbbbbbbbbbb ccccccccccccc",
+              "1 variable not shown: [d]"))
+test(2125.03, print(DT, trunc.cols=TRUE, row.names=FALSE),
+     output=c("   a             b             c",
+              "   0 bbbbbbbbbbbbb ccccccccccccc",
+              "   0 bbbbbbbbbbbbb ccccccccccccc",
+              "   0 bbbbbbbbbbbbb ccccccccccccc",
+              "   0 bbbbbbbbbbbbb ccccccccccccc",
+              "   0 bbbbbbbbbbbbb ccccccccccccc",
+              " ---           ---           ---",
+              "   0 bbbbbbbbbbbbb ccccccccccccc",
+              "   0 bbbbbbbbbbbbb ccccccccccccc",
+              "   0 bbbbbbbbbbbbb ccccccccccccc",
+              "   0 bbbbbbbbbbbbb ccccccccccccc",
+              "   0 bbbbbbbbbbbbb ccccccccccccc",
+              "1 variable not shown: [d]" ))
 # also testing #4266 -- getting width of row #s register right
 #   TODO: understand why 2 variables truncated here. a,b,c combined have width
 #     _exactly_ 40, but still wraps. If we set options(width=41) it won't truncate.
 #     seems to be an issue with print.default.
-test(2125.04, capture.output(print(DT, trunc.cols=TRUE, class=TRUE))[14L],
-     "2 variables not shown: [c <char>, d <char>]")
-test(2125.05, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE))[c(1,14)],
-     c("     a             b             c",
-       "1 variable not shown: [d <char>]" ))
-test(2125.06, capture.output(print(DT, trunc.cols=TRUE, col.names="none"))[c(1,12)],
-     c("  1: 0 bbbbbbbbbbbbb ccccccccccccc",
-       "1 variable not shown: [d]" ))
-test(2125.07, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"))[c(1,13)],
-     c("  1: 0 bbbbbbbbbbbbb",
-       "2 variables not shown: [c, d]" ),
+test(2125.04, print(DT, trunc.cols=TRUE, class=TRUE),
+     output="2 variables not shown: [c <char>, d <char>]")
+test(2125.05, print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE),
+     output=c("^     a             b             c", ".*",
+              "1 variable not shown: \\[d <char>\\]"))
+test(2125.06, print(DT, trunc.cols=TRUE, col.names="none"),
+     output=c("^  1: 0 bbbbbbbbbbbbb ccccccccccccc", ".*",
+              "1 variable not shown: \\[d\\]", ""))
+test(2125.07, print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"),
+     output=c("^  1: 0 bbbbbbbbbbbbb", ".*",
+              "2 variables not shown: \\[c, d\\]", ""),
      warning = "Column classes will be suppressed when col.names is 'none'")
 options("width" = 20)
 DT = data.table(a = vector("integer", 2),
                 b = "bbbbbbbbbbbbb",
                 c = "ccccccccccccc",
                 d = "ddddddddddddd")
-test(2125.08, capture.output(print(DT, trunc.cols=TRUE)),
-     c("   a             b",
-       "1: 0 bbbbbbbbbbbbb",
-       "2: 0 bbbbbbbbbbbbb",
-       "2 variables not shown: [c, d]"))
+test(2125.08, print(DT, trunc.cols=TRUE),
+     output=c("   a             b",
+              "1: 0 bbbbbbbbbbbbb",
+              "2: 0 bbbbbbbbbbbbb",
+              "2 variables not shown: [c, d]"))
 options("width" = 10)
 DT = data.table(a = "aaaaaaaaaaaaa",
                 b = "bbbbbbbbbbbbb",
                 c = "ccccccccccccc",
                 d = "ddddddddddddd")
-test(2125.09, capture.output(print(DT, trunc.cols=TRUE)),
-     "4 variables not shown: [a, b, c, d]")
-test(2125.10, capture.output(print(DT, trunc.cols=TRUE, class=TRUE)),
-     "4 variables not shown: [a <char>, b <char>, c <char>, d <char>]")
+test(2125.09, print(DT, trunc.cols=TRUE),
+     output="4 variables not shown: [a, b, c, d]")
+test(2125.10, print(DT, trunc.cols=TRUE, class=TRUE),
+     output="4 variables not shown: [a <char>, b <char>, c <char>, d <char>]")
 options(old_width)
 
 # segfault when i is NULL or zero-column, #4060
@@ -18014,7 +17987,7 @@ test(2230.4, setDF(merge(DT, y, by="k2", incomparables=c(1, NA, 4, 5))), merge(x
 test(2230.5, setDF(merge(DT, y, by="k2", incomparables=c(NA, 3, 4, 5))), merge(x, y, by="k2", incomparables=c(NA,3,4,5)))
 test(2230.6, merge(DT, y, by="k2", unk=1), merge(DT, y, by="k2"), warning="Unknown argument 'unk' has been passed.")
 test(2230.7, merge(DT, y, by="k2", NULL, NULL, FALSE, FALSE, FALSE, TRUE, c(".x", ".y"), TRUE, getOption("datatable.allow.cartesian"), NULL, 1L),
-             merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x/by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments."))
+             merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments."))
 
 # weighted.mean GForce optimized, #3977
 old = options(datatable.optimize=1L)
@@ -18528,7 +18501,9 @@ rm(.datatable.aware)
 # tests for trunc.char handling wide characters # 5096
 local({
   lc_ctype = Sys.getlocale('LC_CTYPE')
-  Sys.setlocale('LC_CTYPE', "en_US.UTF-8") # Japanese multibyte characters require utf8
+  # Japanese multibyte characters require utf8. As of 2025, we're likely to be already running in a UTF-8 locale, but if not, try this setlocale() call as a last chance.
+  # Unfortunately, there is no guaranteed, portable way of switching to UTF-8 US English.
+  if (!l10n_info()$`UTF-8`) Sys.setlocale('LC_CTYPE', "en_US.UTF-8")
   on.exit(Sys.setlocale('LC_CTYPE', lc_ctype))
   accented_a = "\u0061\u0301"
   ja_ichi = "\u4E00"
@@ -20650,13 +20625,14 @@ test(2294.72,
                     label = list(character = "C3", VCharA = "Total", integer = 2L))),
      warning = "For the following variables, the 'label' value was already in the data: [VCharB (label: C3), VIntA (label: 2)]")
 
+# tests 1-3 disabled -- fix for #4784 causes various breaking changes, at least partially covered by 2295.4+.
 # setDT no longer leaks class modification to origin copy, #4784
-d1 = data.frame(a=1, row.names='b')
-d2 = d1
-setDT(d2)
-test(2295.1, !is.data.table(d1))
-test(2295.2, rownames(d1), 'b')
-test(2295.3, is.data.table(d2))
+# d1 = data.frame(a=1, row.names='b')
+# d2 = d1
+# setDT(d2)
+# test(2295.1, !is.data.table(d1))
+# test(2295.2, rownames(d1), 'b')
+# test(2295.3, is.data.table(d2))
 # Ensure against regression noted in #6725
 x = data.frame(a=1)
 e = environment()
@@ -20669,6 +20645,18 @@ e = new.env(parent=topenv())
 e$x = data.frame(a=1)
 foo('x', e)
 test(2295.5, is.data.table(e$x))
+# More regressions noted in #6735
+baz = function(x) setDT(x)
+foo = function(x) {
+  bar = function() baz(x)
+  x = data.frame(a=1)
+  bar()
+  is.data.table(x)
+}
+test(2295.6, foo())
+x = data.frame(a=1)
+baz(x)
+test(2295.7, is.data.table(x))
 
 # #6588: .checkTypos used to give arbitrary strings to stopf as the first argument
 test(2296, d2[x %no such operator% 1], error = '%no such operator%')
@@ -20676,8 +20664,8 @@ test(2296, d2[x %no such operator% 1], error = '%no such operator%')
 # fix coercing integer/double for joins on multiple columns, #6602
 x = data.table(a=1L)
 y = data.table(c=1L, d=1)
-test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
-test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a to type double.*Coercing .*c to type double")
+test(2297.01, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double")
+test(2297.02, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .for join. to type double.*Coercing .*c to type double")
 x = data.table(a=1)
 y = data.table(c=1, d=1L)
 test(2297.03, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer")
@@ -20769,29 +20757,33 @@ test(2303.2, DT[, .(N=1L), by=.(b=rev(a))], data.table(b=2:1, N=1L)) # ensure no
 DT = data.table(a=2:3, b=1:0, key=c('a', 'b'))
 test(2303.3, DT[, .N, by=.(ab=a^b, d=c(1L, 1L))], data.table(ab=c(2, 1), d=1L, N=1L))
 
+# NB: these tests have been edited in light of #6740 to be regression tests
+#   preventing existing behavior from breaking in 1.17.0 while we decide
+#   whether a breaking change is warranted & how to proceed. The specific tests
+#   with different behavior under #5558 are 001,002,005,006,011,012,015,016.
 # tests for new consistent replacement of list columns with list(NULL), #5558
 # replacement of a list column with list(NULL) in a single-row data.table, using different assignment methods
 DT = data.table(L=list("A"), i=1L)
 ans = data.table(L=list(NULL), i=1L)
 # test using replacement with $ operator
 DT$L = list(NULL)
-test(2304.001, DT, ans)
+test(2304.001, DT, within(ans, rm('L')))
 DT = data.table(L=list("A"), i=1L)
 # standard form with := operator
-test(2304.002, copy(DT)[, L := list(NULL)], ans)
+test(2304.002, copy(DT)[, L := list(NULL)], within(ans, rm('L')))
 # functional form with := operator
 test(2304.003, copy(DT)[, `:=`(L=list(NULL))], ans)
 # functional form with 'let' alias
 test(2304.004, copy(DT)[, let(L=list(NULL))], ans)
 # using set()
-test(2304.005, set(copy(DT), j="L", value=list(NULL)), ans)
+test(2304.005, set(copy(DT), j="L", value=list(NULL)), within(ans, rm('L')))
 
 # replacement of multiple list columns with list(NULL) in a single-row data.table, using different assignment methods
 DT = data.table(L1=list("A"), L2=list("B"), i=1L)
 ans = data.table(L1=list(NULL), L2=list(NULL), i=1L)
 DT$L1 = list(NULL)
 DT$L2 = list(NULL)
-test(2304.006, DT, ans)
+test(2304.006, DT, within(ans, rm('L1', 'L2')))
 DT = data.table(L1=list("A"), L2=list("B"), i=1L)
 # standard form with := operator
 test(2304.007, copy(DT)[, c("L1", "L2") := list(list(NULL), list(NULL))], ans)
@@ -20807,23 +20799,23 @@ DT = data.table(L=list("A", "B"), i=1L)
 ans = data.table(L=list(NULL, NULL), i=1L)
 # test using replacement with $ operator
 DT$L = list(NULL)
-test(2304.011, DT, ans)
+test(2304.011, DT, within(ans, rm('L')))
 DT = data.table(L=list("A", "B"), i=1L)
 # standard form with := operator
-test(2304.012, copy(DT)[, L := list(NULL)], ans)
+test(2304.012, copy(DT)[, L := list(NULL)], within(ans, rm('L')))
 # functional form with := operator
 test(2304.013, copy(DT)[, `:=`(L=list(NULL))], ans)
 # functional form with 'let' alias
 test(2304.014, copy(DT)[, let(L=list(NULL))], ans)
 # using set()
-test(2304.015, set(copy(DT), j="L", value=list(NULL)), ans)
+test(2304.015, set(copy(DT), j="L", value=list(NULL)), within(ans, rm('L')))
 
 # replacement of multiple list columns with list(NULL) in a multi-row data.table, using different assignment methods
 DT = data.table(L1=list("A", "B"), L2=list("B", "C"), i=1L)
 ans = data.table(L1=list(NULL, NULL), L2=list(NULL, NULL), i=1L)
 DT$L1 = list(NULL)
 DT$L2 = list(NULL)
-test(2304.016, DT, ans)
+test(2304.016, DT, within(ans, rm('L1', 'L2')))
 DT = data.table(L1=list("A", "B"), L2=list("B", "C"), i=1L)
 # standard form with := operator
 test(2304.017, copy(DT)[, c("L1", "L2") := list(list(NULL), list(NULL))], ans)
@@ -21050,6 +21042,49 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list(
 # the integer overflow in #6729 is only noticeable with UBSan
 test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE })
 
+<<<<<<< HEAD
+if (exists("sort_by", "package:base")) {
+  # sort_by.data.table
+  DT1 = data.table(a=c(1, 3, 2, NA, 3), b=4:0)
+  DT2 = data.table(a=c("c", "a", "B")) # data.table uses C-locale and should sort_by if cedta()
+  DT3 = data.table(a=c(1, 2, 3), b=list(c("a", "b", "", NA), c(1, 3, 2, 0), c(TRUE, TRUE, FALSE, NA))) # list column
+
+  # sort_by.data.table: basics
+  test(2306.01, sort_by(DT1, ~a + b), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
+  test(2306.02, sort_by(DT1, ~I(a + b)), data.table(a=c(3, 2, 1, 3, NA), b=c(0L, 2L, 4L, 3L, 1L)))
+  test(2306.03, sort_by(DT2, ~a), data.table(a=c("B", "a", "c")))
+
+  # sort_by.data.table: list columns.
+  # NOTE 1: .formula2varlist works well with list columns.
+  # NOTE 2: 4 elem in DT of 3 row because forderv takes a list column as a DT.
+  test(2306.04, sort_by(DT3, ~b), DT3[order(b)]) # should be consistent.
+
+  # sort_by.data.table: additional C-locale sorting
+  test(2306.10, DT2[, sort_by(.SD, a)], data.table(a=c("B", "a", "c")))
+  test(2306.11, DT2[, sort_by(.SD, ~a)], data.table(a=c("B", "a", "c")))
+
+  # sort_by.data.table: various working interfaces
+  test(2306.20, sort_by(DT1, list(DT1$a, DT1$b)), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
+  test(2306.21, sort_by(DT1, DT1[, .(a, b)]), data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
+  test(2306.22, DT1[, sort_by(.SD, .(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
+  test(2306.23, DT1[, sort_by(.SD, ~a + b)], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
+  test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
+}
+=======
+#test for enhancing error message of invalid column #6512
+uniq_diff <- function(...) {
+  .Call("uniq_diff", ...)
+}
+capture_error_message <- function(expr) {
+  msg <- tryCatch(
+    { expr; NULL }, 
+    error = function(e) e$message
+  )
+  msg
+}
+test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))  # Ensure -1 is reported}, TRUE)
+>>>>>>> 1b4a51d3 (add test case)
+
 # test for enhancing error message of invalid column #6512
 uniq_diff <- function(...) {
   .Call("uniq_diff", ...)

From 2493cb51758ad6d9a79e4b97775bb17fa2e5629a Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 03:20:33 +0530
Subject: [PATCH 14/25] Remove duplicated error-message tests

---
 inst/tests/tests.Rraw | 26 --------------------------
 1 file changed, 26 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index a59563829f..43570a2620 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21084,29 +21084,3 @@ capture_error_message <- function(expr) {
 }
 test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))  # Ensure -1 is reported}, TRUE)
 >>>>>>> 1b4a51d3 (add test case)
-
-# test for enhancing error message of invalid column #6512
-uniq_diff <- function(...) {
-  .Call("uniq_diff", ...)
-}
-capture_error_message <- function(expr) {
-  msg <- tryCatch(
-    { expr; NULL }, 
-    error = function(e) e$message
-  )
-  msg
-}
-test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)
-
-# test for enhancing error message of invalid column #6512
-uniq_diff <- function(...) {
-  .Call("uniq_diff", ...)
-}
-capture_error_message <- function(expr) {
-  msg <- tryCatch(
-    { expr; NULL }, 
-    error = function(e) e$message
-  )
-  msg
-}
-test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)
\ No newline at end of file

From e66596fa7259875308e43cb01fefcc43a8ee2f30 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 03:48:12 +0530
Subject: [PATCH 15/25] resolved merge conflict

---
 inst/tests/tests.Rraw | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 43570a2620..e4231b5fa8 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21042,7 +21042,6 @@ test(2304.100, set(copy(DT), i=2L, j=c("L1", "L2"), value=list(list(NULL), list(
 # the integer overflow in #6729 is only noticeable with UBSan
 test(2305, { fread(testDir("issue_6729.txt.bz2")); TRUE })
 
-<<<<<<< HEAD
 if (exists("sort_by", "package:base")) {
   # sort_by.data.table
   DT1 = data.table(a=c(1, 3, 2, NA, 3), b=4:0)
@@ -21070,17 +21069,3 @@ if (exists("sort_by", "package:base")) {
   test(2306.23, DT1[, sort_by(.SD, ~a + b)], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
   test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
 }
-=======
-#test for enhancing error message of invalid column #6512
-uniq_diff <- function(...) {
-  .Call("uniq_diff", ...)
-}
-capture_error_message <- function(expr) {
-  msg <- tryCatch(
-    { expr; NULL }, 
-    error = function(e) e$message
-  )
-  msg
-}
-test(2306, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))  # Ensure -1 is reported}, TRUE)
->>>>>>> 1b4a51d3 (add test case)

From 89bcb2cdd363e32c191e7f2df897b14529be7792 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 03:51:35 +0530
Subject: [PATCH 16/25] test added for enhancing error message

---
 inst/tests/tests.Rraw | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e4231b5fa8..ce89efd5fd 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21069,3 +21069,16 @@ if (exists("sort_by", "package:base")) {
   test(2306.23, DT1[, sort_by(.SD, ~a + b)], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
   test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
 }
+
+# test for enhancing error message of invalid column #6512
+uniq_diff <- function(...) {
+  .Call("uniq_diff", ...)
+}
+capture_error_message <- function(expr) {
+  msg <- tryCatch(
+    { expr; NULL }, 
+    error = function(e) e$message
+  )
+  msg
+}
+test(2307, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)

From bf867b018c376bc8640260cb3b2564e1ce317a82 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Sun, 26 Jan 2025 12:19:47 +0530
Subject: [PATCH 17/25] Enhancing of error message

---
 po/fr.po    |  4 ++--
 po/zh_CN.po |  4 ++--
 src/fmelt.c | 15 ++++++++++++++-
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/po/fr.po b/po/fr.po
index e63209de3c..264163e6a3 100644
--- a/po/fr.po
+++ b/po/fr.po
@@ -1301,8 +1301,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "Type inconnu de 'measure.vars' %s à l'indice %d de la liste"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid."
-msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides."
+msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
+msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides; veuillez corriger en supprimant : %s"
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."
diff --git a/po/zh_CN.po b/po/zh_CN.po
index 493adf3767..f1bdb098b3 100644
--- a/po/zh_CN.po
+++ b/po/zh_CN.po
@@ -1154,8 +1154,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "未知'measure.vars'类型 %s，位于列表中 %d"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid."
-msgstr "'measure.vars'里，一或多个数值无效"
+msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
+msgstr "'measure.vars'里，一或多个数值无效；请通过删除以下数值来修复：%s"
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."
diff --git a/src/fmelt.c b/src/fmelt.c
index f031cc350f..8a13fcd945 100644
--- a/src/fmelt.c
+++ b/src/fmelt.c
@@ -1,5 +1,7 @@
 #include "data.table.h"
 #include <Rdefines.h>
+
+
 // #include <signal.h> // the debugging machinery + breakpoint aidee
 // raise(SIGINT);
 
@@ -176,8 +178,12 @@ bool is_default_measure(SEXP vec) {
 
 // maybe unlist, then unique, then set_diff.
 SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
+  // Protect input list/vector, unlisting if necessary
   SEXP int_vec = PROTECT(isNewList(int_or_list) ? unlist_(int_or_list) : int_or_list);
+  
+  // Check for duplicated elements in the input vector
   SEXP is_duplicated = PROTECT(duplicated(int_vec, FALSE)); 
+  
   int n_unique_cols = 0;
   SEXP invalid_columns = PROTECT(allocVector(INTSXP, length(int_vec)));
   int* invalid_col_ptr = INTEGER(invalid_columns);
@@ -206,16 +212,23 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
   }
   SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); 
   int unique_i = 0;
-  for (int i=0; i<length(is_duplicated); ++i) {
+  
+  // Populate the unique column numbers into the new vector
+  for (int i = 0; i < length(is_duplicated); ++i) {
     if (!LOGICAL(is_duplicated)[i]) {
       INTEGER(unique_col_numbers)[unique_i++] = INTEGER(int_vec)[i];
     }
   }
+  
+  // Apply set difference to get final unique column indices
   SEXP out = set_diff(unique_col_numbers, ncol);
   UNPROTECT(4);
   return out;
 }
 
+
+
+
 SEXP cols_to_int_or_list(SEXP cols, SEXP dtnames, bool is_measure) {
   switch(TYPEOF(cols)) {
   case STRSXP  : return chmatch(cols, dtnames, 0); 

From dd37750ae1a3088d98fa906e78907d511d5fa23e Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Mon, 27 Jan 2025 19:17:42 +0530
Subject: [PATCH 18/25] Revert to initial changes

---
 po/fr.po    | 4 ++--
 po/zh_CN.po | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/po/fr.po b/po/fr.po
index 264163e6a3..e63209de3c 100644
--- a/po/fr.po
+++ b/po/fr.po
@@ -1301,8 +1301,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "Type inconnu de 'measure.vars' %s à l'indice %d de la liste"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
-msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides; veuillez corriger en supprimant : %s"
+msgid "One or more values in 'measure.vars' is invalid."
+msgstr "Une ou plusieurs valeurs de 'measure.vars' ne sont pas valides."
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."
diff --git a/po/zh_CN.po b/po/zh_CN.po
index f1bdb098b3..493adf3767 100644
--- a/po/zh_CN.po
+++ b/po/zh_CN.po
@@ -1154,8 +1154,8 @@ msgid "Unknown 'measure.vars' type %s at index %d of list"
 msgstr "未知'measure.vars'类型 %s，位于列表中 %d"
 
 #: fmelt.c:187
-msgid "One or more values in 'measure.vars' is invalid; please fix by removing: %s"
-msgstr "'measure.vars'里，一或多个数值无效；请通过删除以下数值来修复：%s"
+msgid "One or more values in 'measure.vars' is invalid."
+msgstr "'measure.vars'里，一或多个数值无效"
 
 #: fmelt.c:189
 msgid "One or more values in 'id.vars' is invalid."

From 599ff52cfff0f4b94ff664ba51724e273bc656dd Mon Sep 17 00:00:00 2001
From: aitap <krylov.r00t@gmail.com>
Date: Wed, 26 Feb 2025 15:07:53 +0000
Subject: [PATCH 19/25] Provide a .formula2varlist implementation (#6842)

Since base::.formula2varlist is not an API and it is now needed in two
places, provide our own implementation.
---
 R/data.table.R | 4 ++--
 R/utils.R      | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index d1f6798100..99e908f63e 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -2454,7 +2454,7 @@ split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TR
     # same as split.data.frame - handling all exceptions, factor orders etc, in a single stream of processing was a nightmare in factor and drop consistency
     # evaluate formula mirroring split.data.frame #5392. Mimics base::.formula2varlist.
     if (inherits(f, "formula"))
-        f = eval(attr(terms(f), "variables"), x, environment(f))
+        f = formula_vars(f, x)
     # be sure to use x[ind, , drop = FALSE], not x[ind], in case downstream methods don't follow the same subsetting semantics (#5365)
     return(lapply(split(x = seq_len(nrow(x)), f = f, drop = drop, ...), function(ind) x[ind, , drop = FALSE]))
   }
@@ -2530,7 +2530,7 @@ sort_by.data.table <- function(x, y, ...)
 {
   if (!cedta()) return(NextMethod()) # nocov
   if (inherits(y, "formula"))
-    y <- .formula2varlist(y, x)
+    y <- formula_vars(y, x)
   if (!is.list(y))
     y <- list(y)
   # use forder instead of base 'order'
diff --git a/R/utils.R b/R/utils.R
index 50b165629d..cc4d933ff8 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -212,3 +212,11 @@ rss = function() {  #5515 #5517
   round(ans / 1024.0, 1L)  # return MB
   # nocov end
 }
+
+formula_vars = function(f, x) { # .formula2varlist is not API and seems to have appeared after R-4.2, #6841
+  terms <- terms(f)
+  setNames(
+    eval(attr(terms, "variables"), x, environment(f)),
+    attr(terms, "term.labels")
+  )
+}

From fcd1cabdb881f4e7063b5a67074c26dae90e3cec Mon Sep 17 00:00:00 2001
From: Mukulyadav2004 <145585624+Mukulyadav2004@users.noreply.github.com>
Date: Thu, 27 Feb 2025 15:41:39 +0530
Subject: [PATCH 20/25] Fix index printing by adding index info to header
 (#6816)

Produce the class header from `toprint` instead of just `x`. Fixes: #6806

Co-authored-by: Ivan K <krylov.r00t@gmail.com>
---
 R/print.data.table.R  | 2 +-
 inst/tests/tests.Rraw | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/R/print.data.table.R b/R/print.data.table.R
index 7517a4f128..a37020f502 100644
--- a/R/print.data.table.R
+++ b/R/print.data.table.R
@@ -86,6 +86,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
     if (show.indices) toprint = cbind(toprint, index_dt)
   }
   require_bit64_if_needed(x)
+  classes = classes1(toprint)
   toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, ...)  # na.encode=FALSE so that NA in character cols print as <NA>
 
   # FR #353 - add row.names = logical argument to print.data.table
@@ -100,7 +101,6 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
       factor = "<fctr>", POSIXct = "<POSc>", logical = "<lgcl>",
       IDate = "<IDat>", integer64 = "<i64>", raw = "<raw>",
       expression = "<expr>", ordered = "<ord>")
-    classes = classes1(x)
     abbs = unname(class_abb[classes])
     if ( length(idx <- which(is.na(abbs))) ) abbs[idx] = paste0("<", classes[idx], ">")
     toprint = rbind(abbs, toprint)
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index ce89efd5fd..da41f8b9f8 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21070,6 +21070,12 @@ if (exists("sort_by", "package:base")) {
   test(2306.24, DT1[, sort_by(.SD, ~.(a, b))], data.table(a=c(1, 2, 3, 3, NA), b=c(4L, 2L, 0L, 3L, 1L)))
 }
 
+DT <- data.table(a = 1:2, b = 2:1)
+setindex(DT, b)
+# make sure that print(DT) doesn't warn due to the header missing index column types, #6806
+# can't use output= here because the print() call is outside withCallingHandlers(...)
+test(2307, { capture.output(print(DT, class = TRUE, show.indices = TRUE)); TRUE })
+
 # test for enhancing error message of invalid column #6512
 uniq_diff <- function(...) {
   .Call("uniq_diff", ...)
@@ -21081,4 +21087,4 @@ capture_error_message <- function(expr) {
   )
   msg
 }
-test(2307, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)
+test(2307, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)
\ No newline at end of file

From 7823be777f5e3da138b1ee0f8503fcd82dd3008b Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 13:15:54 +0530
Subject: [PATCH 21/25] Renumber error-message test to 2308; remove stray blank lines in fmelt.c

---
 inst/tests/tests.Rraw | 2 +-
 src/fmelt.c           | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index da41f8b9f8..0004e63390 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21087,4 +21087,4 @@ capture_error_message <- function(expr) {
   )
   msg
 }
-test(2307, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)
\ No newline at end of file
+test(2308, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)
\ No newline at end of file
diff --git a/src/fmelt.c b/src/fmelt.c
index 8a13fcd945..c1866bdc94 100644
--- a/src/fmelt.c
+++ b/src/fmelt.c
@@ -1,7 +1,5 @@
 #include "data.table.h"
 #include <Rdefines.h>
-
-
 // #include <signal.h> // the debugging machinery + breakpoint aidee
 // raise(SIGINT);
 

From 3f5bed96d78a3f26c7fb2f402f80de181fcb32a1 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 15:29:45 +0530
Subject: [PATCH 22/25] modify test

---
 inst/tests/tests.Rraw | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 47a52f3331..ffa600c61d 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21077,14 +21077,4 @@ setindex(DT, b)
 test(2307, { capture.output(print(DT, class = TRUE, show.indices = TRUE)); TRUE })
 
 # test for enhancing error message of invalid column #6512
-uniq_diff <- function(...) {
-  .Call("uniq_diff", ...)
-}
-capture_error_message <- function(expr) {
-  msg <- tryCatch(
-    { expr; NULL }, 
-    error = function(e) e$message
-  )
-  msg
-}
-test(2308, {msg <- capture_error_message(uniq_diff(as.integer(c(1, 2, -1, 4)), 4, FALSE))print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)
+test(2308, {msg <- tryCatch({ .Call("uniq_diff", as.integer(c(1, 2, -1, 4)), 4, FALSE); NULL }, error = function(e) e$message)print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)

From c56a711a568fe75e664098f84f31564d48d3f414 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 15:56:34 +0530
Subject: [PATCH 23/25] test case added

---
 inst/tests/tests.Rraw | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index ffa600c61d..4c10144585 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21077,4 +21077,8 @@ setindex(DT, b)
 test(2307, { capture.output(print(DT, class = TRUE, show.indices = TRUE)); TRUE })
 
 # test for enhancing error message of invalid column #6512
-test(2308, {msg <- tryCatch({ .Call("uniq_diff", as.integer(c(1, 2, -1, 4)), 4, FALSE); NULL }, error = function(e) e$message)print(msg)return(grepl("\\[-1\\]", msg))}, TRUE)
+test(2308, {
+  msg <- tryCatch({ .Call("uniq_diff", as.integer(c(1, 2, -1, 4)), 4, FALSE); NULL }, error = function(e) e$message)
+  print(msg) 
+  return(grepl("\\[-1\\]", msg))
+}, TRUE)

From 778f20ba4bb01524b90e2b3a386affce82a9ea9b Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 16:05:57 +0530
Subject: [PATCH 24/25] Remove extra comments and whitespace from uniq_diff

---
 src/fmelt.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/src/fmelt.c b/src/fmelt.c
index c1866bdc94..e17536eacb 100644
--- a/src/fmelt.c
+++ b/src/fmelt.c
@@ -176,12 +176,8 @@ bool is_default_measure(SEXP vec) {
 
 // maybe unlist, then unique, then set_diff.
 SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
-  // Protect input list/vector, unlisting if necessary
   SEXP int_vec = PROTECT(isNewList(int_or_list) ? unlist_(int_or_list) : int_or_list);
-  
-  // Check for duplicated elements in the input vector
   SEXP is_duplicated = PROTECT(duplicated(int_vec, FALSE)); 
-  
   int n_unique_cols = 0;
   SEXP invalid_columns = PROTECT(allocVector(INTSXP, length(int_vec)));
   int* invalid_col_ptr = INTEGER(invalid_columns);
@@ -189,7 +185,7 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
   for (int i = 0; i < length(int_vec); ++i) {
     int col_number = INTEGER(int_vec)[i];
     bool good_number = (col_number > 0 && col_number <= ncol);
-    if (is_measure) good_number |= (col_number == NA_INTEGER);
+    if (is_measure) good_number |= (col_number==NA_INTEGER);
     if (!good_number) {
       invalid_col_ptr[invalid_count++] = col_number;
     } else if (!LOGICAL(is_duplicated)[i]) {
@@ -210,23 +206,16 @@ SEXP uniq_diff(SEXP int_or_list, int ncol, bool is_measure) {
   }
   SEXP unique_col_numbers = PROTECT(allocVector(INTSXP, n_unique_cols)); 
   int unique_i = 0;
-  
-  // Populate the unique column numbers into the new vector
-  for (int i = 0; i < length(is_duplicated); ++i) {
+  for (int i=0; i<length(is_duplicated); ++i) {
     if (!LOGICAL(is_duplicated)[i]) {
       INTEGER(unique_col_numbers)[unique_i++] = INTEGER(int_vec)[i];
     }
   }
-  
-  // Apply set difference to get final unique column indices
   SEXP out = set_diff(unique_col_numbers, ncol);
   UNPROTECT(4);
   return out;
 }
 
-
-
-
 SEXP cols_to_int_or_list(SEXP cols, SEXP dtnames, bool is_measure) {
   switch(TYPEOF(cols)) {
   case STRSXP  : return chmatch(cols, dtnames, 0); 

From 53f3e0ad654500eae9e97779de678fc1e16bae85 Mon Sep 17 00:00:00 2001
From: Divendra2006 <divendra.singhyadav.phe23@itbhu.ac.in>
Date: Fri, 28 Feb 2025 21:17:34 +0530
Subject: [PATCH 25/25] update test case

---
 inst/tests/tests.Rraw | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 4c10144585..0e5dee8e2a 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21078,7 +21078,5 @@ test(2307, { capture.output(print(DT, class = TRUE, show.indices = TRUE)); TRUE
 
 # test for enhancing error message of invalid column #6512
 test(2308, {
-  msg <- tryCatch({ .Call("uniq_diff", as.integer(c(1, 2, -1, 4)), 4, FALSE); NULL }, error = function(e) e$message)
-  print(msg) 
-  return(grepl("\\[-1\\]", msg))
-}, TRUE)
+  melt(data.table(A = 1:5, B = 6:10), id.vars = c("A", "-1"))
+}, error = "One or more values in 'id.vars' are invalid")