More test fixes for when translations are present (#7478)

aitap · web-flow · commit 265459973a32 · 2025-12-22T12:59:59.000+01:00
* Switch more tests to test(output=...)

capture.output() tests are fine in many places, but these ones tested
for messages that have since been translated and must be skipped in
foreign more.

* Use notOutput= instead of capture.output() tests

Where capture.output() was previously used to test the absence of
messages, use notOutput= instead so that the test will be skipped.

While the test succeeded anyway due to the captured output containing
neither the original untranslated message (being matched) nor its
translation (that could be printed if a regression happened), skipping
the test is more fair.

* test.data.table(): count skipped message tests

When working in foreign mode, count the occurrences of tests for message
content (which are essentially skipped) and report the number if it's
non-zero.

* test 1590.14: don't assume 1252 if not UTF-8

* Elaborate on what 1252 is

* Better test for not printing NULL

* Restore test 1775.1 for print() options

Instead, test substitute(x) for %iscall% "print" to decide whether to
print the x before testing the output.
diff --git a/R/test.data.table.R b/R/test.data.table.R
@@ -130,6 +130,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
   }
   assign("foreign", foreign, envir=env)
   assign("nfail", 0L, envir=env)
+  assign("nskip", 0L, envir=env)
   assign("ntest", 0L, envir=env)
   assign("prevtest", -1L, envir=env)
   assign("whichfail", NULL, envir=env)
@@ -282,6 +283,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
 
   nfail = env$nfail
   ntest = env$ntest
+  nskip = env$nskip
   if (nfail > 0L) {
     # nocov start
     stopf(
@@ -331,6 +333,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
     get("mtext")(lastRSS, side=4L, at=lastRSS, las=1L, font=2L)
   }
 
+  if (foreign && nskip > 0L) catf("Skipped %d tests for translated messages. ", nskip) # nocov
   catf("All %d tests (last %.8g) in %s completed ok in %s\n", ntest, env$prevtest, names(fn), timetaken(env$started.at))
   ans = nfail==0L
   attr(ans, "timings") = timings  # as attr to not upset callers who expect a TRUE/FALSE result
@@ -409,6 +412,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
     memtest.id = get("memtest.id", parent.frame())
     filename = get("filename", parent.frame())
     foreign = get("foreign", parent.frame())
+    nskip = get("nskip", parent.frame())
     showProgress = get("showProgress", parent.frame())
     time = nTest = RSS = NULL  # to avoid 'no visible binding' note
     # TODO(R>=4.0.2): Use add=TRUE up-front in on.exit() once non-positional arguments are supported.
@@ -451,6 +455,11 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
     length(grep(x, y, fixed=TRUE)) ||  # try treating x as literal first; useful for most messages containing ()[]+ characters
     length(tryCatch(grep(x, y, ignore.case=ignore.case), error=function(e)NULL))  # otherwise try x as regexp
   }
+  if (foreign && .test.data.table && (
+    length(error) || length(warning) || length(message) || length(output) ||
+    length(notOutput) || length(ignore.warning)
+  ))
+    assign("nskip", nskip+1L, parent.frame(), inherits=TRUE) # nocov
 
   xsub = substitute(x)
   ysub = substitute(y)
@@ -478,7 +487,11 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
     # save the overhead of capture.output() since there are a lot of tests, often called in loops
     # Thanks to tryCatch2 by Jan here : https://github.com/jangorecki/logR/blob/master/R/logR.R#L21
   } else {
-    out = capture.output(print(x <- suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler))))
+    out = if (xsub %iscall% "print") {
+      capture.output(x <- suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler)))
+    } else {
+      capture.output(print(x <- suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler))))
+    }
   }
   if (!is.null(options)) {
     # some of the options passed to test() may break internal data.table use below (e.g. invalid datatable.alloccol), so undo them ASAP
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
@@ -2031,17 +2031,19 @@ options(datatable.optimize = 2L)
 test(658.1, DT[ , mean(x), by=grp1, verbose=TRUE], output="GForce optimized j to.*gmean")
 test(658.2, DT[ , list(mean(x)), by=grp1, verbose=TRUE], output="GForce optimized j to.*gmean")
 test(658.3, DT[ , list(mean(x), mean(y)), by=grp1, verbose=TRUE], output="GForce optimized j to.*gmean")
-tt = capture.output(DT[,list(mean(x),mean(y)),by=list(grp1,grp2),verbose=TRUE])
-test(659, !length(grep("Wrote less rows", tt)))  # first group is one row with this seed. Ensure we treat this as aggregate case rather than allocate too many rows.
+# first group is one row with this seed. Ensure we treat this as aggregate case rather than allocate too many rows.
+test(659, DT[,list(mean(x),mean(y)),by=list(grp1,grp2),verbose=TRUE], notOutput = "Wrote less rows")
 
 # Test .N for logical i subset
 DT = data.table(a=1:10, b=rnorm(10))
 test(660, DT[a==8L, .N], 1L)
 
 # Test that growing is sensible in worst case
 DT = data.table(a=rep(1:10,1:10),b=rnorm(55))
-tt = capture.output(DT[,sum(b)*b,by=a,verbose=TRUE])
-test(661, length(grep("growing from",tt))<3)  # was 6 when we simply grew enough for latest result
+test(661, DT[,sum(b)*b,by=a,verbose=TRUE],
+          notOutput = paste(rep("growing from", 3), collapse = ".*"))
+# Need to test that "growing from" is printed less than three times.
+# The number was previously 6 when we simply grew enough for latest result
 
 # Test that adding a new logical column is supported, #2094
 DT=data.table(a=1:3)
@@ -8286,8 +8288,8 @@ test(1579.03, dt[, lapply(.SD, median), keyby=x],
               dt[, lapply(.SD, function(x) median(as.numeric(x))), keyby=x])
 test(1579.04, dt[, lapply(.SD, median, na.rm=TRUE), keyby=x],
               dt[, lapply(.SD, function(x) median(as.numeric(x), na.rm=TRUE)), keyby=x])
-ans = capture.output(dt[, lapply(.SD, median), by=x, verbose=TRUE])
-test(1579.05, any(grepl("GForce optimized", ans)), TRUE)
+test(1579.05, dt[, lapply(.SD, median), by=x, verbose=TRUE],
+              output = "GForce optimized")
 
 # testing gforce::ghead and gforce::gtail
 # head(.SD, 1) and tail(.SD, 1) optimisation
@@ -8539,8 +8541,8 @@ if (x1==x2) {
   # NB: x1==x2 is a condition in base R, independent of data.table
   test(1590.12, forderv(    c(x2,x1,x1,x2)), integer())
   # don't test base ... test(1590.13, base::order(c(x2,x1,x1,x2)), 1:4)
-} else {
-  # Windows-1252, #2856
+} else if (identical(l10n_info()$codepage, 1252L)) {
+  # US English ANSI codepage on Windows is CP1252, #2856
   test(1590.14, forderv(    c(x2,x1,x1,x2)), INT(1,4,2,3))
   # don't test base ... test(1590.15, base::order(c(x2,x1,x1,x2)), INT(1,4,2,3))
 }
@@ -9280,11 +9282,9 @@ test(1633.4, dt[, .SD, by=1:nrow(dt)], data.table(nrow=1:nrow(dt), dt)) # make s
 
 # reuse secondary indices
 dt = data.table(x=sample(3, 10, TRUE), y=1:10)
-v1 = capture.output(ans1 <- dt[.(3:2), on="x", verbose=TRUE])
+test(1634.1, dt[.(3:2), on="x", verbose=TRUE], output = "ad hoc")
 setindex(dt, x)
-v2 = capture.output(ans2 <- dt[.(3:2), on="x", verbose=TRUE])
-test(1634.1, any(grepl("ad hoc", v1)), TRUE)
-test(1634.2, any(grepl("existing index", v2)), TRUE)
+test(1634.2, dt[.(3:2), on="x", verbose=TRUE], output = "existing index")
 
 # fread's fill argument detects separator better in complex cases as well, #1573
 # if pasted to the console, these tests won't work. But do work when sourced as these are tabs not spaces in text
@@ -10248,33 +10248,33 @@ options("datatable.use.index"=TRUE, "datatable.auto.index"=TRUE)
 test(1666.01, d[k==1L, verbose=TRUE], d[3L], output="Creating new index 'k'")
 d = data.table(k=3:1)
 options("datatable.use.index"=TRUE, "datatable.auto.index"=FALSE)
-test(1666.02, grep("Creating new index", capture.output(d[k==1L, verbose=TRUE])), integer(0)) # do not create index
+test(1666.02, d[k==1L, verbose=TRUE], notOutput="Creating new index") # do not create index
 d = data.table(k=3:1)
 options("datatable.use.index"=FALSE, "datatable.auto.index"=FALSE)
-test(1666.03, grep("Creating new index", capture.output(d[k==1L, verbose=TRUE])), integer(0))
+test(1666.03, d[k==1L, verbose=TRUE], notOutput="Creating new index")
 d = data.table(k=3:1)
 options("datatable.use.index"=FALSE, "datatable.auto.index"=TRUE)
-test(1666.04, grep("Creating new index", capture.output(d[k==1L, verbose=TRUE])), integer(0))
+test(1666.04, d[k==1L, verbose=TRUE], notOutput="Creating new index")
 d = data.table(k=3:1) # subset - index
 setindex(d, k)
 options("datatable.use.index"=TRUE, "datatable.auto.index"=TRUE)
 test(1666.05, d[k==1L, verbose=TRUE], d[3L], output="Optimized subsetting with index 'k'")
 options("datatable.use.index"=TRUE, "datatable.auto.index"=FALSE)
 test(1666.06, d[k==1L, verbose=TRUE], d[3L], output="Optimized subsetting with index 'k'")
 options("datatable.use.index"=FALSE, "datatable.auto.index"=FALSE)
-test(1666.07, grep("Using existing index", capture.output(d[k==1L, verbose=TRUE])), integer(0)) # not using existing index
+test(1666.07, d[k==1L, verbose=TRUE], notOutput="Using existing index") # not using existing index
 options("datatable.use.index"=FALSE, "datatable.auto.index"=TRUE)
-test(1666.08, grep("Using existing index", capture.output(d[k==1L, verbose=TRUE])), integer(0))
+test(1666.08, d[k==1L, verbose=TRUE], notOutput="Using existing index")
 d1 = data.table(k=3:1) # join - no index
 d2 = data.table(k=2:4)
 options("datatable.use.index"=TRUE, "datatable.auto.index"=TRUE)
 test(1666.09, d1[d2, on="k", verbose=TRUE], d1[d2, on="k"], output="ad hoc")
 options("datatable.use.index"=TRUE, "datatable.auto.index"=FALSE)
 test(1666.10, d1[d2, on="k", verbose=TRUE], d1[d2, on="k"], output="ad hoc")
 options("datatable.use.index"=FALSE, "datatable.auto.index"=FALSE)
-test(1666.11, grep("Looking for existing (secondary) index", capture.output(d1[d2, on="k", verbose=TRUE])), integer(0)) # not looking for index
+test(1666.11, d1[d2, on="k", verbose=TRUE], notOutput="Looking for existing (secondary) index") # not looking for index
 options("datatable.use.index"=FALSE, "datatable.auto.index"=TRUE)
-test(1666.12, grep("Looking for existing (secondary) index", capture.output(d1[d2, on="k", verbose=TRUE])), integer(0))
+test(1666.12, d1[d2, on="k", verbose=TRUE], notOutput="Looking for existing (secondary) index")
 d1 = data.table(k=3:1,v1=10:12) # join - index
 d2 = data.table(k=2:4,v2=20:22)
 setindex(d1, k)
@@ -11653,8 +11653,7 @@ test(1765.4, {warning("foobar1"); warning("foobar2"); warning("FOO"); 4L}, 4L, i
 test(1765.5, {warning("foobar1"); warning("foobar2"); warning("FOO"); 4L}, 4L, ignore.warning="2", warning=c("foobar1","FOO"))
 
 # print(null.data.table()) should not output NULL as well, #1852
-# use capture.output() in this case rather than output= to ensure NULL is not output
-test(1766, capture.output(print(data.table(NULL))), "Null data.table (0 rows and 0 cols)")
+test(1766, data.table(NULL), notOutput = "NULL$")
 
 # Bug on subset of 1-row data.table when expr returns a named logical vector #2152
 options(datatable.auto.index=FALSE)
@@ -11888,8 +11887,8 @@ test(1774.17, as.data.table(x, na.rm='a'), error="'na.rm' must be scalar")
 
 # verify print.keys works
 DT1 <- data.table(a = 1:3, key = "a")
-test(1775.1, capture.output(print(DT1, print.keys = TRUE)),
-     c("Key: <a>", "   a", "1: 1", "2: 2", "3: 3"))
+test(1775.1, print(DT1, print.keys = TRUE),
+     output = c("Key: <a>", "   a", "1: 1", "2: 2", "3: 3"))
 DT2 <- data.table(a = 1:3, b = 4:6)
 setindexv(DT2, c("b","a"))
 test(1775.2, print(DT2, print.keys = TRUE),
@@ -13675,7 +13674,7 @@ test(1962.034, setkeyv(DT, c('a', '')),
 setkey(DT, a)
 test(1962.035, {setkeyv(DT, character(0L)); key(DT)}, NULL,
      warning = 'cols is a character vector of zero length')
-test(1962.036, any(grepl('already ordered', capture.output(setkey(DT, a, verbose = TRUE)))))
+test(1962.036, setkey(DT, a, verbose = TRUE), output = 'already ordered')
 setnames(DT, '.xi')
 setkey(DT, NULL)
 test(1962.037, setkey(DT, .xi),
@@ -16321,7 +16320,7 @@ test(2094.02, X$TAG, rep(names(x), each = 2))
 
 # use arbitrary column without message when fun.aggregate=length, #2980
 DT = data.table(a=c(3L, 3L, 2L, 9L, 5L, 10L, 3L, 2L, 9L, 8L), b=rep(1:5, 2))
-test(2095, any(grepl('override', capture.output(dcast(DT, a~b, fun.aggregate=length)), fixed=TRUE)), FALSE)
+test(2095, dcast(DT, a~b, fun.aggregate=length), notOutput='override')
 
 # gmean intermediate can overflow integers without warning, #986
 test(2096, data.table(a=c(1L,1L), v=c(2e9L, 2e9L))[, mean(v), a], data.table(a=1L, V1=2e9))
@@ -16705,10 +16704,10 @@ test(2125.05, print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE),
               "1 variable not shown: \\[d <char>\\]"))
 test(2125.06, print(DT, trunc.cols=TRUE, col.names="none"),
      output=c("^  1: 0 bbbbbbbbbbbbb ccccccccccccc", ".*",
-              "1 variable not shown: \\[d\\]", ""))
+              "1 variable not shown: \\[d\\]$"))
 test(2125.07, print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"),
      output=c("^  1: 0 bbbbbbbbbbbbb", ".*",
-              "2 variables not shown: \\[c, d\\]", ""),
+              "2 variables not shown: \\[c, d\\]$"),
      warning = "Column classes will be suppressed when col.names is 'none'")
 options("width" = 20)
 DT = data.table(a = vector("integer", 2),
@@ -21491,11 +21490,11 @@ test(2328.2, droplevels(DT), data.table(f=factor(), i=integer(), f2=factor()))
 
 #6882 print() output with col.names="none"
 dt = data.table(short = 1:3, verylongcolumnname = 4:6)
-test(2329.1, print(dt, col.names = "none"), output = "1: 1 4\n2: 2 5\n3: 3 6\n")
+test(2329.1, print(dt, col.names = "none"), output = "1: 1 4\n2: 2 5\n3: 3 6$")
 dt = data.table(x = 123456, y = "wide_string")
-test(2329.2, print(dt, col.names = "none"), output = "1: 123456 wide_string\n")
+test(2329.2, print(dt, col.names = "none"), output = "1: 123456 wide_string$")
 dt = data.table(a = NA_integer_, b = NaN)
-test(2329.3, print(dt, col.names = "none"), output = "1: NA NaN\n")
+test(2329.3, print(dt, col.names = "none"), output = "1: NA NaN$")
 
 # Row name extraction from multiple vectors, #7136
 x <- 1:3