Skip to content

Commit 0586ffd

Browse files
authored
Merge branch 'master' into data
2 parents e1fd079 + f9cf2a1 commit 0586ffd

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

45 files changed

+6200
-3448
lines changed

.dev/CRAN_Release.cmd

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,19 @@
33
###############################################
44

55
# 1) Update messages for new release
6-
## (a) Update C template file: src/data.table.pot
7-
## ideally, we are including _() wrapping in
8-
## new PRs throughout dev cycle, and this step
9-
## becomes about tying up loose ends
10-
## Check the output here for translatable messages
11-
xgettext -o /dev/stdout ./*.c \
12-
--keyword=Rprintf --keyword=error --keyword=warning --keyword=STOP --keyword=DTWARN --keyword=Error --keyword=DTPRINT --keyword=snprintf:3
13-
14-
## (b) Update R template file: src/R-data.table.pot
15-
## NB: this relies on R >= 4.0 to remove a bug in update_pkg_po
16-
Rscript -e "tools::update_pkg_po('.')"
6+
dt_custom_translators = list(
7+
R = 'catf:fmt|1',
8+
# TODO(MichaelChirico/potools#318): restore snprintf:3 here too
9+
src = c('STOP:1', 'DTWARN:1', 'DTPRINT:1')
10+
)
11+
message_db =
12+
potools::get_message_data(custom_translation_functions = dt_custom_translators)
13+
potools::check_cracked_messages(message_db)
14+
potools::check_untranslated_cat(message_db)
15+
potools::check_untranslated_src(message_db)
16+
17+
## (b) Update R template files (po/*.pot)
18+
potools::po_extract(custom_translation_functions = dt_custom_translators)
1719

1820
# 2) Open a PR with the new templates & contact the translators
1921
# * zh_CN: @hongyuanjia

CODEOWNERS

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,12 @@
4545
/R/translation.R @michaelchirico
4646
/src/po.h @michaelchirico
4747
/po/*.pot @Rdatatable/translators
48-
/po/*zh_CN.po @Rdatatable/chinese
49-
/po/*pt_BR.po @Rdatatable/brazil
5048
/po/*es.po @Rdatatable/spanish
49+
/po/*fr.po @Rdatatable/french
50+
/po/*pt_BR.po @Rdatatable/brazil
5151
/po/*ru.po @Rdatatable/russian
52+
/po/*zh_CN.po @Rdatatable/chinese
53+
/vignettes/fr/*.Rmd @Rdatatable/french
5254
/vignettes/ru/*.Rmd @Rdatatable/russian
5355

5456
# printing

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Authors@R: c(
1919
person("Michael","Chirico", role="aut", comment = c(ORCID="0000-0003-0787-087X")),
2020
person("Toby","Hocking", role="aut", comment = c(ORCID="0000-0002-3146-0865")),
2121
person("Benjamin","Schwendinger",role="aut", comment = c(ORCID="0000-0003-3315-8114")),
22+
person("Ivan", "Krylov", role="aut", email="[email protected]", comment = c(ORCID="0000-0002-0172-3812")),
2223
person("Pasha","Stetsenko", role="ctb"),
2324
person("Tom","Short", role="ctb"),
2425
person("Steve","Lianoglou", role="ctb"),
@@ -89,7 +90,6 @@ Authors@R: c(
8990
person("Iago", "Giné-Vázquez", role="ctb"),
9091
person("Anirban", "Chetia", role="ctb"),
9192
person("Doris", "Amoakohene", role="ctb"),
92-
person("Ivan", "Krylov", role="ctb"),
9393
person("Angel", "Feliz", role="ctb"),
9494
person("Michael","Young", role="ctb"),
9595
person("Mark", "Seeto", role="ctb"),

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ rowwiseDT(
133133
134134
19. An integer overflow in `fread()` with lines longer than `2^(31/2)` bytes is prevented, [#6729](https://github.com/Rdatatable/data.table/issues/6729). The typical impact was no worse than a wrong initial allocation size, corrected later. Thanks to @TaikiSan21 for the report and @aitap for the fix.
135135
136+
20. Fixed a memory issue causing segfaults in `forder`, [#6797](https://github.com/Rdatatable/data.table/issues/6797). Thanks to @dkutner for the report and @MichaelChirico for the fix.
137+
136138
## NOTES
137139
138140
1. There is a new vignette on joins! See `vignette("datatable-joins")`. Thanks to Angel Feliz for authoring it! Feedback welcome. This vignette has been highly requested since 2017: [#2181](https://github.com/Rdatatable/data.table/issues/2181).

R/between.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ inrange = function(x,lower,upper,incbounds=TRUE) {
7575
ops = if (incbounds) c(4L, 2L) else c(5L, 3L) # >=,<= and >,<
7676
verbose = isTRUE(getOption("datatable.verbose"))
7777
if (verbose) {last.started.at=proc.time();catf("forderv(query) took ... ");flush.console()}
78-
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
78+
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} # notranslate
7979
ans = bmerge(
8080
shallow(subject), query,
8181
icols=1L:2L, xcols=c(1L, 1L),

R/bmerge.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
188188
if (verbose) {last.started.at=proc.time();catf(" forder took ... ");flush.console()}
189189
# TODO: could check/reuse secondary indices, but we need 'starts' attribute as well!
190190
xo = forderv(x, xcols, retGrp=TRUE)
191-
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
191+
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} # notranslate
192192
xg = attr(xo, 'starts', exact=TRUE)
193193
resetcols = head(xcols, non_equi-1L)
194194
if (length(resetcols)) {

R/data.table.R

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,7 @@ replace_dot_alias = function(e) {
577577
} else {
578578
irows = as.integer(fsort(as.numeric(irows))) ## nocov; parallelized for numeric, but overhead of type conversion
579579
}
580-
if (verbose) {cat(timetaken(last.started.at), "\n");flush.console()}
580+
if (verbose) {cat(timetaken(last.started.at), "\n");flush.console()} # notranslate
581581
}
582582
## make sure, all columns are taken from x and not from i.
583583
## This is done by simply telling data.table to continue as if there was a simple subset
@@ -634,7 +634,7 @@ replace_dot_alias = function(e) {
634634
irows = irows[irows!=0L]
635635
if (verbose) {last.started.at=proc.time();catf("Inverting irows for notjoin done in ... ");flush.console()}
636636
i = irows = if (length(irows)) seq_len(nrow(x))[-irows] else NULL # NULL meaning all rows i.e. seq_len(nrow(x))
637-
if (verbose) cat(timetaken(last.started.at), "\n")
637+
if (verbose) cat(timetaken(last.started.at), "\n") # notranslate
638638
leftcols = integer() # proceed as if row subset from now on, length(leftcols) is switched on later
639639
rightcols = integer()
640640
# Doing this once here, helps speed later when repeatedly subsetting each column. R's [irows] would do this for each
@@ -892,8 +892,10 @@ replace_dot_alias = function(e) {
892892
}
893893
tt = lengths(byval)
894894
if (any(tt!=xnrow)) {
895-
plural_part <- sprintf(ngettext(length(tt), "The item in the 'by' or 'keyby' list is length %s.", "The items in the 'by' or 'keyby' list have lengths %s."), brackify(tt))
896-
stopf("%s Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).", plural_part, xnrow)
895+
stopf(ngettext(length(tt),
896+
"The item in the 'by' or 'keyby' list is length %s. Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).",
897+
"The items in the 'by' or 'keyby' list have lengths %s. Each must be length %d; the same length as there are rows in x (after subsetting if i is provided)."),
898+
brackify(tt), xnrow, domain=NA)
897899
}
898900
if (is.null(bynames)) bynames = rep.int("",length(byval))
899901
if (length(idx <- which(!nzchar(bynames))) && !bynull) {

R/onAttach.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
if (gettext("TRANSLATION CHECK") != "TRANSLATION CHECK") {
2828
packageStartupMessagef(
2929
"**********\nRunning data.table in English; package support is available in English only. When searching for online help, be sure to also check for the English error message. This can be obtained by looking at the po/R-<locale>.po and po/<locale>.po files in the package source, where the native language and English error messages can be found side-by-side.%s\n**********",
30-
if (exists('Sys.setLanguage', envir=baseenv())) " You can also try calling Sys.setLanguage('en') prior to reproducing the error message." else ""
30+
if (exists('Sys.setLanguage', envir=baseenv())) gettext(" You can also try calling Sys.setLanguage('en') prior to reproducing the error message.") else ""
3131
)
3232
}
3333
if (dev && (Sys.Date() - as.Date(d))>28L)

R/print.data.table.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
5757
catf("Null data.%s (0 rows and 0 cols)\n", class) # See FAQ 2.5 and NEWS item in v1.8.9
5858
} else {
5959
catf("Empty data.%s (%d rows and %d cols)", class, NROW(x), NCOL(x))
60-
if (length(x)>0L) cat(": ",paste(head(names(x),6L),collapse=","),if(length(x)>6L)"...",sep="")
61-
cat("\n")
60+
if (length(x)>0L) cat(": ",paste(head(names(x),6L),collapse=","),if(length(x)>6L)"...",sep="") # notranslate
61+
cat("\n") # notranslate
6262
}
6363
return(invisible(x))
6464
}

R/test.data.table.R

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
3939
scripts = gsub("[.]bz2$","",scripts)
4040
return(sapply(scripts, function(fn) {
4141
err = try(test.data.table(script=fn, verbose=verbose, pkg=pkg, silent=silent, showProgress=showProgress, testPattern=testPattern))
42-
cat("\n");
42+
cat("\n"); # notranslate
4343
isTRUE(err)
4444
}))
4545
# nocov end
@@ -110,7 +110,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
110110
datatable.old.fread.datetime.character = FALSE
111111
)
112112

113-
cat("getDTthreads(verbose=TRUE):\n") # for tracing on CRAN; output to log before anything is attempted
113+
cat("getDTthreads(verbose=TRUE):\n") # notranslate: for tracing on CRAN; output to log before anything is attempted
114114
getDTthreads(verbose=TRUE) # includes the returned value in the verbose output (rather than dangling '[1] 4'); e.g. "data.table is using 4 threads"
115115
catf("test.data.table() running: %s\n", fn) # print fn to log before attempting anything on it (in case it is missing); on same line for slightly easier grep
116116
assign("testDir", function(x) file.path(fulldir, x), envir=env)
@@ -226,6 +226,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
226226
# does show the full file output these days, so the 13 line limit no longer bites so much. It still bit recently
227227
# when receiving output of R CMD check sent over email, though.
228228
tz = Sys.getenv("TZ", unset=NA)
229+
# notranslate start
229230
cat("\n", date(), # so we can tell exactly when these tests ran on CRAN to double-check the result is up to date
230231
" endian==", .Platform$endian,
231232
", sizeof(long double)==", .Machine$sizeof.longdouble,
@@ -239,6 +240,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
239240
", .libPaths()==", paste0("'", .libPaths(), "'", collapse = ","),
240241
", ", .Call(Cdt_zlib_version),
241242
"\n", sep="")
243+
# notranslate end
242244

243245
if (inherits(err,"try-error")) {
244246
# nocov start
@@ -303,7 +305,7 @@ compactprint = function(DT, topn=2L) {
303305
print(copy(DT)[,(cn):="",verbose=FALSE], topn=topn, class=FALSE)
304306
} else {
305307
print(DT, class=FALSE) # "Empty data.table (0 rows) of <ncol> columns ...
306-
if (ncol(DT)) cat(cn,"\n")
308+
if (ncol(DT)) cat(cn,"\n") # notranslate
307309
}
308310
invisible()
309311
}
@@ -334,10 +336,6 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
334336
Sys.unsetenv(names(old)[!is_preset])
335337
}, add=TRUE)
336338
}
337-
if (!is.null(options)) {
338-
old_options <- do.call(base::options, as.list(options)) # as.list(): allow passing named character vector for convenience
339-
on.exit(base::options(old_options), add=TRUE)
340-
}
341339
# Usage:
342340
# i) tests that x equals y when both x and y are supplied, the most common usage
343341
# ii) tests that x is TRUE when y isn't supplied
@@ -376,14 +374,18 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
376374
if (memtest==1L) gc() # see #5515 for before/after
377375
inum = as.integer(num)
378376
timings[inum, RSS:=max(rss(),RSS), verbose=FALSE] # TODO prefix inum with .. for clarity when that works
379-
if (length(memtest.id) && memtest.id[1L]<=inum && inum<=memtest.id[2L]) cat(rss(),"\n") # after 'testing id ...' output; not using between() as it has verbose output when getOption(datatable.verbose)
377+
if (length(memtest.id) && memtest.id[1L]<=inum && inum<=memtest.id[2L]) cat(rss(),"\n") # notranslate. after 'testing id ...' output; not using between() as it has verbose output when getOption(datatable.verbose)
380378
if (memtest==2L) gc()
381379
}
382380
assign("lasttime", proc.time()[3L], parent.frame(), inherits=TRUE) # after gc() to exclude gc() time from next test when memtest
383381
}, add=TRUE )
384-
if (showProgress)
385-
# \r can't be in gettextf msg
386-
cat("\rRunning test id", numStr, " ") # nocov.
382+
if (showProgress) {
383+
# nocov start
384+
cat("\r") # notranslate: \r can't be in gettextf msg
385+
catf("Running test id %s", numStr)
386+
cat(" ") # notranslate
387+
# nocov end
388+
}
387389
# See PR #4090 for comments about change here in Dec 2019.
388390
# If a segfault error occurs in future and we'd like to know after which test, then arrange for the
389391
# try(sys.source()) in test.data.table() to be run in a separate R process. That process could write out
@@ -422,13 +424,23 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
422424
actual$message <- c(actual$message, conditionMessage(m))
423425
m
424426
}
427+
if (!is.null(options)) {
428+
old_options <- do.call(base::options, as.list(options)) # as.list(): allow passing named character vector for convenience
429+
on.exit(base::options(old_options), add=TRUE)
430+
}
425431
if (is.null(output) && is.null(notOutput)) {
426432
x = suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler))
427433
# save the overhead of capture.output() since there are a lot of tests, often called in loops
428434
# Thanks to tryCatch2 by Jan here : https://github.com/jangorecki/logR/blob/master/R/logR.R#L21
429435
} else {
430436
out = capture.output(print(x <- suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler))))
431437
}
438+
if (!is.null(options)) {
439+
# some of the options passed to test() may break internal data.table use below (e.g. invalid datatable.alloccol), so undo them ASAP
440+
base::options(old_options)
441+
# this is still registered for on.exit(), keep empty
442+
old_options <- list()
443+
}
432444
fail = FALSE
433445
if (.test.data.table && num>0.0) {
434446
if (num<prevtest+0.0000005) {
@@ -448,15 +460,15 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
448460
stopifnot(is.character(ignore.warning), !anyNA(ignore.warning), nchar(ignore.warning)>=1L)
449461
for (msg in ignore.warning) observed = grep(msg, observed, value=TRUE, invert=TRUE) # allow multiple for translated messages rather than relying on '|' to always work
450462
}
451-
if (length(expected) != length(observed)) {
463+
if (length(expected) != length(observed) && (!foreign || is.null(ignore.warning))) {
452464
# nocov start
453465
catf("Test %s produced %d %ss but expected %d\n%s\n%s\n", numStr, length(observed), type, length(expected), paste("Expected:", expected), paste("Observed:", observed, collapse = "\n"))
454466
fail = TRUE
455467
# nocov end
456-
} else {
468+
} else if (!foreign) {
457469
# the expected type occurred and, if more than 1 of that type, in the expected order
458470
for (i in seq_along(expected)) {
459-
if (!foreign && !string_match(expected[i], observed[i])) {
471+
if (!string_match(expected[i], observed[i])) {
460472
# nocov start
461473
catf("Test %s didn't produce the correct %s:\nExpected: %s\nObserved: %s\n", numStr, type, expected[i], observed[i])
462474
fail = TRUE
@@ -475,7 +487,8 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
475487
if (out[length(out)] == "NULL") out = out[-length(out)]
476488
out = paste(out, collapse="\n")
477489
output = paste(output, collapse="\n") # so that output= can be either a \n separated string, or a vector of strings.
478-
if (length(output) && !string_match(output, out)) {
490+
# it also happens to turn off the 'y' checking branch below
491+
if (length(output) && !foreign && !string_match(output, out)) {
479492
# nocov start
480493
catf("Test %s did not produce correct output:\n", numStr)
481494
catf("Expected: <<%s>>\n", encodeString(output)) # \n printed as '\\n' so the two lines of output can be compared vertically
@@ -487,7 +500,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
487500
fail = TRUE
488501
# nocov end
489502
}
490-
if (length(notOutput) && string_match(notOutput, out, ignore.case=TRUE)) {
503+
if (length(notOutput) && !foreign && string_match(notOutput, out, ignore.case=TRUE)) {
491504
# nocov start
492505
catf("Test %s produced output but should not have:\n", numStr)
493506
catf("Expected absent (case insensitive): <<%s>>\n", encodeString(notOutput))
@@ -534,15 +547,15 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
534547
if (!fail) {
535548
catf("Test %s ran without errors but failed check that x equals y:\n", numStr)
536549
failPrint = function(x, xsub) {
537-
cat(">", substitute(x), "=", xsub, "\n")
550+
cat(">", substitute(x), "=", xsub, "\n") # notranslate
538551
if (is.data.table(x)) compactprint(x) else {
539552
nn = length(x)
540553
catf("First %d of %d (type '%s'): \n", min(nn, 6L), length(x), typeof(x))
541554
# head.matrix doesn't restrict columns
542555
if (length(d <- dim(x))) do.call(`[`, c(list(x, drop = FALSE), lapply(pmin(d, 6L), seq_len)))
543556
else print(head(x))
544557
if (typeof(x) == 'character' && anyNonAscii(x)) {
545-
cat("Non-ASCII string detected, raw representation:\n")
558+
catf("Non-ASCII string detected, raw representation:\n")
546559
print(lapply(head(x), charToRaw))
547560
}
548561
}

0 commit comments

Comments
 (0)