Skip to content

Commit ac718aa

Browse files
committed
Merge branch 'master' into truehash
2 parents b5b023e + 4f8695f commit ac718aa

File tree

13 files changed

+1901
-1178
lines changed

13 files changed

+1901
-1178
lines changed

.ci/lint.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
args = commandArgs(TRUE)
44
if (identical(args, '--help')) {
55
writeLines(c(
6-
'Usage: Rscript .ci/lint.R .ci/linters/<KIND> <WHERE> <WHAT> [PREPROCESS]',
6+
'Usage: Rscript .ci/lint.R .ci/linters/<KIND> <WHERE> <WHAT>',
77
'KIND must name the directory containing the *.R files defining the linter functions.',
88
'WHERE must name the directory containing the files to lint, e.g. "po", or "src".',
99
"WHAT must contain the regular expression matching the files to lint, e.g., '[.]po$', or '[.][ch]$'.",
10+
NULL
1011
))
1112
q('no')
1213
}

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,8 @@ rowwiseDT(
552552

553553
22. `fread()` could fail to read Mac CSV files (with `\r` line endings) if the file contained any `\n` character, such as a final `\r\n`. This was fixed by detecting the predominant line ending in a sample of the file, [#4186](https://github.com/Rdatatable/data.table/issues/4186). Thanks to @MPagel for the report and @ben-schwen for the fix.
554554

555+
23. By reference assignments (':=') with functions that modified the data.table by reference e.g. (`foo=function(DT){modify(DT);return(1L)}`, `DT[,a:=foo(DT)]`) returned a malformed data.table due to the modification of the targeted named column index ("a") during the j expression evaluation [#6768](https://github.com/Rdatatable/data.table/issues/6768). Thanks @AntonNM for the report and fix.
556+
555557
### NOTES
556558

557559
1. There is a new vignette on joins! See `vignette("datatable-joins")`. Thanks to Angel Feliz for authoring it! Feedback welcome. This vignette has been highly requested since 2017: [#2181](https://github.com/Rdatatable/data.table/issues/2181).

R/data.table.R

Lines changed: 54 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ replace_dot_alias = function(e) {
561561
}
562562
irows = vecseq(f__, len__, limit)
563563
}
564-
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
564+
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} # notranslate
565565
# Fix for #1092 and #1074
566566
# TODO: implement better version of "any"/"all"/"which" to avoid
567567
# unnecessary construction of logical vectors
@@ -1189,7 +1189,6 @@ replace_dot_alias = function(e) {
11891189
} else if (is.numeric(lhs)) {
11901190
m = as.integer(lhs)
11911191
if (any(m<1L | ncol(x)<m)) stopf("LHS of := appears to be column positions but are outside [1,ncol] range. New columns can only be added by name.")
1192-
lhs = names_x[m]
11931192
} else
11941193
stopf("LHS of := isn't column names ('character') or positions ('integer' or 'numeric')")
11951194
if (!anyNA(m)) {
@@ -1214,44 +1213,16 @@ replace_dot_alias = function(e) {
12141213
return(invisible(x))
12151214
}
12161215
} else {
1217-
# Adding new column(s). TO DO: move after the first eval in case the jsub has an error.
1216+
# Adding new column(s). Allocation for columns and recalculation of target cols moved after the jval = eval(jsub)
1217+
# in case of error or by-reference modifications to the DT
12181218
newnames=setdiff(lhs, names_x)
12191219
m[is.na(m)] = ncol(x)+seq_along(newnames)
12201220
cols = as.integer(m)
12211221
# don't pass verbose to selfrefok here -- only activated when
1222-
# ok=-1 which will trigger setalloccol with verbose in the next
1223-
# branch, which again calls _selfrefok and returns the message then
1222+
# ok=-1 which will trigger setalloccol with verbose after
1223+
# the jval = eval(jsub, ...)
12241224
if ((ok<-selfrefok(x, verbose=FALSE))==0L) # ok==0 so no warning when loaded from disk (-1) [-1 considered TRUE by R]
12251225
if (is.data.table(x)) warningf("A shallow copy of this data.table was taken so that := can add or remove %d columns by reference. At an earlier point, this data.table was copied by R (or was created manually using structure() or similar). Avoid names<- and attr<- which in R currently (and oddly) may copy the whole data.table. Use set* syntax instead to avoid copying: ?set, ?setnames and ?setattr. It's also not unusual for data.table-agnostic packages to produce tables affected by this issue. If this message doesn't help, please report your use case to the data.table issue tracker so the root cause can be fixed or this message improved.", length(newnames))
1226-
# !is.data.table for DF |> DT(,:=) tests 2212.16-19 (#5113) where a shallow copy is routine for data.frame
1227-
if ((ok<1L) || (truelength(x) < ncol(x)+length(newnames))) {
1228-
DT = x # in case getOption contains "ncol(DT)" as it used to. TODO: warn and then remove
1229-
n = length(newnames) + eval(getOption("datatable.alloccol")) # TODO: warn about expressions and then drop the eval()
1230-
# i.e. reallocate at the size as if the new columns were added followed by setalloccol().
1231-
name = substitute(x)
1232-
if (is.name(name) && ok && verbose) { # && NAMED(x)>0 (TO DO) # ok here includes -1 (loaded from disk)
1233-
catf("Growing vector of column pointers from truelength %d to %d. A shallow copy has been taken, see ?setalloccol. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could setalloccol() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n", truelength(x), n)
1234-
# #1729 -- copying to the wrong environment here can cause some confusion
1235-
if (ok == -1L) catf("Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected.\n")
1236-
1237-
# Verbosity should not issue warnings, so cat rather than warning.
1238-
# TO DO: Add option 'datatable.pedantic' to turn on warnings like this.
1239-
1240-
# TO DO ... comments moved up from C ...
1241-
# Note that the NAMED(dt)>1 doesn't work because .Call
1242-
# always sets to 2 (see R-ints), it seems. Work around
1243-
# may be possible but not yet working. When the NAMED test works, we can drop allocwarn argument too
1244-
# because that's just passed in as FALSE from [<- where we know `*tmp*` isn't really NAMED=2.
1245-
# Note also that this growing will happen for missing columns assigned NULL, too. But so rare, we
1246-
# don't mind.
1247-
}
1248-
setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope
1249-
if (is.name(name)) {
1250-
assign(as.character(name),x,parent.frame(),inherits=TRUE)
1251-
} else if (.is_simple_extraction(name)) {
1252-
.reassign_extracted_table(name, x)
1253-
} # TO DO: else if env$<- or list$<-
1254-
}
12551226
}
12561227
}
12571228
}
@@ -1411,6 +1382,55 @@ replace_dot_alias = function(e) {
14111382
}
14121383

14131384
if (!is.null(lhs)) {
1385+
# Re-matches characters names in the lhs after jval to account for jsub's that modify the columns of the data.table (#6768)
1386+
# Replaces numerical lhs with respective names_x
1387+
if(is.character(lhs)){
1388+
m = chmatch(lhs, names_x)
1389+
if(!anyNA(m)) {
1390+
# updates by reference to existing columns
1391+
cols = as.integer(m)
1392+
newnames = NULL
1393+
} else {
1394+
# Adding new column(s).
1395+
newnames = setdiff(lhs, names_x)
1396+
m[is.na(m)] = ncol(x) + seq_along(newnames)
1397+
cols = as.integer(m)
1398+
# ok <- selfrefok above called without verbose -- only activated when
1399+
# ok=-1 which will trigger setalloccol with verbose in the next
1400+
# branch, which again calls _selfrefok and returns the message then
1401+
# !is.data.table for DF |> DT(,:=) tests 2212.16-19 (#5113) where a shallow copy is routine for data.frame
1402+
if ((ok<1L) || (truelength(x) < ncol(x)+length(newnames))) {
1403+
DT = x # in case getOption contains "ncol(DT)" as it used to. TODO: warn and then remove
1404+
n = length(newnames) + eval(getOption("datatable.alloccol")) # TODO: warn about expressions and then drop the eval()
1405+
# i.e. reallocate at the size as if the new columns were added followed by setalloccol().
1406+
name = substitute(x)
1407+
if (is.name(name) && ok && verbose) { # && NAMED(x)>0 (TO DO) # ok here includes -1 (loaded from disk)
1408+
catf("Growing vector of column pointers from truelength %d to %d. A shallow copy has been taken, see ?setalloccol. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could setalloccol() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n", truelength(x), n)
1409+
# #1729 -- copying to the wrong environment here can cause some confusion
1410+
if (ok == -1L) catf("Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected.\n")
1411+
1412+
# Verbosity should not issue warnings, so cat rather than warning.
1413+
# TO DO: Add option 'datatable.pedantic' to turn on warnings like this.
1414+
1415+
# TO DO ... comments moved up from C ...
1416+
# Note that the NAMED(dt)>1 doesn't work because .Call
1417+
# always sets to 2 (see R-ints), it seems. Work around
1418+
# may be possible but not yet working. When the NAMED test works, we can drop allocwarn argument too
1419+
# because that's just passed in as FALSE from [<- where we know `*tmp*` isn't really NAMED=2.
1420+
# Note also that this growing will happen for missing columns assigned NULL, too. But so rare, we
1421+
# don't mind.
1422+
}
1423+
setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope
1424+
if (is.name(name)) {
1425+
assign(as.character(name),x,parent.frame(),inherits=TRUE)
1426+
} else if (.is_simple_extraction(name)) {
1427+
.reassign_extracted_table(name, x)
1428+
} # TO DO: else if env$<- or list$<-
1429+
}
1430+
}
1431+
} else if (is.numeric(lhs)) {
1432+
lhs = names_x[m]
1433+
}
14141434
# TODO?: use set() here now that it can add new columns. Then remove newnames and alloc logic above.
14151435
.Call(Cassign,x,irows,cols,newnames,jval)
14161436
return(suppPrint(x))

R/frollapply.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right","
259259
} else leftadaptive = FALSE
260260
if (leftadaptive) {
261261
if (verbose)
262-
cat("frollapply: adaptive=TRUE && align='left' pre-processing for align='right'\n")
262+
catf("frollapply: adaptive=TRUE && align='left' pre-processing for align='right'\n")
263263
if (by.column) {
264264
X = lapply(X, rev)
265265
} else {
@@ -329,7 +329,7 @@ frollapply = function(X, N, FUN, ..., by.column=TRUE, fill=NA, align=c("right","
329329
DTths0 = getDTthreads(FALSE)
330330
use.fork0 = .Platform$OS.type!="windows" && DTths0 > 1L
331331
if (verbose && !use.fork0)
332-
cat("frollapply running on single CPU thread\n")
332+
catf("frollapply running on single CPU thread\n")
333333
ans = vector("list", nx*nn)
334334
## vectorized x
335335

R/merge.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,13 +130,15 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
130130
if (is.null(nm)) {
131131
warningf(ngettext(n_dots, "merge.data.table() received %d unnamed argument in '...' which will be ignored.",
132132
"merge.data.table() received %d unnamed arguments in '...' which will be ignored."),
133-
n_dots)
133+
n_dots,
134+
domain=NA)
134135
} else {
135136
named_idx = nzchar(nm)
136137
if (all(named_idx)) {
137138
warningf(ngettext(n_dots, "merge.data.table() received %d unknown keyword argument which will be ignored: %s",
138139
"merge.data.table() received %d unknown keyword arguments which will be ignored: %s"),
139-
n_dots, brackify(nm))
140+
n_dots, brackify(nm),
141+
domain=NA)
140142
} else {
141143
n_named <- sum(named_idx)
142144
unnamed_clause <- sprintf(ngettext(n_dots - n_named, "%d unnamed argument in '...'", "%d unnamed arguments in '...'"), n_dots - n_named)

inst/tests/tests.Rraw

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1960,8 +1960,9 @@ test(632, merge(DT1,DT2,all=TRUE), data.table(a=c(1,2,3,4,5),total.x=c(2,NA,1,3,
19601960
test(632.1, merge(DT1,DT2,all=TRUE), setkey(adt(merge(adf(DT1),adf(DT2),by="a",all=TRUE)),a))
19611961

19621962
# Test that with=FALSE by number isn't messed up by dup column names, #2025
1963-
DT = data.table(a=1:3,a=4:6)
1964-
test(634, DT[,2:=200L], data.table(a=1:3,a=200L))
1963+
DT = data.table(a=1:3, a=4:6, a=7:9)
1964+
test(634, DT[,2:=200L], data.table(a=1:3, a=200L, a=7:9))
1965+
test(634.1, DT[,c(2, 3):=200L], data.table(a=1:3, a=200L, a=200L))
19651966

19661967
# Test names when not all items are named, #2029
19671968
DT = data.table(x=1:3,y=1:3)
@@ -21877,9 +21878,18 @@ DT = data.table(a = as.Date("2010-01-01"), b = 1L)
2187721878
test(2348.1, tryCatch(DT[a == as.Date("20100101")], error=conditionCall)[[1L]], quote(charToDate))
2187821879
test(2348.2, tryCatch(DT[a == as.Date("20100101") | b == 2L], error=conditionCall)[[1L]], quote(charToDate))
2187921880

21881+
# j expressions that modify a data.table by reference, (#6768)
21882+
inner = function(dt) dt[,b:=4:6]
21883+
outer = function(dt) { inner(dt); return(7:9) }
21884+
foo = function(dt) { dt[,b:=4:6]; return(7:9) }
21885+
DT = data.table(a=1:3)
21886+
test(2349, DT[,c:=outer(DT)], data.table(a=1:3, b=4:6, c=7:9))
21887+
test(2349.1, DT[,c:=foo(DT)], data.table(a=1:3, b=4:6, c=7:9))
21888+
rm(inner, outer, foo, DT)
21889+
2188021890
# exercise rehashing during forder, #6694
2188121891
strings = as.character(6145:1)
2188221892
DT = data.table(x = strings)
2188321893
setorder(DT, x)
21884-
test(2349, DT[["x"]], sort.int(strings, method='radix'))
21894+
test(2350, DT[["x"]], sort.int(strings, method='radix'))
2188521895
rm(DT, strings)

0 commit comments

Comments
 (0)