Skip to content

Commit ad2e22d

Browse files
Expand altrep in assign (#5401)
* Check for ALTREP * Fix and add tests * NEWS Item * add a new forward-compatibility test * pass for stlye * comment * clarify we think this is rare given the lack of related issues --------- Co-authored-by: Michael Chirico <[email protected]> Co-authored-by: Michael Chirico <[email protected]>
1 parent 037bc89 commit ad2e22d

File tree

3 files changed

+35
-9
lines changed

3 files changed

+35
-9
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444

4545
12. Internal functions used to signal errors are now marked as non-returning, silencing a compiler warning about potentially unchecked allocation failure. Thanks to Prof. Brian D. Ripley for the report and @aitap for the fix, [#7070](https://github.com/Rdatatable/data.table/pull/7070).
4646

47+
13. In rare cases, `data.table` failed to expand ALTREP columns when assigning a full column by reference. This could result in the target column getting modified unintentionally if the next call to the data.table was a modification by reference of the source column. E.g. in `DT[, b := as.character(a)]` the string conversion gets deferred and subsequent modification of column `a` would also modify column `b`, [#5400](https://github.com/Rdatatable/data.table/issues/5400). Thanks to @aquasync for the report and Václav Tlapák for the PR.
48+
4749
### NOTES
4850

4951
1. Continued work to remove non-API C functions, [#6180](https://github.com/Rdatatable/data.table/issues/6180). Thanks Ivan Krylov for the PRs and for writing a clear and concise guide about the R API: https://aitap.codeberg.page/R-api/.

inst/tests/tests.Rraw

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2281,15 +2281,38 @@ test(753.1, DT[,c("x1","x2"):=4:6, verbose = TRUE], data.table(a=letters[1:3],x=
22812281
output = "RHS for item 2 has been duplicated")
22822282
test(753.2, DT[2,x2:=7L], data.table(a=letters[1:3],x=3:1,x1=4:6,x2=c(4L,7L,6L),key="a"))
22832283
DT = data.table(a=letters[3:1],x=1:3,y=4:6)
2284-
test(754.1, DT[,c("x1","y1","x2"):=list(x,y)], error="Supplied 3 columns to be assigned 2 items. Please see NEWS for v1.12.2")
2285-
test(754.2, DT[,c("x1","y1","x2"):=list(x,y,x)], data.table(a=letters[3:1],x=1:3,y=4:6,x1=1:3,y1=4:6,x2=1:3))
2284+
test(754.01, DT[,c("x1","y1","x2"):=list(x,y)], error="Supplied 3 columns to be assigned 2 items. Please see NEWS for v1.12.2")
2285+
test(754.02, DT[,c("x1","y1","x2"):=list(x,y,x)], data.table(a=letters[3:1],x=1:3,y=4:6,x1=1:3,y1=4:6,x2=1:3))
22862286
# And non-recycling i.e. that a single column copy does copy the column
22872287
DT = data.table(a=1:3)
2288-
test(754.3, DT[,b:=a][1,a:=4L][2,b:=5L], data.table(a=INT(4,2,3),b=INT(1,5,3)))
2289-
test(754.4, DT[,b:=a][3,b:=6L], data.table(a=INT(4,2,3),b=INT(4,2,6)))
2290-
test(754.5, DT[,a:=as.character(a),verbose=TRUE], output="Direct plonk.*no copy")
2288+
test(754.03, DT[, b := a][1, a := 4L][2, b := 5L], data.table(a=INT(4,2,3),b=INT(1,5,3)))
2289+
test(754.04, DT[, b := a][3, b := 6L], data.table(a=INT(4,2,3),b=INT(4,2,6)))
2290+
test(754.05, DT[, a := as.numeric(a), verbose=TRUE], output="Direct plonk.*no copy")
22912291
RHS = as.integer(DT$a)
2292-
test(754.6, DT[,a:=RHS,verbose=TRUE], output="RHS for item 1 has been duplicated")
2292+
test(754.06, DT[, a:= RHS, verbose=TRUE], output="RHS for item 1 has been duplicated")
2293+
# Expand ALTREPS in assign.c, #5400
2294+
# String conversion gets deferred
2295+
## first, a regression test of R itself -- we want to make sure our own test continues to be useful & testing its intended purpose
2296+
test(754.07, {a = 1:10; .Internal(inspect(a)); b = as.character(a); .Internal(inspect(b))}, output = "\\bcompact\\b.*\\bdeferred string conversion\\b")
2297+
test(754.08, DT[, a := as.character(a), verbose=TRUE], output="RHS for item 1 has been duplicated")
2298+
# Executing the code inside of test expands the ALTREP so we repeat the code
2299+
# in order to check the result after a further assignment
2300+
DT = data.table(a=1:3)
2301+
DT[, b := as.character(a)]
2302+
DT[, a := 5L]
2303+
test(754.09, DT, data.table(a=5L, b=as.character(1:3)))
2304+
# This function returns an ALTREP wrapper if the input is at least length 64
2305+
testFun = function(x) {
2306+
x[FALSE] = 1
2307+
x
2308+
}
2309+
DT = data.table(id=1:64, col1=0, col2=0)
2310+
test(754.10, DT[, col1 := testFun(col2), verbose = TRUE], output="RHS for item 1 has been duplicated")
2311+
DT = data.table(id=1:64, col1=0, col2=0)
2312+
DT[, col1 := testFun(col2)]
2313+
DT[, col2 := 999]
2314+
test(754.11, DT, data.table(id=1:64, col1=0, col2=999))
2315+
rm(testFun)
22932316

22942317
# Used to test warning on redundant by (#2282) but by=.EACHI has now superseded
22952318
DT = data.table(a=letters[1:3],b=rep(c("d","e"),each=3),x=1:6,key=c('a', 'b'))

src/assign.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -550,11 +550,12 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
550550
if (length(rows)==0 && targetlen==vlen && (vlen>0 || nrow==0)) {
551551
if ( MAYBE_SHARED(thisvalue) || // set() protects the NAMED of atomic vectors from .Call setting arguments to 2 by wrapping with list
552552
(TYPEOF(values)==VECSXP && i>LENGTH(values)-1) || // recycled RHS would have columns pointing to others, #185.
553-
(TYPEOF(values)!=VECSXP && i>0) // assigning the same values to a second column. Have to ensure a copy #2540
553+
(TYPEOF(values)!=VECSXP && i>0) || // assigning the same values to a second column. Have to ensure a copy #2540
554+
ALTREP(thisvalue) // Some ALTREP wrappers have survived to here, e.g. #5400
554555
) {
555556
if (verbose) {
556-
Rprintf(_("RHS for item %d has been duplicated because MAYBE_REFERENCED==%d MAYBE_SHARED==%d, but then is being plonked. length(values)==%d; length(cols)==%d\n"),
557-
i+1, MAYBE_REFERENCED(thisvalue), MAYBE_SHARED(thisvalue), length(values), length(cols));
557+
Rprintf(_("RHS for item %d has been duplicated because MAYBE_REFERENCED==%d MAYBE_SHARED==%d ALTREP==%d, but then is being plonked. length(values)==%d; length(cols)==%d\n"),
558+
i+1, MAYBE_REFERENCED(thisvalue), MAYBE_SHARED(thisvalue), ALTREP(thisvalue), length(values), length(cols));
558559
}
559560
thisvalue = copyAsPlain(thisvalue); // PROTECT not needed as assigned as element to protected list below.
560561
} else {

0 commit comments

Comments
 (0)