Skip to content

Commit b0b8b23

Browse files
authored
[.data.table: call setalloccol for non-selfrefok tables when removing columns (#7492)
* regression test * reallocate non-ok table before removing columns
1 parent f6bbd5b commit b0b8b23

File tree

2 files changed

+48
-34
lines changed

2 files changed

+48
-34
lines changed

R/data.table.R

Lines changed: 43 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,8 +1191,9 @@ replace_dot_alias = function(e) {
11911191
if (any(m<1L | ncol(x)<m)) stopf("LHS of := appears to be column positions but are outside [1,ncol] range. New columns can only be added by name.")
11921192
} else
11931193
stopf("LHS of := isn't column names ('character') or positions ('integer' or 'numeric')")
1194+
ok<-selfrefok(x, verbose=FALSE)
11941195
if (!anyNA(m)) {
1195-
# updates by reference to existing columns
1196+
# updates by reference to existing columns, or deletions
11961197
cols = as.integer(m)
11971198
newnames=NULL
11981199
if (identical(irows, integer())) {
@@ -1221,7 +1222,7 @@ replace_dot_alias = function(e) {
12211222
# don't pass verbose to selfrefok here -- only activated when
12221223
# ok=-1 which will trigger setalloccol with verbose after
12231224
# the jval = eval(jsub, ...)
1224-
if ((ok<-selfrefok(x, verbose=FALSE))==0L) # ok==0 so no warning when loaded from disk (-1) [-1 considered TRUE by R]
1225+
if (ok==0L) # ok==0 so no warning when loaded from disk (-1) [-1 considered TRUE by R]
12251226
if (is.data.table(x)) warningf("A shallow copy of this data.table was taken so that := can add or remove %d columns by reference. At an earlier point, this data.table was copied by R (or was created manually using structure() or similar). Avoid names<- and attr<- which in R currently (and oddly) may copy the whole data.table. Use set* syntax instead to avoid copying: ?set, ?setnames and ?setattr. It's also not unusual for data.table-agnostic packages to produce tables affected by this issue. If this message doesn't help, please report your use case to the data.table issue tracker so the root cause can be fixed or this message improved.", length(newnames))
12261227
}
12271228
}
@@ -1395,42 +1396,50 @@ replace_dot_alias = function(e) {
13951396
newnames = setdiff(lhs, names_x)
13961397
m[is.na(m)] = ncol(x) + seq_along(newnames)
13971398
cols = as.integer(m)
1398-
# ok <- selfrefok above called without verbose -- only activated when
1399-
# ok=-1 which will trigger setalloccol with verbose in the next
1400-
# branch, which again calls _selfrefok and returns the message then
1401-
# !is.data.table for DF |> DT(,:=) tests 2212.16-19 (#5113) where a shallow copy is routine for data.frame
1402-
if ((ok<1L) || (truelength(x) < ncol(x)+length(newnames))) {
1403-
DT = x # in case getOption contains "ncol(DT)" as it used to. TODO: warn and then remove
1404-
n = length(newnames) + eval(getOption("datatable.alloccol")) # TODO: warn about expressions and then drop the eval()
1405-
# i.e. reallocate at the size as if the new columns were added followed by setalloccol().
1406-
name = substitute(x)
1407-
if (is.name(name) && ok && verbose) { # && NAMED(x)>0 (TO DO) # ok here includes -1 (loaded from disk)
1408-
catf("Growing vector of column pointers from truelength %d to %d. A shallow copy has been taken, see ?setalloccol. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could setalloccol() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n", truelength(x), n)
1409-
# #1729 -- copying to the wrong environment here can cause some confusion
1410-
if (ok == -1L) catf("Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected.\n")
1411-
1412-
# Verbosity should not issue warnings, so cat rather than warning.
1413-
# TO DO: Add option 'datatable.pedantic' to turn on warnings like this.
1414-
1415-
# TO DO ... comments moved up from C ...
1416-
# Note that the NAMED(dt)>1 doesn't work because .Call
1417-
# always sets to 2 (see R-ints), it seems. Work around
1418-
# may be possible but not yet working. When the NAMED test works, we can drop allocwarn argument too
1419-
# because that's just passed in as FALSE from [<- where we know `*tmp*` isn't really NAMED=2.
1420-
# Note also that this growing will happen for missing columns assigned NULL, too. But so rare, we
1421-
# don't mind.
1422-
}
1423-
setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope
1424-
if (is.name(name)) {
1425-
assign(as.character(name),x,parent.frame(),inherits=TRUE)
1426-
} else if (.is_simple_extraction(name)) {
1427-
.reassign_extracted_table(name, x)
1428-
} # TO DO: else if env$<- or list$<-
1429-
}
14301399
}
14311400
} else if (is.numeric(lhs)) {
14321401
lhs = names_x[m]
14331402
}
1403+
# ok <- selfrefok above called without verbose -- only activated when
1404+
# ok=-1 which will trigger setalloccol with verbose in the next
1405+
# branch, which again calls _selfrefok and returns the message then
1406+
# !is.data.table for DF |> DT(,:=) tests 2212.16-19 (#5113) where a shallow copy is routine for data.frame
1407+
if (
1408+
(
1409+
!is.null(newnames) || # adding new columns
1410+
is.null(jval) || (is.list(jval) && any(vapply_1b(jval, is.null))) # removing columns
1411+
) && (
1412+
(ok<1L) || # unsafe to resize
1413+
(truelength(x) < ncol(x)+length(newnames)) # not enough space for new columns
1414+
)
1415+
) {
1416+
DT = x # in case getOption contains "ncol(DT)" as it used to. TODO: warn and then remove
1417+
n = length(newnames) + eval(getOption("datatable.alloccol")) # TODO: warn about expressions and then drop the eval()
1418+
# i.e. reallocate at the size as if the new columns were added followed by setalloccol().
1419+
name = substitute(x)
1420+
if (is.name(name) && ok && verbose) { # && NAMED(x)>0 (TO DO) # ok here includes -1 (loaded from disk)
1421+
catf("Growing vector of column pointers from truelength %d to %d. A shallow copy has been taken, see ?setalloccol. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could setalloccol() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n", truelength(x), n)
1422+
# #1729 -- copying to the wrong environment here can cause some confusion
1423+
if (ok == -1L) catf("Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected.\n")
1424+
1425+
# Verbosity should not issue warnings, so cat rather than warning.
1426+
# TO DO: Add option 'datatable.pedantic' to turn on warnings like this.
1427+
1428+
# TO DO ... comments moved up from C ...
1429+
# Note that the NAMED(dt)>1 doesn't work because .Call
1430+
# always sets to 2 (see R-ints), it seems. Work around
1431+
# may be possible but not yet working. When the NAMED test works, we can drop allocwarn argument too
1432+
# because that's just passed in as FALSE from [<- where we know `*tmp*` isn't really NAMED=2.
1433+
# Note also that this growing will happen for missing columns assigned NULL, too. But so rare, we
1434+
# don't mind.
1435+
}
1436+
setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope
1437+
if (is.name(name)) {
1438+
assign(as.character(name),x,parent.frame(),inherits=TRUE)
1439+
} else if (.is_simple_extraction(name)) {
1440+
.reassign_extracted_table(name, x)
1441+
} # TO DO: else if env$<- or list$<-
1442+
}
14341443
# TODO?: use set() here now that it can add new columns. Then remove newnames and alloc logic above.
14351444
.Call(Cassign,x,irows,cols,newnames,jval)
14361445
return(suppPrint(x))

inst/tests/tests.Rraw

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21896,3 +21896,8 @@ DT = data.table(x = strings)
2189621896
setorder(DT, x)
2189721897
test(2350, DT[["x"]], sort.int(strings, method='radix'))
2189821898
rm(DT, strings)
21899+
21900+
# do remove columns in freshly unserialized data.tables, #7488
21901+
DT = unserialize(serialize(as.data.table(mtcars), NULL))
21902+
test(2351, DT[,carb:=NULL], as.data.table(mtcars)[,carb:=NULL])
21903+
rm(DT)

0 commit comments

Comments
 (0)