Skip to content

Commit 3e99eb4

Browse files
implement option, add regression tests
1 parent 0e5f928 commit 3e99eb4

File tree

5 files changed

+25
-5
lines changed

5 files changed

+25
-5
lines changed

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
## data.table [v1.17.99](https://github.com/Rdatatable/data.table/milestone/35) (in development)
66

7+
### POTENTIALLY BREAKING CHANGES
8+
9+
1. `data.table(x=1, <expr>)`, where `<expr>` is an expression resulting in a 1-column matrix without column names, will eventually have names `x` and `V2`, not `x` and `V1`, consistent with `data.table(x=1, <expr>)` where `<expr>` results in an atomic vector. For example, `data.table(x=1, cbind(1))` and `data.table(x=1, 1)` will both have columns named `x` and `V2`. In this release, the matrix case continues to be named `V1`, but the new behavior can be activated by setting `options(datatable.old.matrix.autoname=FALSE)`. See point 14 under Bug Fixes for more context; this change will provide more internal consistency as well as more consistency with `data.frame()`.
10+
711
### NEW FEATURES
812

913
1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also match `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR.

R/as.data.table.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ as.data.table.list = function(x,
162162
xi = x[[i]] = as.POSIXct(xi)
163163
} else if (is.matrix(xi) || is.data.frame(xi)) {
164164
if (!is.data.table(xi)) {
165-
if (is.matrix(xi) && NCOL(xi)<=1L && is.null(colnames(xi))) { # 1 column matrix naming #4124
165+
if (is.matrix(xi) && NCOL(xi)<=1L && is.null(colnames(xi)) && isFALSE(getOption('datatable.old.matrix.autoname'))) { # 1 column matrix naming #4124
166166
xi = x[[i]] = c(xi)
167167
} else {
168168
xi = x[[i]] = as.data.table(xi, keep.rownames=keep.rownames) # we will never allow a matrix to be a column; always unpack the columns

R/onLoad.R

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@
7373
# In fread and fwrite we have moved back to using getOption's default argument since it is unlikely fread and fwrite will be called in a loop many times, plus they
7474
# are relatively heavy functions where the overhead in getOption() would not be noticed. It's only really in [.data.table that the overhead of getOption's default argument bit.
7575
# Improvement to base::getOption() now submitted (100x; 5s down to 0.05s): https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17394
76-
opts = c("datatable.verbose"="FALSE", # datatable.<argument name>
76+
opts = c(
77+
"datatable.verbose"="FALSE", # datatable.<argument name>
7778
"datatable.optimize"="Inf", # datatable.<argument name>
7879
"datatable.print.nrows"="100L", # datatable.<argument name>
7980
"datatable.print.topn"="5L", # datatable.<argument name>
@@ -85,12 +86,14 @@
8586
"datatable.show.indices"="FALSE", # for print.data.table
8687
"datatable.allow.cartesian"="FALSE", # datatable.<argument name>
8788
"datatable.join.many"="TRUE", # mergelist, [.data.table #4383 #914
88-
"datatable.dfdispatchwarn"="TRUE", # not a function argument
89-
"datatable.warnredundantby"="TRUE", # not a function argument
89+
"datatable.dfdispatchwarn"="TRUE", # not a function argument
90+
"datatable.warnredundantby"="TRUE", # not a function argument
9091
"datatable.alloccol"="1024L", # argument 'n' of alloc.col. Over-allocate 1024 spare column slots
9192
"datatable.auto.index"="TRUE", # DT[col=="val"] to auto add index so 2nd time faster
9293
"datatable.use.index"="TRUE", # global switch to address #1422
93-
"datatable.prettyprint.char" = NULL # FR #1091
94+
"datatable.prettyprint.char" = NULL, # FR #1091
95+
"datatable.old.matrix.autoname"="TRUE", # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
96+
NULL
9497
)
9598
for (i in setdiff(names(opts),names(options()))) {
9699
eval(parse(text=paste0("options(",i,"=",opts[i],")")))

inst/tests/tests.Rraw

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21317,6 +21317,10 @@ colnames(M) = c('A', '')
2131721317
test(2321.26, as.data.table(M), data.table(A=1:3, V2=4:6))
2131821318
test(2321.27, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), A=1:3, V2=4:6))
2131921319

21320+
# also respect old auto-naming rules by default (to be deprecated)
21321+
test(2321.28, names(data.table(a=1, cbind(2), c=3, 4)), c("a", "V1", "c", "V4"))
21322+
test(2321.29, options=c(datatable.old.matrix.autoname=FALSE), names(data.table(a=1, cbind(2), c=3, 4)), c("a", "V2", "c", "V4"))
21323+
2132021324
# New fctr() helper: like factor() but retaining order by default #4837
2132121325
test(2322.01, levels(fctr(c("b","a","c"))), c("b","a","c"))
2132221326
test(2322.02, levels(fctr(c(3,1,2))), c("3","1","2"))

man/data.table-options.Rd

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,15 @@
108108
}
109109
}
110110

111+
\section{Back-compatibility Options}{
112+
\describe{
113+
\item{\code{datatable.old.matrix.autoname}}{Logical, default \code{TRUE}. Governs how the output of
114+
expressions like \code{data.table(x=1, cbind(1))} will be named. When \code{TRUE}, it will be named
115+
\code{V1}, otherwise it will be named \code{V2}.
116+
}
117+
}
118+
}
119+
111120
\seealso{
112121
\code{\link[base]{options}},
113122
\code{\link[base]{getOption}},

0 commit comments

Comments
 (0)