Skip to content

Commit ad6e70b

Browse files
authored
Merge branch 'master' into DataCodeIntegration
2 parents da74dd4 + bfa049c commit ad6e70b

File tree

6 files changed

+70
-7
lines changed

6 files changed

+70
-7
lines changed

.ci/atime/tests.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,5 +277,14 @@ test.list <- atime::atime_test_list(
277277
Slow = "73d79edf8ff8c55163e90631072192301056e336", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/8397dc3c993b61a07a81c786ca68c22bc589befc)
278278
Fast = "8397dc3c993b61a07a81c786ca68c22bc589befc"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7019/commits) that removes inefficiency
279279

280+
"isoweek improved in #7144" = atime::atime_test(
281+
setup = {
282+
set.seed(349)
283+
x = sample(Sys.Date() - 0:5000, N, replace=TRUE)
284+
},
285+
expr = data.table::isoweek(x),
286+
Slow = "548410d23dd74b625e8ea9aeb1a5d2e9dddd2927", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/548410d23dd74b625e8ea9aeb1a5d2e9dddd2927)
287+
Fast = "c0b32a60466bed0e63420ec105bc75c34590865e"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7144/commits) that uses a much faster implementation
288+
280289
tests=extra.test.list)
281290
# nolint end: undesirable_operator_linter.

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@
4242

4343
8. `groupingsets()` gets a new argument `enclos` for use together with the `jj` argument in functions wrapping `groupingsets()`, including the existing wrappers `rollup()` and `cube()`, [#5560](https://github.com/Rdatatable/data.table/issues/5560). When forwarding a `j`-expression as `groupingsets(jj = substitute(j))`, make sure to pass `enclos = parent.frame()` as well, so that the `j`-expression will be evaluated in the right context. This makes it possible for `j` to refer to variables outside the `data.table`. Thanks @sindribaldur for the report and @aitap for the fix.
4444

45+
9. `isoweek()` is much faster (e.g. 20x) by re-using an implementation from {base} (`format(x, "%V")`), [#5111](https://github.com/Rdatatable/data.table/issues/5111). Thanks @MichaelChirico for the report and PR.
46+
47+
10. `data.table()` and `as.data.table()` with `keep.rownames=TRUE` now extract row names from named vectors, matching `data.frame()` behavior. Names from the first named vector in the input are used to create the row names column (default name `"rn"` or custom name via `keep.rownames="column_name"`), [#1916](https://github.com/Rdatatable/data.table/issues/1916). Thanks to @richierocks for the feature request and @Mukulyadav2004 for the implementation.
48+
4549
### BUG FIXES
4650

4751
1. Custom binary operators from the `lubridate` package now work with objects of class `IDate` as with a `Date` subclass, [#6839](https://github.com/Rdatatable/data.table/issues/6839). Thanks @emallickhossain for the report and @aitap for the fix.

R/IDateTime.R

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -342,19 +342,20 @@ yday = function(x) convertDate(as.IDate(x), "yday")
342342
wday = function(x) convertDate(as.IDate(x), "wday")
343343
mday = function(x) convertDate(as.IDate(x), "mday")
344344
week = function(x) convertDate(as.IDate(x), "week")
345-
isoweek = function(x) {
345+
# TODO(#3279): Investigate if improved as.IDate() makes our below implementation faster than this
346+
# ISO 8601 week of the year for each element of x.
# x is coerced with as.IDate(), rendered with the "%V" conversion of format(),
# and the resulting text is parsed back to integer.
isoweek = function(x) {
  week_txt = format(as.IDate(x), "%V")
  as.integer(week_txt)
}
346347
# ISO 8601-conformant week, as described at
347348
# https://en.wikipedia.org/wiki/ISO_week_date
348349
# Approach:
349350
# * Find nearest Thursday to each element of x
350351
# * Find the number of weeks having passed between
351352
# January 1st of the year of the nearest Thursdays and x
352353

353-
x = as.IDate(x) # number of days since 1 Jan 1970 (a Thurs)
354-
nearest_thurs = as.IDate(7L * (as.integer(x + 3L) %/% 7L))
355-
year_start = as.IDate(format(nearest_thurs, '%Y-01-01'))
356-
1L + (nearest_thurs - year_start) %/% 7L
357-
}
354+
# x = as.IDate(x) # number of days since 1 Jan 1970 (a Thurs)
355+
# nearest_thurs = as.IDate(7L * (as.integer(x + 3L) %/% 7L))
356+
# year_start = as.IDate(format(nearest_thurs, '%Y-01-01'))
357+
# 1L + (nearest_thurs - year_start) %/% 7L
358+
358359

359360
month = function(x) convertDate(as.IDate(x), "month")
360361
quarter = function(x) convertDate(as.IDate(x), "quarter")

R/as.data.table.R

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,26 @@ as.data.table.list = function(x,
136136
missing.check.names = missing(check.names)
137137
origListNames = if (missing(.named)) names(x) else NULL # as.data.table called directly, not from inside data.table() which provides .named, #3854
138138
empty_atomic = FALSE
139+
140+
# Handle keep.rownames for vectors (mimicking data.frame behavior)
141+
rownames_ = NULL
142+
check_rownames = !isFALSE(keep.rownames)
143+
139144
for (i in seq_len(n)) {
140145
xi = x[[i]]
141146
if (is.null(xi)) next # eachncol already initialized to 0 by integer() above
147+
if (check_rownames && is.null(rownames_)) {
148+
if (is.null(dim(xi))) {
149+
if (!is.null(nm <- names(xi))) {
150+
rownames_ = nm
151+
x[[i]] = unname(xi)
152+
}
153+
} else {
154+
if (!is.null(nm <- rownames(xi))) {
155+
rownames_ = nm
156+
}
157+
}
158+
}
142159
if (!is.null(dim(xi)) && missing.check.names) check.names=TRUE
143160
if ("POSIXlt" %chin% class(xi)) {
144161
warningf("POSIXlt column type detected and converted to POSIXct. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.")
@@ -203,6 +220,18 @@ as.data.table.list = function(x,
203220
}
204221
if (any(vnames==".SD")) stopf("A column may not be called .SD. That has special meaning.")
205222
if (check.names) vnames = make.names(vnames, unique=TRUE)
223+
224+
# Add rownames column when vector names were found
225+
if (!is.null(rownames_)) {
226+
rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn"
227+
if (!is.na(idx <- chmatch(rn_name, vnames)[1L])) {
228+
ans = c(list(ans[[idx]]), ans[-idx])
229+
vnames = c(vnames[idx], vnames[-idx])
230+
} else {
231+
ans = c(list(recycle(rownames_, nrow)), ans)
232+
vnames = c(rn_name, vnames)
233+
}
234+
}
206235
setattr(ans, "names", vnames)
207236
setDT(ans, key=key) # copy ensured above; also, setDT handles naming
208237
if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames) # PR 3854 and tests 2058.15-17

inst/tests/tests.Rraw

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21406,3 +21406,23 @@ dt = data.table(x = 123456, y = "wide_string")
2140621406
test(2329.2, print(dt, col.names = "none"), output = "1: 123456 wide_string\n")
2140721407
dt = data.table(a = NA_integer_, b = NaN)
2140821408
test(2329.3, print(dt, col.names = "none"), output = "1: NA NaN\n")
21409+
21410+
# Row name extraction from multiple vectors, #7136
21411+
x <- 1:3
21412+
y <- setNames(4:6, c("A", "B", "C"))
21413+
test(2330.1, as.data.table(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=1:3, V2=4:6))
21414+
test(2330.2, as.data.table(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=1:3, V2=4:6))
21415+
test(2330.3, as.data.table(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=4:6, V2=1:3))
21416+
21417+
# Behavior under data.frame()
21418+
test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), x=1:3, y=4:6))
21419+
test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), y=4:6, x=1:3))
21420+
21421+
DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames
21422+
test(2330.6, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=letters[1:6], a=6:1, V=1:6))
21423+
21424+
z <- setNames(1:3, rep("", 3)) # vector whose names are all empty strings
21425+
test(2330.7, as.data.table(list(z), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3))
21426+
21427+
M <- matrix(1:6, nrow=3, dimnames=list(rep("", 3), c("V1", "V2"))) # test of list(M) for empty-rowname'd matrix input
21428+
test(2330.8, as.data.table(list(M), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3, V2=4:6))

man/as.data.table.Rd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ is.data.table(x)
3131
}
3232
\arguments{
3333
\item{x}{An R object.}
34-
\item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead.}
34+
\item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead. For lists and when calling \code{data.table()}, the names of the first named vector (or the row names of the first matrix-like element that has them) are extracted and stored in that column, similar to \code{data.frame()} behavior.}
3535
\item{key}{ Character vector of one or more column names which is passed to \code{\link{setkeyv}}. }
3636
\item{sorted}{logical used in \emph{array} method, default \code{TRUE} is overridden when \code{key} is provided. }
3737
\item{value.name}{character scalar used in \emph{array} method, default \code{"value"}.}

0 commit comments

Comments
 (0)