Skip to content

Commit a04d3ed

Browse files
committed
Merge branch 'master' of https://github.com/Rdatatable/data.table into issue_2606
2 parents 9a89e90 + 2f0d12f commit a04d3ed

File tree

12 files changed

+234
-106
lines changed

12 files changed

+234
-106
lines changed

.ci/atime/tests.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,5 +277,14 @@ test.list <- atime::atime_test_list(
277277
Slow = "73d79edf8ff8c55163e90631072192301056e336", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/8397dc3c993b61a07a81c786ca68c22bc589befc)
278278
Fast = "8397dc3c993b61a07a81c786ca68c22bc589befc"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7019/commits) that removes inefficiency
279279

280+
"isoweek improved in #7144" = atime::atime_test(
281+
setup = {
282+
set.seed(349)
283+
x = sample(Sys.Date() - 0:5000, N, replace=TRUE)
284+
},
285+
expr = data.table::isoweek(x),
286+
Slow = "548410d23dd74b625e8ea9aeb1a5d2e9dddd2927", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/548410d23dd74b625e8ea9aeb1a5d2e9dddd2927)
287+
Fast = "c0b32a60466bed0e63420ec105bc75c34590865e"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7144/commits) that uses a much faster implementation
288+
280289
tests=extra.test.list)
281290
# nolint end: undesirable_operator_linter.

.ci/linters/md/news_linter.R renamed to .ci/linters/md/news_github_link_mismatch_linter.R

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,6 @@
1-
# ensure that numbered list in each section is in sequence
2-
check_section_numbering = function(news) {
3-
if (!grepl("NEWS", news)) return(invisible())
4-
news = readLines(news)
5-
# plain '#' catches some examples; 'd' for 'data.table'
6-
sections = grep("^#+ [A-Zd]", news)
7-
entries = grep("^[0-9]+[.]", news)
8-
entry_value = as.integer(gsub("^([0-9]+)[.].*", "\\1", news[entries]))
9-
section_id = findInterval(entries, sections)
10-
11-
any_mismatch = FALSE
12-
for (id in unique(section_id)) {
13-
section_entries = entry_value[section_id == id]
14-
intended_value = seq_along(section_entries)
15-
matched = section_entries == intended_value
16-
if (all(matched)) next
17-
any_mismatch = TRUE
18-
section_header = news[sections[id]]
19-
cat(sprintf(
20-
"In section '%s' (line %d), bad numbering:\n%s\n",
21-
section_header, sections[id],
22-
paste0(" [", section_entries[!matched], " --> ", intended_value[!matched], "]", collapse="\n")
23-
))
24-
}
25-
stopifnot("Please fix the NEWS issues above" = !any_mismatch)
26-
}
27-
281
# ensure that GitHub link text & URL actually agree
29-
check_gh_links = function(news) {
2+
news_github_link_mismatch_linter = function(news) {
3+
if (!grepl("NEWS", news)) return(invisible())
304
news = readLines(news)
315
gh_links_info = gregexpr(
326
"\\[#(?<md_number>[0-9]+)\\]\\(https://github.com/Rdatatable/data.table/(?<link_type>[^/]+)/(?<link_number>[0-9]+)\\)",
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
# Ensure that the numbered list in each NEWS section is in sequence (1., 2., 3., ...).
#
# news: path of the file to lint. Paths that do not contain "NEWS" are skipped.
#
# For every section whose entries are not numbered 1..k in order, prints the
# offending entries as "[found --> expected]" and finally signals an error.
# Returns invisibly when nothing is wrong.
news_section_numbering_linter = function(news) {
  if (!grepl("NEWS", news)) return(invisible())
  news = readLines(news)
  # plain '#' catches some examples; 'd' for 'data.table'
  sections = grep("^#+ [A-Zd]", news)
  entries = grep("^[0-9]+[.]", news)
  entry_value = as.integer(gsub("^([0-9]+)[.].*", "\\1", news[entries]))
  # findInterval() yields 0 for entries located before the first section heading
  section_id = findInterval(entries, sections)

  any_mismatch = FALSE
  for (id in unique(section_id)) {
    in_section = section_id == id
    section_entries = entry_value[in_section]
    intended_value = seq_along(section_entries)
    # an NA entry value (digit run too large for an integer) counts as a mismatch
    # rather than making the if() below fail on an NA condition
    matched = !is.na(section_entries) & section_entries == intended_value
    if (all(matched)) next
    any_mismatch = TRUE
    bad_numbering = paste0(" [", section_entries[!matched], " --> ", intended_value[!matched], "]", collapse="\n")
    if (id == 0L) {
      # no heading to point at: sections[0L] is zero-length, which would make
      # sprintf() return character(0) and the failure would be reported silently
      cat(sprintf(
        "Before the first section heading (line %d), bad numbering:\n%s\n",
        entries[in_section][1L], bad_numbering
      ))
    } else {
      cat(sprintf(
        "In section '%s' (line %d), bad numbering:\n%s\n",
        news[sections[id]], sections[id], bad_numbering
      ))
    }
  }
  stopifnot("Please fix the NEWS issues above" = !any_mismatch)
}

.ci/linters/md/heading_id_linter.R renamed to .ci/linters/md/vignette_heading_id_linter.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# ensure that ids are limited to alphanumerics and dashes
22
# (in particular, dots and underscores break the links)
3-
check_header_ids = function(md) {
3+
vignette_heading_id_linter = function(md) {
44
if (!grepl('[.]Rmd$', md)) return(invisible())
55
md = readLines(md)
66
# A bit surprisingly, some headings don't start with a letter.

NEWS.md

Lines changed: 55 additions & 34 deletions
Large diffs are not rendered by default.

R/IDateTime.R

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -342,19 +342,20 @@ yday = function(x) convertDate(as.IDate(x), "yday")
342342
wday = function(x) convertDate(as.IDate(x), "wday")
343343
mday = function(x) convertDate(as.IDate(x), "mday")
344344
week = function(x) convertDate(as.IDate(x), "week")
345-
isoweek = function(x) {
345+
# TODO(#3279): Investigate if improved as.IDate() makes our below implementation faster than this
346+
isoweek = function(x) as.integer(format(as.IDate(x), "%V"))
346347
# ISO 8601-conformant week, as described at
347348
# https://en.wikipedia.org/wiki/ISO_week_date
348349
# Approach:
349350
# * Find nearest Thursday to each element of x
350351
# * Find the number of weeks having passed between
351352
# January 1st of the year of the nearest Thursdays and x
352353

353-
x = as.IDate(x) # number of days since 1 Jan 1970 (a Thurs)
354-
nearest_thurs = as.IDate(7L * (as.integer(x + 3L) %/% 7L))
355-
year_start = as.IDate(format(nearest_thurs, '%Y-01-01'))
356-
1L + (nearest_thurs - year_start) %/% 7L
357-
}
354+
# x = as.IDate(x) # number of days since 1 Jan 1970 (a Thurs)
355+
# nearest_thurs = as.IDate(7L * (as.integer(x + 3L) %/% 7L))
356+
# year_start = as.IDate(format(nearest_thurs, '%Y-01-01'))
357+
# 1L + (nearest_thurs - year_start) %/% 7L
358+
358359

359360
month = function(x) convertDate(as.IDate(x), "month")
360361
quarter = function(x) convertDate(as.IDate(x), "quarter")

R/as.data.table.R

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ as.data.table.matrix = function(x, keep.rownames=FALSE, key=NULL, ...) {
4848
if (!identical(keep.rownames, FALSE)) {
4949
# can specify col name to keep.rownames, #575
5050
ans = data.table(rn=rownames(x), x, keep.rownames=FALSE)
51+
# auto-inferred name 'x' is not back-compatible & inconsistent, #7145
52+
if (ncol(x) == 1L && is.null(colnames(x)))
53+
setnames(ans, 'x', 'V1')
5154
if (is.character(keep.rownames))
5255
setnames(ans, 'rn', keep.rownames[1L])
5356
return(ans)
@@ -133,9 +136,26 @@ as.data.table.list = function(x,
133136
missing.check.names = missing(check.names)
134137
origListNames = if (missing(.named)) names(x) else NULL # as.data.table called directly, not from inside data.table() which provides .named, #3854
135138
empty_atomic = FALSE
139+
140+
# Handle keep.rownames for vectors (mimicking data.frame behavior)
141+
rownames_ = NULL
142+
check_rownames = !isFALSE(keep.rownames)
143+
136144
for (i in seq_len(n)) {
137145
xi = x[[i]]
138146
if (is.null(xi)) next # eachncol already initialized to 0 by integer() above
147+
if (check_rownames && is.null(rownames_)) {
148+
if (is.null(dim(xi))) {
149+
if (!is.null(nm <- names(xi))) {
150+
rownames_ = nm
151+
x[[i]] = unname(xi)
152+
}
153+
} else {
154+
if (!is.null(nm <- rownames(xi))) {
155+
rownames_ = nm
156+
}
157+
}
158+
}
139159
if (!is.null(dim(xi)) && missing.check.names) check.names=TRUE
140160
if ("POSIXlt" %chin% class(xi)) {
141161
warningf("POSIXlt column type detected and converted to POSIXct. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.")
@@ -200,6 +220,18 @@ as.data.table.list = function(x,
200220
}
201221
if (any(vnames==".SD")) stopf("A column may not be called .SD. That has special meaning.")
202222
if (check.names) vnames = make.names(vnames, unique=TRUE)
223+
224+
# Add rownames column when vector names were found
225+
if (!is.null(rownames_)) {
226+
rn_name = if (is.character(keep.rownames)) keep.rownames[1L] else "rn"
227+
if (!is.na(idx <- chmatch(rn_name, vnames)[1L])) {
228+
ans = c(list(ans[[idx]]), ans[-idx])
229+
vnames = c(vnames[idx], vnames[-idx])
230+
} else {
231+
ans = c(list(recycle(rownames_, nrow)), ans)
232+
vnames = c(rn_name, vnames)
233+
}
234+
}
203235
setattr(ans, "names", vnames)
204236
setDT(ans, key=key) # copy ensured above; also, setDT handles naming
205237
if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames) # PR 3854 and tests 2058.15-17

R/print.data.table.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
141141
print_default(toprint)
142142
return(invisible(x))
143143
}
144+
if (col.names == "none")
145+
colnames(toprint) = rep.int("", ncol(toprint))
144146
if (nrow(toprint)>20L && col.names == "auto")
145147
# repeat colnames at the bottom if over 20 rows so you don't have to scroll up to see them
146148
# option to shut this off per request of Oleg Bondar on SO, #1482

inst/tests/tests.Rraw

Lines changed: 82 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -21276,13 +21276,46 @@ if (test_R.utils) local({
2127621276
})
2127721277

2127821278
# Creating a data.table when one vector is transposed didn't respect the name defined by the user, #4124
21279-
test(2321.1, DT <- data.table(a=1:2, b=matrix(1:2)), data.table(a=1:2, b=1:2))
21280-
test(2321.2, names(DT), names(data.frame(a=1:2, b=matrix(1:2))))
21281-
test(2321.3, DT <- data.table(a=integer(), b=matrix(1L, nrow=0L, ncol=1L)), data.table(a=integer(), b=integer()))
21282-
test(2321.4, names(DT), names(data.frame(a=integer(), b=matrix(1L, nrow=0L, ncol=1L))))
21279+
test(2321.01, DT <- data.table(a=1:2, b=matrix(1:2)), data.table(a=1:2, b=1:2))
21280+
test(2321.02, names(DT), names(data.frame(a=1:2, b=matrix(1:2))))
21281+
test(2321.03, DT <- data.table(a=integer(), b=matrix(1L, nrow=0L, ncol=1L)), data.table(a=integer(), b=integer()))
21282+
test(2321.04, names(DT), names(data.frame(a=integer(), b=matrix(1L, nrow=0L, ncol=1L))))
2128321283
## but respect named column vectors
21284-
test(2321.5, DT <- data.table(a=1:2, cbind(b=3:4)), data.table(a=1:2, b=3:4))
21285-
test(2321.6, names(DT), names(data.frame(a=1:2, cbind(b=3:4))))
21284+
test(2321.05, DT <- data.table(a=1:2, cbind(b=3:4)), data.table(a=1:2, b=3:4))
21285+
test(2321.06, names(DT), names(data.frame(a=1:2, cbind(b=3:4))))
21286+
## also respect old naming pattern when invoked indirectly, #7145
21287+
M = cbind(1:3)
21288+
test(2321.07, as.data.table(M), data.table(V1=1:3))
21289+
rownames(M) = c('a', 'b', 'c')
21290+
test(2321.08, as.data.table(M), data.table(V1=1:3))
21291+
test(2321.09, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), V1=1:3))
21292+
colnames(M) = 'zz'
21293+
test(2321.10, as.data.table(M), data.table(zz=1:3))
21294+
test(2321.11, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), zz=1:3))
21295+
colnames(M) = 'x'
21296+
test(2321.12, as.data.table(M), data.table(x=1:3))
21297+
test(2321.13, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), x=1:3))
21298+
M = cbind(M, y=4:6)
21299+
test(2321.14, as.data.table(M), data.table(x=1:3, y=4:6))
21300+
test(2321.15, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), x=1:3, y=4:6))
21301+
colnames(M) = c('A', 'B')
21302+
test(2321.16, as.data.table(M), data.table(A=1:3, B=4:6))
21303+
test(2321.17, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), A=1:3, B=4:6))
21304+
colnames(M) = NULL
21305+
test(2321.18, as.data.table(M), data.table(V1=1:3, V2=4:6))
21306+
test(2321.19, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), V1=1:3, V2=4:6))
21307+
colnames(M) = c('x', '')
21308+
test(2321.20, as.data.table(M), data.table(x=1:3, V2=4:6))
21309+
test(2321.21, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), x=1:3, V2=4:6))
21310+
colnames(M) = c('', 'x')
21311+
test(2321.22, as.data.table(M), data.table(V1=1:3, x=4:6))
21312+
test(2321.23, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), V1=1:3, x=4:6))
21313+
colnames(M) = c('', '')
21314+
test(2321.24, as.data.table(M), data.table(V1=1:3, V2=4:6))
21315+
test(2321.25, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), V1=1:3, V2=4:6))
21316+
colnames(M) = c('A', '')
21317+
test(2321.26, as.data.table(M), data.table(A=1:3, V2=4:6))
21318+
test(2321.27, as.data.table(M, keep.rownames='id'), data.table(id=c('a', 'b', 'c'), A=1:3, V2=4:6))
2128621319

2128721320
# New fctr() helper: like factor() but retaining order by default #4837
2128821321
test(2322.01, levels(fctr(c("b","a","c"))), c("b","a","c"))
@@ -21366,27 +21399,55 @@ DT[, i := integer()]
2136621399
DT[, f2 := factor()]
2136721400
test(2328.2, droplevels(DT), data.table(f=factor(), i=integer(), f2=factor()))
2136821401

21402+
#6882 print() output with col.names="none"
21403+
dt = data.table(short = 1:3, verylongcolumnname = 4:6)
21404+
test(2329.1, print(dt, col.names = "none"), output = "1: 1 4\n2: 2 5\n3: 3 6\n")
21405+
dt = data.table(x = 123456, y = "wide_string")
21406+
test(2329.2, print(dt, col.names = "none"), output = "1: 123456 wide_string\n")
21407+
dt = data.table(a = NA_integer_, b = NaN)
21408+
test(2329.3, print(dt, col.names = "none"), output = "1: NA NaN\n")
21409+
21410+
# Row name extraction from multiple vectors, #7136
21411+
x <- 1:3
21412+
y <- setNames(4:6, c("A", "B", "C"))
21413+
test(2330.1, as.data.table(list(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=1:3, V2=4:6))
21414+
test(2330.2, as.data.table(list(x, y), keep.rownames="custom"), data.table(custom=c("A", "B", "C"), V1=1:3, V2=4:6))
21415+
test(2330.3, as.data.table(list(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), V1=4:6, V2=1:3))
21416+
21417+
# Behavior under data.frame()
21418+
test(2330.4, as.data.table(data.frame(x, y), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), x=1:3, y=4:6))
21419+
test(2330.5, as.data.table(data.frame(y, x), keep.rownames=TRUE), data.table(rn=c("A", "B", "C"), y=4:6, x=1:3))
21420+
21421+
DF <- data.frame(row.names = letters[1:6], V = 1:6) # Test data.frame with explicit rownames
21422+
test(2330.6, as.data.table(list(a = 6:1, DF), keep.rownames=TRUE), data.table(rn=letters[1:6], a=6:1, V=1:6))
21423+
21424+
z <- setNames(1:3, rep("", 3)) # vector with all-empty names # behaviour with all-empty row names
21425+
test(2330.7, as.data.table(list(z), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3))
21426+
21427+
M <- matrix(1:6, nrow=3, dimnames=list(rep("", 3), c("V1", "V2"))) # test of list(M) for empty-rowname'd matrix input
21428+
test(2330.8, as.data.table(list(M), keep.rownames=TRUE), data.table(rn=rep("", 3), V1=1:3, V2=4:6))
21429+
2136921430
#2606
21370-
test(2329.1, {
21371-
dt1 <- data.table(a = 1)
21372-
lst <- list(inner = dt1)
21373-
res <- tables(recursive=TRUE)
21431+
test(2331.1, {
21432+
dt1 = data.table(a = 1)
21433+
lst = list(inner = dt1)
21434+
res = tables(recursive=TRUE)
2137421435
any(res$NAME == "lst$inner")
2137521436
}, TRUE)
21376-
test(2329.2, {
21377-
lst <- list(data.table(b = 2))
21378-
res <- tables(recursive=TRUE)
21437+
test(2331.2, {
21438+
lst = list(data.table(b = 2))
21439+
res = tables(recursive=TRUE)
2137921440
any(grepl("^lst\\[\\[1\\]\\]$", res$NAME))
2138021441
}, TRUE)
21381-
test(2329.3, {
21382-
nested <- list(l1 = list(l2 = data.table(c = 3)))
21383-
res <- tables(recursive=TRUE)
21442+
test(2331.3, {
21443+
nested = list(l1 = list(l2 = data.table(c = 3)))
21444+
res = tables(recursive=TRUE)
2138421445
any(res$NAME == "nested$l1$l2")
2138521446
}, TRUE)
21386-
test(2329.4, {
21387-
cycle <- list()
21388-
cycle[[1]] <- cycle
21389-
cycle[[2]] <- data.table(x = 1)
21390-
res <- tables(recursive=TRUE)
21447+
test(2331.4, {
21448+
cycle = list()
21449+
cycle[[1]] = cycle
21450+
cycle[[2]] = data.table(x = 1)
21451+
res = tables(recursive=TRUE)
2139121452
any(res$NAME == "cycle[[2]]") && !"cycle[[1]]" %in% res$NAME
2139221453
}, TRUE)

man/as.data.table.Rd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ is.data.table(x)
3131
}
3232
\arguments{
3333
\item{x}{An R object.}
34-
\item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead.}
34+
\item{keep.rownames}{Default is \code{FALSE}. If \code{TRUE}, adds the input object's names as a separate column named \code{"rn"}. \code{keep.rownames = "id"} names the column \code{"id"} instead. For lists and when calling \code{data.table()}, names from the first named vector are extracted and used as row names, similar to \code{data.frame()} behavior.}
3535
\item{key}{ Character vector of one or more column names which is passed to \code{\link{setkeyv}}. }
3636
\item{sorted}{logical used in \emph{array} method, default \code{TRUE} is overridden when \code{key} is provided. }
3737
\item{value.name}{character scalar used in \emph{array} method, default \code{"value"}.}

0 commit comments

Comments
 (0)