Skip to content

Commit b7aed31

Browse files
authored
Merge branch 'master' into redundantParameterCleanup
2 parents 4029724 + 8f5ffa8 commit b7aed31

File tree

8 files changed

+809
-790
lines changed

8 files changed

+809
-790
lines changed

.ci/linters/r/eval_parse_linter.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
eval_parse_linter = make_linter_from_xpath(
2+
"//SYMBOL_FUNCTION_CALL[text() = 'parse']
3+
/ancestor::expr
4+
/preceding-sibling::expr[SYMBOL_FUNCTION_CALL[text() = 'eval']]
5+
/parent::expr
6+
",
7+
"Avoid eval(parse()); build the language directly, possibly using substitute2()."
8+
)

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@
8484
8585
13. Reference to `.SD` in `...` arguments to `lapply()`, e.g. ``lapply(list_of_tables, `[`, j=.SD[1L])`` is evaluated correctly, [#2982](https://github.com/Rdatatable/data.table/issues/2982). Thanks @franknarf1 for the report and @MichaelChirico for the fix.
8686
87+
14. Filling columns of class Date with POSIXct (and vice versa) using `shift()` now yields a clear, informative error message specifying the class mismatch, [#5218](https://github.com/Rdatatable/data.table/issues/5218). Thanks @ashbaldry for the report and @ben-schwen for the fix.
88+
8789
### NOTES
8890
8991
1. The following in-progress deprecations have proceeded:

R/data.table.R

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -97,34 +97,32 @@ replace_dot_alias = function(e) {
9797
}
9898

9999
.checkTypos = function(err, ref) {
100+
err_str <- conditionMessage(err)
100101
# a slightly wonky workaround so that this still works in non-English sessions, #4989
101102
# generate this at run time (as opposed to e.g. onAttach) since session language is
102103
# technically OK to update (though this should be rare), and since it's low-cost
103104
# to do so here because we're about to error anyway.
104-
missing_obj_fmt = gsub(
105-
"'missing_datatable_variable____'",
105+
missing_obj_regex = gsub(
106+
"'____missing_datatable_variable____'",
106107
"'(?<obj_name>[^']+)'",
107-
tryCatch(eval(parse(text="missing_datatable_variable____")), error=identity)$message
108-
# eval(parse()) to avoid "no visible binding for global variable" note from R CMD check
109-
# names starting with _ don't parse, so no leading _ in the name
108+
# expression() to avoid "no visible binding for global variable" note from R CMD check
109+
conditionMessage(tryCatch(eval(quote(`____missing_datatable_variable____`)), error=identity)),
110+
fixed=TRUE
110111
)
111-
idx = regexpr(missing_obj_fmt, err$message, perl=TRUE)
112-
if (idx > 0L) {
113-
start = attr(idx, "capture.start", exact=TRUE)[ , "obj_name"]
114-
used = substr(
115-
err$message,
116-
start,
117-
start + attr(idx, "capture.length", exact=TRUE)[ , "obj_name"] - 1L
118-
)
119-
found = agrep(used, ref, value=TRUE, ignore.case=TRUE, fixed=TRUE)
120-
if (length(found)) {
121-
stopf("Object '%s' not found. Perhaps you intended %s", used, brackify(found))
122-
} else {
123-
stopf("Object '%s' not found amongst %s", used, brackify(ref))
124-
}
112+
idx = regexpr(missing_obj_regex, err_str, perl=TRUE)
113+
if (idx == -1L)
114+
stopf("%s", err_str, domain=NA) # Don't use stopf() directly, since err_str might have '%', #6588
115+
start = attr(idx, "capture.start", exact=TRUE)[ , "obj_name"]
116+
used = substr(
117+
err_str,
118+
start,
119+
start + attr(idx, "capture.length", exact=TRUE)[ , "obj_name"] - 1L
120+
)
121+
found = agrep(used, ref, value=TRUE, ignore.case=TRUE, fixed=TRUE)
122+
if (length(found)) {
123+
stopf("Object '%s' not found. Perhaps you intended %s", used, brackify(found))
125124
} else {
126-
# Don't use stopf() directly, since err$message might have '%', #6588
127-
stopf("%s", err$message, domain=NA)
125+
stopf("Object '%s' not found amongst %s", used, brackify(ref))
128126
}
129127
}
130128

R/onLoad.R

Lines changed: 23 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -73,31 +73,29 @@
7373
# In fread and fwrite we have moved back to using getOption's default argument since it is unlikely fread and fread will be called in a loop many times, plus they
7474
# are relatively heavy functions where the overhead in getOption() would not be noticed. It's only really [.data.table where getOption default bit.
7575
# Improvement to base::getOption() now submitted (100x; 5s down to 0.05s): https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17394
76-
opts = c(
77-
"datatable.verbose"="FALSE", # datatable.<argument name>
78-
"datatable.optimize"="Inf", # datatable.<argument name>
79-
"datatable.print.nrows"="100L", # datatable.<argument name>
80-
"datatable.print.topn"="5L", # datatable.<argument name>
81-
"datatable.print.class"="TRUE", # for print.data.table
82-
"datatable.print.rownames"="TRUE", # for print.data.table
83-
"datatable.print.colnames"="'auto'", # for print.data.table
84-
"datatable.print.keys"="TRUE", # for print.data.table
85-
"datatable.print.trunc.cols"="FALSE", # for print.data.table
86-
"datatable.show.indices"="FALSE", # for print.data.table
87-
"datatable.allow.cartesian"="FALSE", # datatable.<argument name>
88-
"datatable.join.many"="TRUE", # mergelist, [.data.table #4383 #914
89-
"datatable.dfdispatchwarn"="TRUE", # not a function argument
90-
"datatable.warnredundantby"="TRUE", # not a function argument
91-
"datatable.alloccol"="1024L", # argument 'n' of alloc.col. Over-allocate 1024 spare column slots
92-
"datatable.auto.index"="TRUE", # DT[col=="val"] to auto add index so 2nd time faster
93-
"datatable.use.index"="TRUE", # global switch to address #1422
94-
"datatable.prettyprint.char" = NULL, # FR #1091
95-
"datatable.old.matrix.autoname"="TRUE", # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
96-
NULL
97-
)
98-
for (i in setdiff(names(opts),names(options()))) {
99-
eval(parse(text=paste0("options(",i,"=",opts[i],")")))
100-
}
76+
opts = list(
77+
datatable.verbose=FALSE, # datatable.<argument name>
78+
datatable.optimize=Inf, # datatable.<argument name>
79+
datatable.print.nrows=100L, # datatable.<argument name>
80+
datatable.print.topn=5L, # datatable.<argument name>
81+
datatable.print.class=TRUE, # for print.data.table
82+
datatable.print.rownames=TRUE, # for print.data.table
83+
datatable.print.colnames='auto', # for print.data.table
84+
datatable.print.keys=TRUE, # for print.data.table
85+
datatable.print.trunc.cols=FALSE, # for print.data.table
86+
datatable.show.indices=FALSE, # for print.data.table
87+
datatable.allow.cartesian=FALSE, # datatable.<argument name>
88+
datatable.join.many=TRUE, # mergelist, [.data.table #4383 #914
89+
datatable.dfdispatchwarn=TRUE, # not a function argument
90+
datatable.warnredundantby=TRUE, # not a function argument
91+
datatable.alloccol=1024L, # argument 'n' of alloc.col. Over-allocate 1024 spare column slots
92+
datatable.auto.index=TRUE, # DT[col=="val"] to auto add index so 2nd time faster
93+
datatable.use.index=TRUE, # global switch to address #1422
94+
datatable.prettyprint.char=NULL, # FR #1091
95+
datatable.old.matrix.autoname=TRUE # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
96+
)
97+
opts = opts[!names(opts) %chin% names(options())]
98+
options(opts)
10199

102100
# Test R behaviour that changed in v3.1 and is now depended on
103101
x = 1L:3L

inst/tests/tests.Rraw

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6851,6 +6851,13 @@ test(1463.78, shift(x,1:2, type="cyclic"), list(as.raw(c(5, 1:4)), as.raw(c(4:
68516851
test(1463.79, shift(x,-1L, type="cyclic"), as.raw(c(2:5, 1)))
68526852
test(1463.80, shift(x,-(1:2),type="cyclic"), list(as.raw(c(2:5, 1)), as.raw(c(3:5,1:2))))
68536853

6854+
# shift incompatible types (e.g. Date and POSIXct)
6855+
d = .Date(0:4)
6856+
p = .POSIXct(1:5)
6857+
test(1463.81, shift(d, fill=p[1L]), error="Filling Date with POSIXct .* unsupported.*")
6858+
test(1463.82, shift(p, fill=d[1L]), error="Filling POSIXct with Date .* unsupported.*")
6859+
test(1463.83, shift(d, fill=as.IDate(2000L)), .Date(c(2000L, 0:3)))
6860+
68546861
# FR #686
68556862
DT = data.table(a=rep(c("A", "B", "C", "A", "B"), c(2,2,3,1,2)), foo=1:10)
68566863
# Seemingly superfluous 'foo' is needed to test fix for #1942

src/shift.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type)
4040
SEXP elem = VECTOR_ELT(x, i);
4141
size_t size = RTYPE_SIZEOF(elem);
4242
R_xlen_t xrows = xlength(elem);
43+
if ((INHERITS(elem, char_Date) && INHERITS(fill, char_POSIXct)) || (INHERITS(elem, char_POSIXct) && INHERITS(fill, char_Date))) {
44+
const char* elem_type = INHERITS(elem, char_Date) ? "Date" : "POSIXct";
45+
const char* fill_type = INHERITS(fill, char_Date) ? "Date" : "POSIXct";
46+
error(_("Filling %s with %s using shift() is unsupported. Please convert fill to %s first."),
47+
elem_type, fill_type, elem_type);
48+
}
4349
SEXP thisfill = PROTECT(coerceAs(fill, elem, ScalarLogical(0))); // #4865 use coerceAs for type coercion
4450
switch (TYPEOF(elem)) {
4551
case INTSXP: case LGLSXP: {

vignettes/datatable-joins.Rmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ x[i, on, nomatch]
117117
\____ secondary data.table
118118
```
119119

120-
> Please keep in mind that the standard argument order in `data.table` is `dt[i, j, by]`. For join operations, it is recommended to pass the `on` and `nomatch` arguments by name to avoid using `j` and `by` when they are not needed.
120+
Note: Please keep in mind that the standard argument order in `data.table` is `dt[i, j, by]`. For join operations, it is recommended to pass the `on` and `nomatch` arguments by name to avoid using `j` and `by` when they are not needed.
121121

122122
## 3. Equi joins
123123

@@ -439,7 +439,7 @@ ProductReceived[ProductSales,
439439
allow.cartesian = TRUE]
440440
```
441441

442-
> `allow.cartesian` is defaulted to FALSE as this is seldom what the user wants, and such a cross join can lead to a very large number of rows in the result. For example, if Table A has 100 rows and Table B has 50 rows, their Cartesian product would result in 5000 rows (100 * 50). This can quickly become memory-intensive for large datasets.
442+
Note: `allow.cartesian` is defaulted to FALSE as this is seldom what the user wants, and such a cross join can lead to a very large number of rows in the result. For example, if Table A has 100 rows and Table B has 50 rows, their Cartesian product would result in 5000 rows (100 * 50). This can quickly become memory-intensive for large datasets.
443443

444444

445445
#### 3.6.1. Selecting one match

0 commit comments

Comments
 (0)