Skip to content

Commit 5f3fccd

Browse files
authored
Merge branch 'master' into freadAtimeStruct
2 parents 71b67cf + 053d905 commit 5f3fccd

File tree

16 files changed

+148
-86
lines changed

16 files changed

+148
-86
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@
105105
106106
17. `t1 - t2`, where one is an `IDate` and the other is a `Date`, is now consistent with the case where both are `IDate` or both are `Date`, [#4749](https://github.com/Rdatatable/data.table/issues/4749). Thanks @George9000 for the report and @MichaelChirico for the fix.
107107
108+
18. `fwrite` now allows `dec` to be the same as `sep` for edge cases where only one will be written, e.g. 0-row or 1-column tables, [#7227](https://github.com/Rdatatable/data.table/issues/7227). Thanks @MichaelChirico for the report and @venom1204 for the fix.
109+
108110
### NOTES
109111
110112
1. The following in-progress deprecations have proceeded:

R/data.table.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ replace_dot_alias = function(e) {
312312
root = root_name(jsub)
313313
} else if (length(jsub) > 2L && jsub[[2L]] %iscall% ":=") {
314314
#2142 -- j can be {} and have length 1
315-
stopf("You have wrapped := with {} which is ok but then := must be the only thing inside {}. You have something else inside {} as well. Consider placing the {} on the RHS of := instead; e.g. DT[,someCol:={tmpVar1<-...;tmpVar2<-...;tmpVar1*tmpVar2}]")
315+
stopf("Invalid use of `:=` inside `{}`. `:=` must be the only expression inside `{}` when used in `j`. Instead of: DT[{tmp1 <- ...; tmp2 <- ...; someCol := tmp1 * tmp2}], Use: DT[, someCol := {tmp1 <- ...; tmp2 <- ...; tmp1 * tmp2}]")
316316
}
317317
}
318318
if (root=="eval" && !any(all.vars(jsub[[2L]]) %chin% names_x)) {
@@ -2884,7 +2884,7 @@ address = function(x) .Call(Caddress, eval(substitute(x), parent.frame()))
28842884

28852885
":=" = function(...) {
28862886
# this error is detected when eval'ing isub and replaced with a more helpful one when using := in i due to forgetting a comma, #4227
2887-
stopf('Check that is.data.table(DT) == TRUE. Otherwise, :=, `:=`(...) and let(...) are defined for use in j, once only and in particular ways. Note that namespace-qualification like data.table::`:=`(...) is not supported. See help(":=").', class="dt_invalid_let_error")
2887+
stopf('Check that is.data.table(DT) == TRUE. Otherwise, `:=` is defined for use in j, once only and in particular ways. See help(":=", "data.table"). A common reason for this error is allocating a new column in `j` and using `<-` instead of `:=`; e.g., `DT[, new_col <- 1]` should be `DT[, new_col := 1]`. Another is using `:=` in a multi-statement `{...}` block; please use `:=` as the only statement in `j`.', class="dt_invalid_let_error")
28882888
}
28892889

28902890
# TODO(#6197): Export these.

R/fwrite.R

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
1313
yaml = FALSE,
1414
bom = FALSE,
1515
verbose=getOption("datatable.verbose", FALSE),
16-
encoding = "") {
16+
encoding = "",
17+
forceDecimal = FALSE) {
1718
na = as.character(na[1L]) # fix for #1725
1819
if (length(encoding) != 1L || !encoding %chin% c("", "UTF-8", "native")) {
1920
stopf("Argument 'encoding' must be '', 'UTF-8' or 'native'.")
@@ -44,14 +45,14 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
4445
is.character(sep) && length(sep)==1L && (nchar(sep) == 1L || identical(sep, "")),
4546
is.character(sep2) && length(sep2)==3L && nchar(sep2[2L])==1L,
4647
is.character(dec) && length(dec)==1L && nchar(dec) == 1L,
47-
dec != sep, # sep2!=dec and sep2!=sep checked at C level when we know if list columns are present
48+
`dec and sep must be distinct whenever both might be needed` = (!NROW(x) || NCOL(x) <= 1L || dec != sep), # sep2!=dec and sep2!=sep checked at C level when we know if list columns are present
4849
is.character(eol) && length(eol)==1L,
4950
length(qmethod) == 1L && qmethod %chin% c("double", "escape"),
5051
length(compress) == 1L && compress %chin% c("auto", "none", "gzip"),
5152
length(compressLevel) == 1L && 0L <= compressLevel && compressLevel <= 9L,
5253
isTRUEorFALSE(col.names), isTRUEorFALSE(append), isTRUEorFALSE(row.names),
5354
isTRUEorFALSE(verbose), isTRUEorFALSE(showProgress), isTRUEorFALSE(logical01),
54-
isTRUEorFALSE(bom),
55+
isTRUEorFALSE(bom), isTRUEorFALSE(forceDecimal),
5556
length(na) == 1L, #1725, handles NULL or character(0) input
5657
is.character(file) && length(file)==1L && !is.na(file),
5758
length(buffMB)==1L && !is.na(buffMB) && 1L<=buffMB && buffMB<=1024L,
@@ -122,7 +123,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
122123
}
123124
.Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append,
124125
row.names, col.names, logical01, scipen, dateTimeAs, buffMB, nThread,
125-
showProgress, is_gzip, compressLevel, bom, yaml, verbose, encoding)
126+
showProgress, is_gzip, compressLevel, bom, yaml, verbose, encoding, forceDecimal)
126127
invisible()
127128
}
128129

inst/tests/tests.Rraw

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3997,9 +3997,9 @@ test(1133.8, DT[, new := if (.GRP==1L) 7L else 3.4, by=x], data.table(x=INT(1,1,
39973997
DT <- data.table(x=c("A", "A", "B", "B"), val =1:4)
39983998
DT2 <- copy(DT)[, a := 1L]
39993999
test(1134.1, DT[, {a := 1L}], DT2)
4000-
test(1134.2, DT[, {a := 1L; NULL}], error="You have wrapped.*which is ok.*Consider")
4000+
test(1134.2, DT[, {a := 1L; NULL}], error="Invalid use of `:=` inside `{}`")
40014001
test(1134.3, DT[, {b := 2L}, by=x], DT2[, b:=2L, by=x])
4002-
test(1134.4, DT[, {b := 2L; sum(val)}, by=x], error="You have wrapped.*which is ok.*Consider")
4002+
test(1134.4, DT[, {b := 2L; sum(val)}, by=x], error="Invalid use of `:=` inside `{}`")
40034003

40044004
# FR #2693 and Gabor's suggestions on datatable-help "Problem with FAQ 2.8"
40054005
d1 <- data.table(id1 = c(1L, 2L, 2L, 3L), val = 1:4, key = "id1")
@@ -10977,9 +10977,9 @@ test(1732.7, fwrite(DT, quote='auto'), output='A,B\n,5\nNA,7\n"",0\nmonty,')
1097710977
test(1732.8, fwrite(DT, quote='auto', na="NA"), output='A,B\nNA,5\n"NA",7\n"",0\n"monty",NA')
1097810978

1097910979
# dec=","
10980-
test(1733.1, fwrite(data.table(pi),dec=","), error=base_messages$stopifnot("dec != sep"))
10980+
# Test 1733.1 removed, see #7227
1098110981
test(1733.2, fwrite(data.table(c(1.2,-8.0,pi,67.99),1:4),dec=",",sep=";"),
10982-
output="V1;V2\n1,2;1\n-8;2\n3,14159265358979;3\n67,99;4")
10982+
output="V1;V2\n1,2;1\n-8;2\n3,14159265358979;3\n67,99;4")
1098310983

1098410984
# fwrite implied and actual row.names
1098510985
DT = data.table(foo=1:3,bar=c(1.2,9.8,-6.0))
@@ -21593,3 +21593,30 @@ test(2336.3, all.equal(as.Date(t1) - t2, t1 - t2))
2159321593
test(2336.4, all.equal(as.Date(t2) - t1, t2 - t1))
2159421594
test(2336.5, all.equal(t1 - as.Date(t2), t1 - t2))
2159521595
test(2336.6, all.equal(t2 - as.Date(t1), t2 - t1))
21596+
21597+
# fwrite: allow dec=',' with single column, #7227
21598+
test(2337.1, fwrite(data.table(1), dec=","), NULL)
21599+
if (getRversion() >= "4.0.0") { # rely on stopifnot(named = ...) for correct message
21600+
test(2337.2, fwrite(data.table(0.1, 0.2), dec=",", sep=","), error = "dec and sep must be distinct")
21601+
}
21602+
test(2337.3, is.null(fwrite(data.table(c(0.1, 0.2)), dec=",", sep="\t")))
21603+
test(2337.4, is.null(fwrite(data.table(a=numeric(), b=numeric()), dec=",", sep=",")))
21604+
test(2337.5, is.null(fwrite(data.table(a=numeric()), dec=",", sep=",")))
21605+
21606+
# 2864 force decimal points for whole numbers in numeric columns
21607+
dd = data.table(x=c(1, 2, 3))
21608+
di = data.table(x=c(1L, 2L, 3L))
21609+
test(2338.1, capture.output(fwrite(dd, forceDecimal=TRUE)), c("x", "1.", "2.", "3."))
21610+
test(2338.2, capture.output(fwrite(dd, forceDecimal=TRUE, dec=",", sep="\t")), c("x", "1,", "2,", "3,"))
21611+
test(2338.3, capture.output(fwrite(dd, forceDecimal=FALSE)), c("x", "1", "2", "3"))
21612+
test(2338.4, capture.output(fwrite(di, forceDecimal=TRUE)), c("x", "1", "2", "3"))
21613+
test(2338.5, capture.output(fwrite(data.table(x=c(0.)), forceDecimal=TRUE)), c("x", "0."))
21614+
test(2338.6, capture.output(fwrite(data.table(x=c(-0.)), forceDecimal=TRUE)), c("x", "0."))
21615+
test(2338.7, capture.output(fwrite(data.table(x=c(0.00)), forceDecimal=TRUE)), c("x", "0."))
21616+
# round trip
21617+
local({
21618+
f <- tempfile(); on.exit(unlink(f))
21619+
test(2338.8, {fwrite(dd, f, forceDecimal=TRUE); fread(f)}, dd)
21620+
test(2338.9, {fwrite(dd, f, forceDecimal=FALSE); fread(f)}, di)
21621+
})
21622+

man/assign.Rd

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
\description{
77
Fast add, remove and update subsets of columns, by reference. \code{:=} operator can be used in two ways: \code{LHS := RHS} form, and \code{Functional form}. See \code{Usage}.
88

9+
Note that when using \samp{:=} inside a \code{{...}} block in \code{j}, the \samp{:=} call must be the only statement. For assigning to multiple columns, use the functional form: \code{DT[, `:=`(col1=val1, col2=val2)]}.
10+
911
\code{set} is a low-overhead loop-able version of \code{:=}. It is particularly useful for repetitively updating rows of certain columns by reference (using a for-loop). See \code{Examples}. It can not perform grouping operations.
1012

1113
\code{let} is an alias for the functional form and behaves exactly like \code{`:=`}.
@@ -66,7 +68,8 @@ All of the following result in a friendly error (by design) :
6668
x := 1L
6769
DT[i, col] := val
6870
DT[i]$col := val
69-
DT[, {col1 := 1L; col2 := 2L}] # Use the functional form, `:=`(), instead (see above).
71+
DT[, {col1 := 1L; col2 := 2L}] # Using `{}` in `j` is reserved for single `:=` expressions.
72+
# For multiple updates, use the functional form `:=`() instead.
7073
}
7174

7275
For additional resources, please read \href{../doc/datatable-faq.html}{\code{vignette("datatable-faq")}}. Also have a look at StackOverflow's \href{https://stackoverflow.com/questions/tagged/data.table/}{data.table tag}.

man/fwrite.Rd

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ fwrite(x, file = "", append = FALSE, quote = "auto",
2121
yaml = FALSE,
2222
bom = FALSE,
2323
verbose = getOption("datatable.verbose", FALSE),
24-
encoding = "")
24+
encoding = "",
25+
forceDecimal = FALSE)
2526
}
2627
\arguments{
2728
\item{x}{Any \code{list} of same length vectors; e.g. \code{data.frame} and \code{data.table}. If \code{matrix}, it gets internally coerced to \code{data.table} preserving col names but not row names}
@@ -62,6 +63,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto",
6263
\item{bom}{If \code{TRUE} a BOM (Byte Order Mark) sequence (EF BB BF) is added at the beginning of the file; format 'UTF-8 with BOM'.}
6364
\item{verbose}{Be chatty and report timings?}
6465
\item{encoding}{ The encoding of the strings written to the CSV file. Default is \code{""}, which means writing raw bytes without considering the encoding. Other possible options are \code{"UTF-8"} and \code{"native"}. }
66+
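\item{forceDecimal}{ Should decimal points be forced for whole numbers in numeric columns? When \code{FALSE}, the default, whole numbers like \code{c(1.0, 2.0, 3.0)} will be written as \samp{1, 2, 3}, i.e., dropping \code{dec}. When \code{TRUE}, they are written with a trailing \code{dec}, e.g. \samp{1., 2., 3.}; integer columns are unaffected. }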
6567
}
6668
\details{
6769
\code{fwrite} began as a community contribution with \href{https://github.com/Rdatatable/data.table/pull/1613}{pull request #1613} by Otto Seiskari. This gave Matt Dowle the impetus to specialize the numeric formatting and to parallelize: \url{https://h2o.ai/blog/2016/fast-csv-writing-for-r/}. Final items were tracked in \href{https://github.com/Rdatatable/data.table/issues/1664}{issue #1664} such as automatic quoting, \code{bit64::integer64} support, decimal/scientific formatting exactly matching \code{write.csv} between 2.225074e-308 and 1.797693e+308 to 15 significant figures, \code{row.names}, dates (between 0000-03-01 and 9999-12-31), times and \code{sep2} for \code{list} columns where each cell can itself be a vector.

src/data.table.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ SEXP chmatch_R(SEXP, SEXP, SEXP);
319319
SEXP chmatchdup_R(SEXP, SEXP, SEXP);
320320
SEXP chin_R(SEXP, SEXP);
321321
SEXP freadR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
322-
SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
322+
SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
323323
SEXP rbindlist(SEXP, SEXP, SEXP, SEXP, SEXP);
324324
SEXP setlistelt(SEXP, SEXP, SEXP);
325325
SEXP setS4elt(SEXP, SEXP, SEXP);

src/fmelt.c

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -103,12 +103,14 @@ static const char *concat(SEXP vec, SEXP idx) {
103103
// with missing inputs, and -1 in the positions with column names not
104104
// found. Column names not found will eventually cause error via
105105
// uniq_diff().
106-
SEXP chmatch_na(SEXP x, SEXP table){
107-
SEXP ans;
108-
PROTECT(ans = chmatch(x, table, -1));
109-
for(int i=0; i<length(ans); i++){
110-
if(STRING_ELT(x, i) == NA_STRING){
111-
INTEGER(ans)[i] = NA_INTEGER;
106+
SEXP chmatch_na(SEXP x, SEXP table)
107+
{
108+
SEXP ans = chmatch(x, table, -1);
109+
PROTECT(ans);
110+
int *restrict target = INTEGER(ans);
111+
for (int i = 0; i < length(ans); i++) {
112+
if (STRING_ELT(x, i) == NA_STRING) {
113+
target[i] = NA_INTEGER;
112114
}
113115
}
114116
UNPROTECT(1);
@@ -280,23 +282,23 @@ SEXP checkVars(SEXP DT, SEXP id, SEXP measure, Rboolean verbose) {
280282
}
281283

282284
struct processData {
283-
SEXP RCHK; // a 2 item list holding vars (result of checkVars) and not_NA_indices. PROTECTed up in fmelt so that preprocess() doesn't need to PROTECT. To pass rchk, #2865
284-
SEXP idcols, // convenience pointers into RCHK[0][0], RCHK[0][1] and RCHK[1] respectively
285-
variable_table, // NULL or data for variable column(s).
286-
valuecols, // list with one element per output/value column, each element is an integer vector.
287-
not_NA_indices;
288-
int *isfactor,
289-
*leach, // length of each element of the valuecols(measure.vars) list.
290-
*isidentical; // are all inputs for this value column the same type?
291-
int lids, // number of id columns.
292-
lvars, // number of variable columns.
293-
lvalues, // number of value columns.
294-
lmax, // max length of valuecols elements / number of times to repeat ids.
295-
totlen, // of output/long DT result of melt operation.
296-
nrow; // of input/wide DT to be melted.
285+
SEXP RCHK; // a 2 item list holding vars (result of checkVars) and not_NA_indices. PROTECTed up in fmelt so that preprocess() doesn't need to PROTECT. To pass rchk, #2865
286+
SEXP idcols; // convenience pointers into RCHK[0][0], RCHK[0][1] and RCHK[1] respectively
287+
SEXP variable_table; // NULL or data for variable column(s).
288+
SEXP valuecols; // list with one element per output/value column, each element is an integer vector.
289+
SEXP not_NA_indices;
290+
int *isfactor;
291+
int *leach; // length of each element of the valuecols(measure.vars) list.
292+
int *isidentical; // are all inputs for this value column the same type?
293+
int lids; // number of id columns.
294+
int lvars; // number of variable columns.
295+
int lvalues; // number of value columns.
296+
int lmax; // max length of valuecols elements / number of times to repeat ids.
297+
int totlen; // of output/long DT result of melt operation.
298+
int nrow; // of input/wide DT to be melted.
297299
SEXPTYPE *maxtype;
298-
bool measure_is_list,
299-
narm; // remove missing values?
300+
bool measure_is_list;
301+
bool narm; // remove missing values?
300302
};
301303

302304
static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valnames, Rboolean narm, Rboolean verbose, struct processData *data) {

src/frank.c

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,26 @@
33
// #include <signal.h> // the debugging machinery + breakpoint aidee
44
// raise(SIGINT);
55

6-
SEXP dt_na(SEXP x, SEXP cols) {
7-
int n=0, elem;
8-
6+
SEXP dt_na(SEXP x, SEXP cols)
7+
{
98
if (!isNewList(x)) internal_error(__func__, "Argument '%s' to %s is type '%s' not '%s'", "x", "Cdt_na", type2char(TYPEOF(x)), "list"); // # nocov
109
if (!isInteger(cols)) internal_error(__func__, "Argument '%s' to %s is type '%s' not '%s'", "cols", "Cdt_na", type2char(TYPEOF(cols)), "integer"); // # nocov
11-
for (int i=0; i<LENGTH(cols); ++i) {
12-
elem = INTEGER(cols)[i];
13-
if (elem<1 || elem>LENGTH(x))
14-
error(_("Item %d of 'cols' is %d which is outside 1-based range [1,ncol(x)=%d]"), i+1, elem, LENGTH(x));
15-
if (!n) n = length(VECTOR_ELT(x, elem-1));
10+
11+
int n = 0;
12+
const int numCols = LENGTH(cols);
13+
const int* col_ints = INTEGER_RO(cols);
14+
for (int i = 0; i < numCols; i++) {
15+
const int elem = col_ints[i];
16+
if (elem < 1 || elem > LENGTH(x))
17+
error(_("Item %d of 'cols' is %d which is outside 1-based range [1,ncol(x)=%d]"), i + 1, elem, LENGTH(x));
18+
if (!n) n = length(VECTOR_ELT(x, elem - 1));
1619
}
1720
SEXP ans = PROTECT(allocVector(LGLSXP, n));
1821
int *ians = LOGICAL(ans);
19-
for (int i=0; i<n; ++i) ians[i]=0;
20-
for (int i=0; i<LENGTH(cols); ++i) {
21-
SEXP v = VECTOR_ELT(x, INTEGER(cols)[i]-1);
22+
memset(ians, 0, n * sizeof(int));
23+
24+
for (int i = 0; i < numCols; i++) {
25+
SEXP v = VECTOR_ELT(x, col_ints[i]-1);
2226
if (!length(v) || isList(v)) continue; // like stats:::na.omit.data.frame, skip pairlist columns
2327
if (n != length(v))
2428
error(_("Column %d of input list x is length %d, inconsistent with first column of that item which is length %d."), i+1,length(v),n);

src/freadR.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,15 +265,13 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
265265
colNamesSxp = R_NilValue;
266266
SET_VECTOR_ELT(RCHK, 1, colNamesSxp = allocVector(STRSXP, ncol));
267267
for (int i = 0; i < ncol; i++) {
268-
SEXP elem;
269268
if (colNames == NULL || colNames[i].len <= 0) {
270269
char buff[12];
271270
snprintf(buff, sizeof(buff), "V%d", i + 1); // # notranslate
272-
elem = mkChar(buff); // no PROTECT as passed immediately to SET_STRING_ELT
271+
SET_STRING_ELT(colNamesSxp, i, mkChar(buff)); // no PROTECT as passed immediately to SET_STRING_ELT
273272
} else {
274-
elem = mkCharLenCE(anchor + colNames[i].off, colNames[i].len, ienc); // no PROTECT as passed immediately to SET_STRING_ELT
273+
SET_STRING_ELT(colNamesSxp, i, mkCharLenCE(anchor + colNames[i].off, colNames[i].len, ienc)); // no PROTECT as passed immediately to SET_STRING_ELT
275274
}
276-
SET_STRING_ELT(colNamesSxp, i, elem);
277275
}
278276
// "use either select= or drop= but not both" was checked earlier in freadR
279277
applyDrop(dropSxp, type, ncol, /*dropSource=*/-1);

0 commit comments

Comments
 (0)