Skip to content

Commit 1887699

Browse files
merge master branch
2 parents 7f7a061 + 053d905 commit 1887699

File tree

13 files changed

+106
-63
lines changed

13 files changed

+106
-63
lines changed

R/data.table.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ replace_dot_alias = function(e) {
312312
root = root_name(jsub)
313313
} else if (length(jsub) > 2L && jsub[[2L]] %iscall% ":=") {
314314
#2142 -- j can be {} and have length 1
315-
stopf("You have wrapped := with {} which is ok but then := must be the only thing inside {}. You have something else inside {} as well. Consider placing the {} on the RHS of := instead; e.g. DT[,someCol:={tmpVar1<-...;tmpVar2<-...;tmpVar1*tmpVar2}]")
315+
stopf("Invalid use of `:=` inside `{}`. `:=` must be the only expression inside `{}` when used in `j`. Instead of: DT[{tmp1 <- ...; tmp2 <- ...; someCol := tmp1 * tmp2}], Use: DT[, someCol := {tmp1 <- ...; tmp2 <- ...; tmp1 * tmp2}]")
316316
}
317317
}
318318
if (root=="eval" && !any(all.vars(jsub[[2L]]) %chin% names_x)) {
@@ -2884,7 +2884,7 @@ address = function(x) .Call(Caddress, eval(substitute(x), parent.frame()))
28842884

28852885
":=" = function(...) {
28862886
# this error is detected when eval'ing isub and replaced with a more helpful one when using := in i due to forgetting a comma, #4227
2887-
stopf('Check that is.data.table(DT) == TRUE. Otherwise, :=, `:=`(...) and let(...) are defined for use in j, once only and in particular ways. Note that namespace-qualification like data.table::`:=`(...) is not supported. See help(":=").', class="dt_invalid_let_error")
2887+
stopf('Check that is.data.table(DT) == TRUE. Otherwise, `:=` is defined for use in j, once only and in particular ways. See help(":=", "data.table"). A common reason for this error is allocating a new column in `j` and using `<-` instead of `:=`; e.g., `DT[, new_col <- 1]` should be `DT[, new_col := 1]`. Another is using `:=` in a multi-statement `{...}` block; please use `:=` as the only statement in `j`.', class="dt_invalid_let_error")
28882888
}
28892889

28902890
# TODO(#6197): Export these.

R/fwrite.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
1414
bom = FALSE,
1515
verbose=getOption("datatable.verbose", FALSE),
1616
encoding = "",
17+
forceDecimal = FALSE,
1718
select = NULL) {
1819
na = as.character(na[1L]) # fix for #1725
1920
if (length(encoding) != 1L || !encoding %chin% c("", "UTF-8", "native")) {
@@ -63,7 +64,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
6364
length(compressLevel) == 1L && 0L <= compressLevel && compressLevel <= 9L,
6465
isTRUEorFALSE(col.names), isTRUEorFALSE(append), isTRUEorFALSE(row.names),
6566
isTRUEorFALSE(verbose), isTRUEorFALSE(showProgress), isTRUEorFALSE(logical01),
66-
isTRUEorFALSE(bom),
67+
isTRUEorFALSE(bom), isTRUEorFALSE(forceDecimal),
6768
length(na) == 1L, #1725, handles NULL or character(0) input
6869
is.character(file) && length(file)==1L && !is.na(file),
6970
length(buffMB)==1L && !is.na(buffMB) && 1L<=buffMB && buffMB<=1024L,
@@ -134,7 +135,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
134135
}
135136
.Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append,
136137
row.names, col.names, logical01, scipen, dateTimeAs, buffMB, nThread,
137-
showProgress, is_gzip, compressLevel, bom, yaml, verbose, encoding)
138+
showProgress, is_gzip, compressLevel, bom, yaml, verbose, encoding, forceDecimal)
138139
invisible()
139140
}
140141

inst/tests/tests.Rraw

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3997,9 +3997,9 @@ test(1133.8, DT[, new := if (.GRP==1L) 7L else 3.4, by=x], data.table(x=INT(1,1,
39973997
DT <- data.table(x=c("A", "A", "B", "B"), val =1:4)
39983998
DT2 <- copy(DT)[, a := 1L]
39993999
test(1134.1, DT[, {a := 1L}], DT2)
4000-
test(1134.2, DT[, {a := 1L; NULL}], error="You have wrapped.*which is ok.*Consider")
4000+
test(1134.2, DT[, {a := 1L; NULL}], error="Invalid use of `:=` inside `{}`")
40014001
test(1134.3, DT[, {b := 2L}, by=x], DT2[, b:=2L, by=x])
4002-
test(1134.4, DT[, {b := 2L; sum(val)}, by=x], error="You have wrapped.*which is ok.*Consider")
4002+
test(1134.4, DT[, {b := 2L; sum(val)}, by=x], error="Invalid use of `:=` inside `{}`")
40034003

40044004
# FR #2693 and Gabor's suggestions on datatable-help "Problem with FAQ 2.8"
40054005
d1 <- data.table(id1 = c(1L, 2L, 2L, 3L), val = 1:4, key = "id1")
@@ -21603,18 +21603,35 @@ test(2337.3, is.null(fwrite(data.table(c(0.1, 0.2)), dec=",", sep="\t")))
2160321603
test(2337.4, is.null(fwrite(data.table(a=numeric(), b=numeric()), dec=",", sep=",")))
2160421604
test(2337.5, is.null(fwrite(data.table(a=numeric()), dec=",", sep=",")))
2160521605

21606+
# 2864 force decimal points for whole numbers in numeric columns
21607+
dd = data.table(x=c(1, 2, 3))
21608+
di = data.table(x=c(1L, 2L, 3L))
21609+
test(2338.1, capture.output(fwrite(dd, forceDecimal=TRUE)), c("x", "1.", "2.", "3."))
21610+
test(2338.2, capture.output(fwrite(dd, forceDecimal=TRUE, dec=",", sep="\t")), c("x", "1,", "2,", "3,"))
21611+
test(2338.3, capture.output(fwrite(dd, forceDecimal=FALSE)), c("x", "1", "2", "3"))
21612+
test(2338.4, capture.output(fwrite(di, forceDecimal=TRUE)), c("x", "1", "2", "3"))
21613+
test(2338.5, capture.output(fwrite(data.table(x=c(0.)), forceDecimal=TRUE)), c("x", "0."))
21614+
test(2338.6, capture.output(fwrite(data.table(x=c(-0.)), forceDecimal=TRUE)), c("x", "0."))
21615+
test(2338.7, capture.output(fwrite(data.table(x=c(0.00)), forceDecimal=TRUE)), c("x", "0."))
21616+
# round trip
21617+
local({
21618+
f <- tempfile(); on.exit(unlink(f))
21619+
test(2338.8, {fwrite(dd, f, forceDecimal=TRUE); fread(f)}, dd)
21620+
test(2338.9, {fwrite(dd, f, forceDecimal=FALSE); fread(f)}, di)
21621+
})
21622+
2160621623
# test for select parameter #4177
2160721624
DT = data.table(a=1:2, b=3:4)
2160821625
f = tempfile()
2160921626
fwrite(DT, f, select = "a")
21610-
test(2338.1, names(fread(f)), "a")
21627+
test(2339.1, names(fread(f)), "a")
2161121628
df = as.data.frame(DT)
2161221629
fwrite(df, f, select = "a")
21613-
test(2338.2, names(fread(f)), "a")
21630+
test(2339.2, names(fread(f)), "a")
2161421631
l = as.list(DT)
2161521632
fwrite(l, f, select = "a")
21616-
test(2338.3, names(fread(f)), "a")
21633+
test(2339.3, names(fread(f)), "a")
2161721634
m = as.matrix(DT)
2161821635
fwrite(m, f, select = "a")
21619-
test(2338.4, names(fread(f)), "a")
21636+
test(2339.4, names(fread(f)), "a")
2162021637
unlink(f)

man/assign.Rd

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
\description{
77
Fast add, remove and update subsets of columns, by reference. \code{:=} operator can be used in two ways: \code{LHS := RHS} form, and \code{Functional form}. See \code{Usage}.
88

9+
Note that when using \samp{:=} inside a \code{{...}} block in \code{j}, the \samp{:=} call must be the only statement. For assigning to multiple columns, use the functional form: \code{DT[, `:=`(col1=val1, col2=val2)]}.
10+
911
\code{set} is a low-overhead loop-able version of \code{:=}. It is particularly useful for repetitively updating rows of certain columns by reference (using a for-loop). See \code{Examples}. It cannot perform grouping operations.
1012

1113
\code{let} is an alias for the functional form and behaves exactly like \code{`:=`}.
@@ -66,7 +68,8 @@ All of the following result in a friendly error (by design) :
6668
x := 1L
6769
DT[i, col] := val
6870
DT[i]$col := val
69-
DT[, {col1 := 1L; col2 := 2L}] # Use the functional form, `:=`(), instead (see above).
71+
DT[, {col1 := 1L; col2 := 2L}] # Inside `{}` in `j`, `:=` must be the only expression.
72+
# For multiple updates, use the functional form `:=`() instead.
7073
}
7174

7275
For additional resources, please read \href{../doc/datatable-faq.html}{\code{vignette("datatable-faq")}}. Also have a look at StackOverflow's \href{https://stackoverflow.com/questions/tagged/data.table/}{data.table tag}.

man/fwrite.Rd

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ fwrite(x, file = "", append = FALSE, quote = "auto",
2121
yaml = FALSE,
2222
bom = FALSE,
2323
verbose = getOption("datatable.verbose", FALSE),
24-
encoding = "")
24+
encoding = "",
25+
forceDecimal = FALSE)
2526
}
2627
\arguments{
2728
\item{x}{Any \code{list} of same length vectors; e.g. \code{data.frame} and \code{data.table}. If \code{matrix}, it gets internally coerced to \code{data.table} preserving col names but not row names}
@@ -62,6 +63,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto",
6263
\item{bom}{If \code{TRUE} a BOM (Byte Order Mark) sequence (EF BB BF) is added at the beginning of the file; format 'UTF-8 with BOM'.}
6364
\item{verbose}{Be chatty and report timings?}
6465
\item{encoding}{ The encoding of the strings written to the CSV file. Default is \code{""}, which means writing raw bytes without considering the encoding. Other possible options are \code{"UTF-8"} and \code{"native"}. }
66+
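\item{forceDecimal}{ Should decimal points be forced for whole numbers in numeric columns? When \code{FALSE}, the default, whole numbers like \code{c(1.0, 2.0, 3.0)} are written as \samp{1, 2, 3}, i.e. without \code{dec}. When \code{TRUE}, they are written with a trailing \code{dec}, e.g. \samp{1., 2., 3.}. Integer columns are unaffected. }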
6567
\item{select}{Vector of column names or column numbers specifying which columns to include. When \code{NULL} (default), all columns are selected. This avoids creating temporary subsets for memory efficiency.}
6668
}
6769
\details{

src/data.table.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ SEXP chmatch_R(SEXP, SEXP, SEXP);
319319
SEXP chmatchdup_R(SEXP, SEXP, SEXP);
320320
SEXP chin_R(SEXP, SEXP);
321321
SEXP freadR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
322-
SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
322+
SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
323323
SEXP rbindlist(SEXP, SEXP, SEXP, SEXP, SEXP);
324324
SEXP setlistelt(SEXP, SEXP, SEXP);
325325
SEXP setS4elt(SEXP, SEXP, SEXP);

src/fmelt.c

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -282,23 +282,23 @@ SEXP checkVars(SEXP DT, SEXP id, SEXP measure, Rboolean verbose) {
282282
}
283283

284284
struct processData {
285-
SEXP RCHK; // a 2 item list holding vars (result of checkVars) and not_NA_indices. PROTECTed up in fmelt so that preprocess() doesn't need to PROTECT. To pass rchk, #2865
286-
SEXP idcols, // convenience pointers into RCHK[0][0], RCHK[0][1] and RCHK[1] respectively
287-
variable_table, // NULL or data for variable column(s).
288-
valuecols, // list with one element per output/value column, each element is an integer vector.
289-
not_NA_indices;
290-
int *isfactor,
291-
*leach, // length of each element of the valuecols(measure.vars) list.
292-
*isidentical; // are all inputs for this value column the same type?
293-
int lids, // number of id columns.
294-
lvars, // number of variable columns.
295-
lvalues, // number of value columns.
296-
lmax, // max length of valuecols elements / number of times to repeat ids.
297-
totlen, // of output/long DT result of melt operation.
298-
nrow; // of input/wide DT to be melted.
285+
SEXP RCHK; // a 2 item list holding vars (result of checkVars) and not_NA_indices. PROTECTed up in fmelt so that preprocess() doesn't need to PROTECT. To pass rchk, #2865
286+
SEXP idcols; // convenience pointers into RCHK[0][0], RCHK[0][1] and RCHK[1] respectively
287+
SEXP variable_table; // NULL or data for variable column(s).
288+
SEXP valuecols; // list with one element per output/value column, each element is an integer vector.
289+
SEXP not_NA_indices;
290+
int *isfactor;
291+
int *leach; // length of each element of the valuecols(measure.vars) list.
292+
int *isidentical; // are all inputs for this value column the same type?
293+
int lids; // number of id columns.
294+
int lvars; // number of variable columns.
295+
int lvalues; // number of value columns.
296+
int lmax; // max length of valuecols elements / number of times to repeat ids.
297+
int totlen; // of output/long DT result of melt operation.
298+
int nrow; // of input/wide DT to be melted.
299299
SEXPTYPE *maxtype;
300-
bool measure_is_list,
301-
narm; // remove missing values?
300+
bool measure_is_list;
301+
bool narm; // remove missing values?
302302
};
303303

304304
static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valnames, Rboolean narm, Rboolean verbose, struct processData *data) {

src/freadR.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,15 +265,13 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
265265
colNamesSxp = R_NilValue;
266266
SET_VECTOR_ELT(RCHK, 1, colNamesSxp = allocVector(STRSXP, ncol));
267267
for (int i = 0; i < ncol; i++) {
268-
SEXP elem;
269268
if (colNames == NULL || colNames[i].len <= 0) {
270269
char buff[12];
271270
snprintf(buff, sizeof(buff), "V%d", i + 1); // # notranslate
272-
elem = mkChar(buff); // no PROTECT as passed immediately to SET_STRING_ELT
271+
SET_STRING_ELT(colNamesSxp, i, mkChar(buff)); // no PROTECT as passed immediately to SET_STRING_ELT
273272
} else {
274-
elem = mkCharLenCE(anchor + colNames[i].off, colNames[i].len, ienc); // no PROTECT as passed immediately to SET_STRING_ELT
273+
SET_STRING_ELT(colNamesSxp, i, mkCharLenCE(anchor + colNames[i].off, colNames[i].len, ienc)); // no PROTECT as passed immediately to SET_STRING_ELT
275274
}
276-
SET_STRING_ELT(colNamesSxp, i, elem);
277275
}
278276
// "use either select= or drop= but not both" was checked earlier in freadR
279277
applyDrop(dropSxp, type, ncol, /*dropSource=*/-1);

src/fwrite.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ static int scipen;
4242
static bool squashDateTime=false; // 0=ISO(yyyy-mm-dd) 1=squash(yyyymmdd)
4343
static bool verbose=false;
4444
static int gzip_level;
45+
static bool forceDecimal=false; // force writing decimal points for numeric columns
4546

4647
extern const char *getString(const void *, int64_t);
4748
extern int getStringLen(const void *, int64_t);
@@ -198,6 +199,9 @@ void writeFloat64(const void *col, int64_t row, char **pch)
198199
}
199200
} else if (x == 0.0) {
200201
*ch++ = '0'; // and we're done. so much easier rather than passing back special cases
202+
if (forceDecimal) {
203+
*ch++ = dec;
204+
}
201205
} else {
202206
if (x < 0.0) { *ch++ = '-'; x = -x; } // and we're done on sign, already written. no need to pass back sign
203207
union { double d; uint64_t l; } u;
@@ -237,8 +241,13 @@ void writeFloat64(const void *col, int64_t row, char **pch)
237241
if (l % 10 >= 5) l += 10; // use the last digit to round
238242
l /= 10;
239243
if (l == 0) {
244+
// # nocov start. Very likely not needed as such numbers (e.g. 2^-1075) likely not representable in R.
240245
if (*(ch - 1) == '-') ch--;
241246
*ch++ = '0';
247+
if (forceDecimal) {
248+
*ch++ = dec;
249+
}
250+
// # nocov end
242251
} else {
243252
// Count trailing zeros and therefore s.f. present in l
244253
int trailZero = 0;
@@ -257,6 +266,10 @@ void writeFloat64(const void *col, int64_t row, char **pch)
257266
if (sf > dr) width = sf + 1; // 1.234 and 123.4
258267
else { dl0 = 1; width = dr + 1 + dl0; } // 0.1234, 0.0001234
259268
}
269+
const bool isWholeNumber = (dr == 0 && dl0 >= 0);
270+
if (forceDecimal && isWholeNumber) {
271+
width += 1;
272+
}
260273
// So: 3.1416 => l=31416, sf=5, exp=0 dr=4; dl0=0; width=6
261274
// 30460 => l=3046, sf=4, exp=4 dr=0; dl0=1; width=5
262275
// 0.0072 => l=72, sf=2, exp=-3 dr=4; dl0=1; width=6
@@ -269,6 +282,8 @@ void writeFloat64(const void *col, int64_t row, char **pch)
269282
while (dr && sf) { *ch-- = '0' + l % 10; l /= 10; dr--; sf--; }
270283
while (dr) { *ch-- = '0'; dr--; }
271284
*ch-- = dec;
285+
} else if (forceDecimal && isWholeNumber) {
286+
*ch-- = dec;
272287
}
273288
while (dl0) { *ch-- = '0'; dl0--; }
274289
while (sf) { *ch-- = '0' + l % 10; l /= 10; sf--; }
@@ -615,6 +630,7 @@ void fwriteMain(fwriteMainArgs args)
615630
int8_t quoteHeaders = args.doQuote;
616631
verbose = args.verbose;
617632
gzip_level = args.gzip_level;
633+
forceDecimal = args.forceDecimal;
618634

619635
size_t len;
620636
unsigned int crc;

src/fwrite.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ typedef struct fwriteMainArgs
117117
bool bom;
118118
const char *yaml;
119119
bool verbose;
120+
bool forceDecimal; // force writing decimal points for numeric columns
120121
} fwriteMainArgs;
121122

122123
void fwriteMain(fwriteMainArgs args);

0 commit comments

Comments
 (0)