Skip to content

Commit 0090166

Browse files
authored
Merge branch 'master' into pythonRemoval
2 parents 72149e4 + c27ec26 commit 0090166

File tree

18 files changed

+426
-73
lines changed

18 files changed

+426
-73
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
^\.github$
2020
^\.vscode$
2121
^\.zed$
22+
^\.lintr$
2223

2324
^\.gitlab-ci\.yml$
2425

GOVERNANCE.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Governance for the R data.table project
1+
# Governance for the R data.table project
22

33
# Purpose and scope
44

@@ -121,7 +121,7 @@ Funds acquired by the data.table project will be disbursed at the discretion of
121121

122122
# Code of conduct
123123

124-
The full Code of Conduct can be found [here](CODE_OF_CONDUCT.md), including details for reporting violations.
124+
The full Code of Conduct can be found [here](.github/CODE_OF_CONDUCT.md), including details for reporting violations.
125125

126126
## Reporting Responsibility
127127

R/data.table.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ replace_dot_alias = function(e) {
312312
root = root_name(jsub)
313313
} else if (length(jsub) > 2L && jsub[[2L]] %iscall% ":=") {
314314
#2142 -- j can be {} and have length 1
315-
stopf("You have wrapped := with {} which is ok but then := must be the only thing inside {}. You have something else inside {} as well. Consider placing the {} on the RHS of := instead; e.g. DT[,someCol:={tmpVar1<-...;tmpVar2<-...;tmpVar1*tmpVar2}]")
315+
stopf("Invalid use of `:=` inside `{}`. `:=` must be the only expression inside `{}` when used in `j`. Instead of: DT[{tmp1 <- ...; tmp2 <- ...; someCol := tmp1 * tmp2}], Use: DT[, someCol := {tmp1 <- ...; tmp2 <- ...; tmp1 * tmp2}]")
316316
}
317317
}
318318
if (root=="eval" && !any(all.vars(jsub[[2L]]) %chin% names_x)) {
@@ -2884,7 +2884,7 @@ address = function(x) .Call(Caddress, eval(substitute(x), parent.frame()))
28842884

28852885
":=" = function(...) {
28862886
# this error is detected when eval'ing isub and replaced with a more helpful one when using := in i due to forgetting a comma, #4227
2887-
stopf('Check that is.data.table(DT) == TRUE. Otherwise, :=, `:=`(...) and let(...) are defined for use in j, once only and in particular ways. Note that namespace-qualification like data.table::`:=`(...) is not supported. See help(":=").', class="dt_invalid_let_error")
2887+
stopf('Check that is.data.table(DT) == TRUE. Otherwise, `:=` is defined for use in j, once only and in particular ways. See help(":=", "data.table"). A common reason for this error is allocating a new column in `j` and using `<-` instead of `:=`; e.g., `DT[, new_col <- 1]` should be `DT[, new_col := 1]`. Another is using `:=` in a multi-statement `{...}` block; please use `:=` as the only statement in `j`.', class="dt_invalid_let_error")
28882888
}
28892889

28902890
# TODO(#6197): Export these.

R/fwrite.R

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
1313
yaml = FALSE,
1414
bom = FALSE,
1515
verbose=getOption("datatable.verbose", FALSE),
16-
encoding = "") {
16+
encoding = "",
17+
forceDecimal = FALSE) {
1718
na = as.character(na[1L]) # fix for #1725
1819
if (length(encoding) != 1L || !encoding %chin% c("", "UTF-8", "native")) {
1920
stopf("Argument 'encoding' must be '', 'UTF-8' or 'native'.")
@@ -51,7 +52,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
5152
length(compressLevel) == 1L && 0L <= compressLevel && compressLevel <= 9L,
5253
isTRUEorFALSE(col.names), isTRUEorFALSE(append), isTRUEorFALSE(row.names),
5354
isTRUEorFALSE(verbose), isTRUEorFALSE(showProgress), isTRUEorFALSE(logical01),
54-
isTRUEorFALSE(bom),
55+
isTRUEorFALSE(bom), isTRUEorFALSE(forceDecimal),
5556
length(na) == 1L, #1725, handles NULL or character(0) input
5657
is.character(file) && length(file)==1L && !is.na(file),
5758
length(buffMB)==1L && !is.na(buffMB) && 1L<=buffMB && buffMB<=1024L,
@@ -122,7 +123,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
122123
}
123124
.Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append,
124125
row.names, col.names, logical01, scipen, dateTimeAs, buffMB, nThread,
125-
showProgress, is_gzip, compressLevel, bom, yaml, verbose, encoding)
126+
showProgress, is_gzip, compressLevel, bom, yaml, verbose, encoding, forceDecimal)
126127
invisible()
127128
}
128129

inst/tests/tests.Rraw

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3997,9 +3997,9 @@ test(1133.8, DT[, new := if (.GRP==1L) 7L else 3.4, by=x], data.table(x=INT(1,1,
39973997
DT <- data.table(x=c("A", "A", "B", "B"), val =1:4)
39983998
DT2 <- copy(DT)[, a := 1L]
39993999
test(1134.1, DT[, {a := 1L}], DT2)
4000-
test(1134.2, DT[, {a := 1L; NULL}], error="You have wrapped.*which is ok.*Consider")
4000+
test(1134.2, DT[, {a := 1L; NULL}], error="Invalid use of `:=` inside `{}`")
40014001
test(1134.3, DT[, {b := 2L}, by=x], DT2[, b:=2L, by=x])
4002-
test(1134.4, DT[, {b := 2L; sum(val)}, by=x], error="You have wrapped.*which is ok.*Consider")
4002+
test(1134.4, DT[, {b := 2L; sum(val)}, by=x], error="Invalid use of `:=` inside `{}`")
40034003

40044004
# FR #2693 and Gabor's suggestions on datatable-help "Problem with FAQ 2.8"
40054005
d1 <- data.table(id1 = c(1L, 2L, 2L, 3L), val = 1:4, key = "id1")
@@ -21602,3 +21602,21 @@ if (getRversion() >= "4.0.0") { # rely on stopifnot(named = ...) for correct mes
2160221602
test(2337.3, is.null(fwrite(data.table(c(0.1, 0.2)), dec=",", sep="\t")))
2160321603
test(2337.4, is.null(fwrite(data.table(a=numeric(), b=numeric()), dec=",", sep=",")))
2160421604
test(2337.5, is.null(fwrite(data.table(a=numeric()), dec=",", sep=",")))
21605+
21606+
# 2864 force decimal points for whole numbers in numeric columns
21607+
dd = data.table(x=c(1, 2, 3))
21608+
di = data.table(x=c(1L, 2L, 3L))
21609+
test(2338.1, capture.output(fwrite(dd, forceDecimal=TRUE)), c("x", "1.", "2.", "3."))
21610+
test(2338.2, capture.output(fwrite(dd, forceDecimal=TRUE, dec=",", sep="\t")), c("x", "1,", "2,", "3,"))
21611+
test(2338.3, capture.output(fwrite(dd, forceDecimal=FALSE)), c("x", "1", "2", "3"))
21612+
test(2338.4, capture.output(fwrite(di, forceDecimal=TRUE)), c("x", "1", "2", "3"))
21613+
test(2338.5, capture.output(fwrite(data.table(x=c(0.)), forceDecimal=TRUE)), c("x", "0."))
21614+
test(2338.6, capture.output(fwrite(data.table(x=c(-0.)), forceDecimal=TRUE)), c("x", "0."))
21615+
test(2338.7, capture.output(fwrite(data.table(x=c(0.00)), forceDecimal=TRUE)), c("x", "0."))
21616+
# round trip
21617+
local({
21618+
f <- tempfile(); on.exit(unlink(f))
21619+
test(2338.8, {fwrite(dd, f, forceDecimal=TRUE); fread(f)}, dd)
21620+
test(2338.9, {fwrite(dd, f, forceDecimal=FALSE); fread(f)}, di)
21621+
})
21622+

man/assign.Rd

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
\description{
77
Fast add, remove and update subsets of columns, by reference. \code{:=} operator can be used in two ways: \code{LHS := RHS} form, and \code{Functional form}. See \code{Usage}.
88

9+
Note that when using \samp{:=} inside a \code{{...}} block in \code{j}, the \samp{:=} call must be the only statement. For assigning to multiple columns, use the functional form: \code{DT[, `:=`(col1=val1, col2=val2)]}.
10+
911
\code{set} is a low-overhead loop-able version of \code{:=}. It is particularly useful for repetitively updating rows of certain columns by reference (using a for-loop). See \code{Examples}. It can not perform grouping operations.
1012

1113
\code{let} is an alias for the functional form and behaves exactly like \code{`:=`}.
@@ -66,7 +68,8 @@ All of the following result in a friendly error (by design) :
6668
x := 1L
6769
DT[i, col] := val
6870
DT[i]$col := val
69-
DT[, {col1 := 1L; col2 := 2L}] # Use the functional form, `:=`(), instead (see above).
71+
DT[, {col1 := 1L; col2 := 2L}] # When `:=` is used inside `{}` in `j`, it must be the only expression.
72+
# For multiple updates, use the functional form `:=`() instead.
7073
}
7174

7275
For additional resources, please read \href{../doc/datatable-faq.html}{\code{vignette("datatable-faq")}}. Also have a look at StackOverflow's \href{https://stackoverflow.com/questions/tagged/data.table/}{data.table tag}.

man/fread.Rd

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,15 @@ Currently, the \code{yaml} setting is somewhat inflexible with respect to incorp
118118
119119
When \code{input} begins with http://, https://, ftp://, ftps://, or file://, \code{fread} detects this and \emph{downloads} the target to a temporary file (at \code{tempfile()}) before proceeding to read the file as usual. URL paths (ftps:// and https:// as well as ftp:// and http://) are downloaded with \code{download.file} and \code{method} set to \code{getOption("download.file.method")}, defaulting to \code{"auto"}; and file:// is downloaded with \code{download.file} with \code{method="internal"}. NB: this implies that for file://, even files found on the current machine will be "downloaded" (i.e., hard-copied) to a temporary file. See \code{\link{download.file}} for more details.
120120
121+
\bold{Automatic Decompression:}
122+
123+
In many cases, \code{fread} can automatically detect and decompress files with common compression extensions directly, without needing an explicit connection object or shell commands. This works by checking the file extension.
124+
125+
\itemize{
126+
\item \code{.gz} and \code{.bz2} are supported out of the box.
127+
\item \code{.zip} is also supported. If the archive contains a single data file, \code{fread} will read it. If the archive contains multiple files, \code{fread} will produce an error.
128+
}
129+
121130
\bold{Shell commands:}
122131
123132
\code{fread} accepts shell commands for convenience. The input command is run and its output written to a file in \code{tmpdir} (\code{\link{tempdir}()} by default) to which \code{fread} is applied "as normal". The details are platform dependent -- \code{system} is used on UNIX environments, \code{shell} otherwise; see \code{\link[base]{system}}.

man/fwrite.Rd

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ fwrite(x, file = "", append = FALSE, quote = "auto",
2121
yaml = FALSE,
2222
bom = FALSE,
2323
verbose = getOption("datatable.verbose", FALSE),
24-
encoding = "")
24+
encoding = "",
25+
forceDecimal = FALSE)
2526
}
2627
\arguments{
2728
\item{x}{Any \code{list} of same length vectors; e.g. \code{data.frame} and \code{data.table}. If \code{matrix}, it gets internally coerced to \code{data.table} preserving col names but not row names}
@@ -62,6 +63,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto",
6263
\item{bom}{If \code{TRUE} a BOM (Byte Order Mark) sequence (EF BB BF) is added at the beginning of the file; format 'UTF-8 with BOM'.}
6364
\item{verbose}{Be chatty and report timings?}
6465
\item{encoding}{ The encoding of the strings written to the CSV file. Default is \code{""}, which means writing raw bytes without considering the encoding. Other possible options are \code{"UTF-8"} and \code{"native"}. }
66+
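\item{forceDecimal}{ Should a decimal point be written for whole numbers in numeric (double) columns? When \code{FALSE}, the default, whole numbers like \code{c(1.0, 2.0, 3.0)} are written as \samp{1, 2, 3}, i.e., without \code{dec}. When \code{TRUE}, they are written with a trailing \code{dec}, e.g. \samp{1., 2., 3.}. Integer columns are not affected. }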
6567
}
6668
\details{
6769
\code{fwrite} began as a community contribution with \href{https://github.com/Rdatatable/data.table/pull/1613}{pull request #1613} by Otto Seiskari. This gave Matt Dowle the impetus to specialize the numeric formatting and to parallelize: \url{https://h2o.ai/blog/2016/fast-csv-writing-for-r/}. Final items were tracked in \href{https://github.com/Rdatatable/data.table/issues/1664}{issue #1664} such as automatic quoting, \code{bit64::integer64} support, decimal/scientific formatting exactly matching \code{write.csv} between 2.225074e-308 and 1.797693e+308 to 15 significant figures, \code{row.names}, dates (between 0000-03-01 and 9999-12-31), times and \code{sep2} for \code{list} columns where each cell can itself be a vector.

src/data.table.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ SEXP chmatch_R(SEXP, SEXP, SEXP);
319319
SEXP chmatchdup_R(SEXP, SEXP, SEXP);
320320
SEXP chin_R(SEXP, SEXP);
321321
SEXP freadR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
322-
SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
322+
SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
323323
SEXP rbindlist(SEXP, SEXP, SEXP, SEXP, SEXP);
324324
SEXP setlistelt(SEXP, SEXP, SEXP);
325325
SEXP setS4elt(SEXP, SEXP, SEXP);

src/fmelt.c

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -282,23 +282,23 @@ SEXP checkVars(SEXP DT, SEXP id, SEXP measure, Rboolean verbose) {
282282
}
283283

284284
struct processData {
285-
SEXP RCHK; // a 2 item list holding vars (result of checkVars) and not_NA_indices. PROTECTed up in fmelt so that preprocess() doesn't need to PROTECT. To pass rchk, #2865
286-
SEXP idcols, // convenience pointers into RCHK[0][0], RCHK[0][1] and RCHK[1] respectively
287-
variable_table, // NULL or data for variable column(s).
288-
valuecols, // list with one element per output/value column, each element is an integer vector.
289-
not_NA_indices;
290-
int *isfactor,
291-
*leach, // length of each element of the valuecols(measure.vars) list.
292-
*isidentical; // are all inputs for this value column the same type?
293-
int lids, // number of id columns.
294-
lvars, // number of variable columns.
295-
lvalues, // number of value columns.
296-
lmax, // max length of valuecols elements / number of times to repeat ids.
297-
totlen, // of output/long DT result of melt operation.
298-
nrow; // of input/wide DT to be melted.
285+
SEXP RCHK; // a 2 item list holding vars (result of checkVars) and not_NA_indices. PROTECTed up in fmelt so that preprocess() doesn't need to PROTECT. To pass rchk, #2865
286+
SEXP idcols; // convenience pointers into RCHK[0][0], RCHK[0][1] and RCHK[1] respectively
287+
SEXP variable_table; // NULL or data for variable column(s).
288+
SEXP valuecols; // list with one element per output/value column, each element is an integer vector.
289+
SEXP not_NA_indices;
290+
int *isfactor;
291+
int *leach; // length of each element of the valuecols(measure.vars) list.
292+
int *isidentical; // are all inputs for this value column the same type?
293+
int lids; // number of id columns.
294+
int lvars; // number of variable columns.
295+
int lvalues; // number of value columns.
296+
int lmax; // max length of valuecols elements / number of times to repeat ids.
297+
int totlen; // of output/long DT result of melt operation.
298+
int nrow; // of input/wide DT to be melted.
299299
SEXPTYPE *maxtype;
300-
bool measure_is_list,
301-
narm; // remove missing values?
300+
bool measure_is_list;
301+
bool narm; // remove missing values?
302302
};
303303

304304
static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valnames, Rboolean narm, Rboolean verbose, struct processData *data) {

0 commit comments

Comments
 (0)