Skip to content

Commit e2e0173

Browse files
Add forceDecimal parameter to fwrite (#7212)
* add new feature
* add more tests
* add const qualifier
* Use local for clearer test
* dec= test
* Update man/fwrite.Rd
  Co-authored-by: Michael Chirico <[email protected]>
* nocov
* oops
* test #ing

---------

Co-authored-by: Michael Chirico <[email protected]>
1 parent 7f77c06 commit e2e0173

File tree

7 files changed

+46
-6
lines changed

7 files changed

+46
-6
lines changed

R/fwrite.R

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
1313
yaml = FALSE,
1414
bom = FALSE,
1515
verbose=getOption("datatable.verbose", FALSE),
16-
encoding = "") {
16+
encoding = "",
17+
forceDecimal = FALSE) {
1718
na = as.character(na[1L]) # fix for #1725
1819
if (length(encoding) != 1L || !encoding %chin% c("", "UTF-8", "native")) {
1920
stopf("Argument 'encoding' must be '', 'UTF-8' or 'native'.")
@@ -51,7 +52,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
5152
length(compressLevel) == 1L && 0L <= compressLevel && compressLevel <= 9L,
5253
isTRUEorFALSE(col.names), isTRUEorFALSE(append), isTRUEorFALSE(row.names),
5354
isTRUEorFALSE(verbose), isTRUEorFALSE(showProgress), isTRUEorFALSE(logical01),
54-
isTRUEorFALSE(bom),
55+
isTRUEorFALSE(bom), isTRUEorFALSE(forceDecimal),
5556
length(na) == 1L, #1725, handles NULL or character(0) input
5657
is.character(file) && length(file)==1L && !is.na(file),
5758
length(buffMB)==1L && !is.na(buffMB) && 1L<=buffMB && buffMB<=1024L,
@@ -122,7 +123,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto",
122123
}
123124
.Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append,
124125
row.names, col.names, logical01, scipen, dateTimeAs, buffMB, nThread,
125-
showProgress, is_gzip, compressLevel, bom, yaml, verbose, encoding)
126+
showProgress, is_gzip, compressLevel, bom, yaml, verbose, encoding, forceDecimal)
126127
invisible()
127128
}
128129

inst/tests/tests.Rraw

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21602,3 +21602,21 @@ if (getRversion() >= "4.0.0") { # rely on stopifnot(named = ...) for correct mes
2160221602
test(2337.3, is.null(fwrite(data.table(c(0.1, 0.2)), dec=",", sep="\t")))
2160321603
test(2337.4, is.null(fwrite(data.table(a=numeric(), b=numeric()), dec=",", sep=",")))
2160421604
test(2337.5, is.null(fwrite(data.table(a=numeric()), dec=",", sep=",")))
21605+
21606+
# 2864 force decimal points for whole numbers in numeric columns
21607+
dd = data.table(x=c(1, 2, 3))
21608+
di = data.table(x=c(1L, 2L, 3L))
21609+
test(2338.1, capture.output(fwrite(dd, forceDecimal=TRUE)), c("x", "1.", "2.", "3."))
21610+
test(2338.2, capture.output(fwrite(dd, forceDecimal=TRUE, dec=",", sep="\t")), c("x", "1,", "2,", "3,"))
21611+
test(2338.3, capture.output(fwrite(dd, forceDecimal=FALSE)), c("x", "1", "2", "3"))
21612+
test(2338.4, capture.output(fwrite(di, forceDecimal=TRUE)), c("x", "1", "2", "3"))
21613+
test(2338.5, capture.output(fwrite(data.table(x=c(0.)), forceDecimal=TRUE)), c("x", "0."))
21614+
test(2338.6, capture.output(fwrite(data.table(x=c(-0.)), forceDecimal=TRUE)), c("x", "0."))
21615+
test(2338.7, capture.output(fwrite(data.table(x=c(0.00)), forceDecimal=TRUE)), c("x", "0."))
21616+
# round trip
21617+
local({
21618+
f <- tempfile(); on.exit(unlink(f))
21619+
test(2338.8, {fwrite(dd, f, forceDecimal=TRUE); fread(f)}, dd)
21620+
test(2338.9, {fwrite(dd, f, forceDecimal=FALSE); fread(f)}, di)
21621+
})
21622+

man/fwrite.Rd

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ fwrite(x, file = "", append = FALSE, quote = "auto",
2121
yaml = FALSE,
2222
bom = FALSE,
2323
verbose = getOption("datatable.verbose", FALSE),
24-
encoding = "")
24+
encoding = "",
25+
forceDecimal = FALSE)
2526
}
2627
\arguments{
2728
\item{x}{Any \code{list} of same length vectors; e.g. \code{data.frame} and \code{data.table}. If \code{matrix}, it gets internally coerced to \code{data.table} preserving col names but not row names}
@@ -62,6 +63,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto",
6263
\item{bom}{If \code{TRUE} a BOM (Byte Order Mark) sequence (EF BB BF) is added at the beginning of the file; format 'UTF-8 with BOM'.}
6364
\item{verbose}{Be chatty and report timings?}
6465
\item{encoding}{ The encoding of the strings written to the CSV file. Default is \code{""}, which means writing raw bytes without considering the encoding. Other possible options are \code{"UTF-8"} and \code{"native"}. }
66+
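\item{forceDecimal}{ Should decimal points be forced for whole numbers in numeric columns? When \code{FALSE}, the default, whole numbers like \code{c(1.0, 2.0, 3.0)} will be written as \samp{1, 2, 3}, i.e., dropping \code{dec}. When \code{TRUE}, they are written with a trailing \code{dec}, e.g. \samp{1., 2., 3.}; integer columns are unaffected. }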
6567
}
6668
\details{
6769
\code{fwrite} began as a community contribution with \href{https://github.com/Rdatatable/data.table/pull/1613}{pull request #1613} by Otto Seiskari. This gave Matt Dowle the impetus to specialize the numeric formatting and to parallelize: \url{https://h2o.ai/blog/2016/fast-csv-writing-for-r/}. Final items were tracked in \href{https://github.com/Rdatatable/data.table/issues/1664}{issue #1664} such as automatic quoting, \code{bit64::integer64} support, decimal/scientific formatting exactly matching \code{write.csv} between 2.225074e-308 and 1.797693e+308 to 15 significant figures, \code{row.names}, dates (between 0000-03-01 and 9999-12-31), times and \code{sep2} for \code{list} columns where each cell can itself be a vector.

src/data.table.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ SEXP chmatch_R(SEXP, SEXP, SEXP);
319319
SEXP chmatchdup_R(SEXP, SEXP, SEXP);
320320
SEXP chin_R(SEXP, SEXP);
321321
SEXP freadR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
322-
SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
322+
SEXP fwriteR(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
323323
SEXP rbindlist(SEXP, SEXP, SEXP, SEXP, SEXP);
324324
SEXP setlistelt(SEXP, SEXP, SEXP);
325325
SEXP setS4elt(SEXP, SEXP, SEXP);

src/fwrite.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ static int scipen;
4242
static bool squashDateTime=false; // 0=ISO(yyyy-mm-dd) 1=squash(yyyymmdd)
4343
static bool verbose=false;
4444
static int gzip_level;
45+
static bool forceDecimal=false; // force writing decimal points for numeric columns
4546

4647
extern const char *getString(const void *, int64_t);
4748
extern int getStringLen(const void *, int64_t);
@@ -198,6 +199,9 @@ void writeFloat64(const void *col, int64_t row, char **pch)
198199
}
199200
} else if (x == 0.0) {
200201
*ch++ = '0'; // and we're done. so much easier rather than passing back special cases
202+
if (forceDecimal) {
203+
*ch++ = dec;
204+
}
201205
} else {
202206
if (x < 0.0) { *ch++ = '-'; x = -x; } // and we're done on sign, already written. no need to pass back sign
203207
union { double d; uint64_t l; } u;
@@ -237,8 +241,13 @@ void writeFloat64(const void *col, int64_t row, char **pch)
237241
if (l % 10 >= 5) l += 10; // use the last digit to round
238242
l /= 10;
239243
if (l == 0) {
244+
// # nocov start. Very likely not needed as such numbers (e.g. 2^-1075) likely not representable in R.
240245
if (*(ch - 1) == '-') ch--;
241246
*ch++ = '0';
247+
if (forceDecimal) {
248+
*ch++ = dec;
249+
}
250+
// # nocov end
242251
} else {
243252
// Count trailing zeros and therefore s.f. present in l
244253
int trailZero = 0;
@@ -257,6 +266,10 @@ void writeFloat64(const void *col, int64_t row, char **pch)
257266
if (sf > dr) width = sf + 1; // 1.234 and 123.4
258267
else { dl0 = 1; width = dr + 1 + dl0; } // 0.1234, 0.0001234
259268
}
269+
const bool isWholeNumber = (dr == 0 && dl0 >= 0);
270+
if (forceDecimal && isWholeNumber) {
271+
width += 1;
272+
}
260273
// So: 3.1416 => l=31416, sf=5, exp=0 dr=4; dl0=0; width=6
261274
// 30460 => l=3046, sf=4, exp=4 dr=0; dl0=1; width=5
262275
// 0.0072 => l=72, sf=2, exp=-3 dr=4; dl0=1; width=6
@@ -269,6 +282,8 @@ void writeFloat64(const void *col, int64_t row, char **pch)
269282
while (dr && sf) { *ch-- = '0' + l % 10; l /= 10; dr--; sf--; }
270283
while (dr) { *ch-- = '0'; dr--; }
271284
*ch-- = dec;
285+
} else if (forceDecimal && isWholeNumber) {
286+
*ch-- = dec;
272287
}
273288
while (dl0) { *ch-- = '0'; dl0--; }
274289
while (sf) { *ch-- = '0' + l % 10; l /= 10; sf--; }
@@ -615,6 +630,7 @@ void fwriteMain(fwriteMainArgs args)
615630
int8_t quoteHeaders = args.doQuote;
616631
verbose = args.verbose;
617632
gzip_level = args.gzip_level;
633+
forceDecimal = args.forceDecimal;
618634

619635
size_t len;
620636
unsigned int crc;

src/fwrite.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ typedef struct fwriteMainArgs
117117
bool bom;
118118
const char *yaml;
119119
bool verbose;
120+
bool forceDecimal; // force writing decimal points for numeric columns
120121
} fwriteMainArgs;
121122

122123
void fwriteMain(fwriteMainArgs args);

src/fwriteR.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,8 @@ SEXP fwriteR(
172172
SEXP bom_Arg,
173173
SEXP yaml_Arg,
174174
SEXP verbose_Arg,
175-
SEXP encoding_Arg
175+
SEXP encoding_Arg,
176+
SEXP forceDecimal_Arg // TRUE|FALSE
176177
)
177178
{
178179
if (!isNewList(DF)) error(_("fwrite must be passed an object of type list; e.g. data.frame, data.table"));
@@ -183,6 +184,7 @@ SEXP fwriteR(
183184
args.bom = LOGICAL(bom_Arg)[0];
184185
args.yaml = CHAR(STRING_ELT(yaml_Arg, 0));
185186
args.verbose = LOGICAL(verbose_Arg)[0];
187+
args.forceDecimal = LOGICAL(forceDecimal_Arg)[0];
186188
args.filename = CHAR(STRING_ELT(filename_Arg, 0));
187189
args.ncol = length(DF);
188190
if (args.ncol == 0) {

0 commit comments

Comments
 (0)