Skip to content

Commit 6aaea51

Browse files
committed
Merge branch 'master' into modular_gforce
2 parents 9fc4734 + df7fa80 commit 6aaea51

File tree

5 files changed

+44
-22
lines changed

5 files changed

+44
-22
lines changed

.gitlab-ci.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,13 +337,15 @@ test-win-old:
337337
## R-release on MacOS
338338
test-mac-rel:
339339
<<: *test-mac
340+
image: macos-14-xcode-15
340341
variables:
341342
R_VERSION: "$R_REL_VERSION"
342343
R_BIN: "$R_REL_MAC_BIN"
343344

344345
## R-oldrel on MacOS
345346
test-mac-old:
346347
<<: *test-mac
348+
image: macos-14-xcode-15
347349
variables:
348350
R_VERSION: "$R_OLD_VERSION"
349351
R_BIN: "$R_OLD_MAC_BIN"

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,8 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T
353353
354354
22. `setDTthreads(percent=)` and `setDTthreads(threads=)` now respect `OMP_NUM_THREADS` and `omp_get_max_threads()`, ensuring consistency with `setDTthreads()` (no arguments) when OpenMP environment variables are set, [#7165](https://github.com/Rdatatable/data.table/issues/7165). Previously, explicitly setting a thread count or percentage would ignore these OpenMP limits, potentially exceeding the user's intended thread cap. Thanks to @bastistician for the report and @ben-schwen for the fix.
355355

356+
23. `fread()` now correctly auto-detects the separator for single-column files consisting solely of quoted values (e.g. `"this_that"\n"2025-01-01 00:00:01"`), reading them as a single column, [#7366](https://github.com/Rdatatable/data.table/issues/7366). Thanks to @arunsrinivasan for the report and @ben-schwen for the fix.
357+
356358
### NOTES
357359

358360
1. The following in-progress deprecations have proceeded:

inst/tests/tests.Rraw

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21194,28 +21194,31 @@ test(2344.03, setkey(d1[, .(V1, label = c("one", "zero", "one"), V2)][data.table
2119421194
DT = data.table(V1 = 1:2, V2 = 3:4, V3 = 5:6, key = c("V1", "V2", "V3"))
2119521195
test(2344.04, key(DT[, .(V4 = c("b", "a"), V2, V5 = c("y", "x"), V1)]), c("V1", "V2"))
2119621196

21197+
# fread with quotes and single column #7366
21198+
test(2345, fread('"this_that"\n"2025-01-01 00:00:01"'), data.table(this_that = as.POSIXct("2025-01-01 00:00:01", tz="UTC")))
21199+
2119721200
# gforce should also work with Map in j #5336
2119821201
# conversions should not turn gforce off #2934
2119921202
# lapply gforce should also work without .SD #5032
2120021203
# support arithmetic in j with gforce #3815
2120121204
out = c("GForce FALSE", "GForce FALSE", "GForce TRUE")
2120221205
dt = data.table(a=1:4, b=1:2)
21203-
test(2345.01,optimize=0:2, dt[, max(as.character(a)), by=b, verbose=TRUE], data.table(b=1:2, V1=c("3","4")), output=out)
21204-
test(2345.02,optimize=0:2, dt[, max(as.numeric(a)), by=b, verbose=TRUE], data.table(b=1:2, V1=c(3,4)), output=out)
21206+
test(2346.01,optimize=0:2, dt[, max(as.character(a)), by=b, verbose=TRUE], data.table(b=1:2, V1=c("3","4")), output=out)
21207+
test(2346.02,optimize=0:2, dt[, max(as.numeric(a)), by=b, verbose=TRUE], data.table(b=1:2, V1=c(3,4)), output=out)
2120521208
dt = data.table(a=1:4, b=1:2)
21206-
test(2345.11,optimize=0:2, dt[, Map(sum, .SD), b, verbose=TRUE], dt[, lapply(.SD, sum), b], output=out)
21207-
test(2345.12,optimize=0:2, dt[, Map(sum, .SD, .SD), by=b, verbose=TRUE], output="GForce FALSE")
21209+
test(2346.11,optimize=0:2, dt[, Map(sum, .SD), b, verbose=TRUE], dt[, lapply(.SD, sum), b], output=out)
21210+
test(2346.12,optimize=0:2, dt[, Map(sum, .SD, .SD), by=b, verbose=TRUE], output="GForce FALSE")
2120821211
dt = data.table(a = NA_integer_, b = 1:2, c = c(TRUE, FALSE))
21209-
test(2345.13,optimize=0:2, dt[, Map(weighted.mean, .SD, na.rm=c), b, .SDcols="a", verbose=TRUE], data.table(b=1:2, a=c(NaN, NA_real_)), output="GForce FALSE")
21210-
test(2345.14,optimize=0:2, dt[,list(weighted.mean(a, na.rm=c)), b, verbose=TRUE], data.table(b=1:2, V1=c(NaN, NA_real_)), output="GForce FALSE")
21212+
test(2346.13,optimize=0:2, dt[, Map(weighted.mean, .SD, na.rm=c), b, .SDcols="a", verbose=TRUE], data.table(b=1:2, a=c(NaN, NA_real_)), output="GForce FALSE")
21213+
test(2346.14,optimize=0:2, dt[,list(weighted.mean(a, na.rm=c)), b, verbose=TRUE], data.table(b=1:2, V1=c(NaN, NA_real_)), output="GForce FALSE")
2121121214
dt = data.table(a=1:2, b=1, c=1:4)
21212-
test(2345.21,optimize=0:2, dt[, lapply(list(b, c), sum), by=a, verbose=TRUE], output=out)
21213-
test(2345.22,optimize=0:2, dt[, c(list(sum(b), sum(c))), by=a, verbose=TRUE], output=out)
21214-
test(2345.23,optimize=0:2, names(dt[, lapply(list(b, c), sum), by=a]))
21215+
test(2346.21,optimize=0:2, dt[, lapply(list(b, c), sum), by=a, verbose=TRUE], output=out)
21216+
test(2346.22,optimize=0:2, dt[, c(list(sum(b), sum(c))), by=a, verbose=TRUE], output=out)
21217+
test(2346.23,optimize=0:2, names(dt[, lapply(list(b, c), sum), by=a]))
2121521218
dt = data.table(a=1:4, b=1:2)
21216-
test(2345.31,optimize=0:2, dt[, .(max(a)-min(a)), by=b, verbose=TRUE], output=out)
21217-
test(2345.32,optimize=0:2, dt[, .((max(a) - min(a)) / (max(a) + min(a))), by=b, verbose=TRUE], data.table(b=1:2, V1=c(0.5, 1/3)), output=out)
21218-
test(2345.33,optimize=0:2, dt[, sum(a) / .N, b, verbose=TRUE], output=out)
21219-
test(2345.34,optimize=0:2, dt[, mean(a) * 2L + sum(a), b, verbose=TRUE], output=out)
21220-
test(2345.35,optimize=0:2, dt[, list(range=max(a)-min(a), avg=mean(a)), by=b, verbose=TRUE], output=out)
21221-
test(2345.36,optimize=0:2, dt[, .(max(a)-sqrt(min(a))), by=b, verbose=TRUE], output="GForce FALSE")
21219+
test(2346.31,optimize=0:2, dt[, .(max(a)-min(a)), by=b, verbose=TRUE], output=out)
21220+
test(2346.32,optimize=0:2, dt[, .((max(a) - min(a)) / (max(a) + min(a))), by=b, verbose=TRUE], data.table(b=1:2, V1=c(0.5, 1/3)), output=out)
21221+
test(2346.33,optimize=0:2, dt[, sum(a) / .N, b, verbose=TRUE], output=out)
21222+
test(2346.34,optimize=0:2, dt[, mean(a) * 2L + sum(a), b, verbose=TRUE], output=out)
21223+
test(2346.35,optimize=0:2, dt[, list(range=max(a)-min(a), avg=mean(a)), by=b, verbose=TRUE], output=out)
21224+
test(2346.36,optimize=0:2, dt[, .(max(a)-sqrt(min(a))), by=b, verbose=TRUE], output="GForce FALSE")

src/fread.c

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1899,14 +1899,29 @@ int freadMain(freadMainArgs _args)
18991899
thisBlockStart = lineStart;
19001900
}
19011901
}
1902-
if ((thisBlockLines > topNumLines && lastncol > 1) || // more lines wins even with fewer fields, so long as number of fields >= 2
1903-
(thisBlockLines == topNumLines &&
1904-
lastncol > topNumFields && // when number of lines is tied, choose the sep which separates it into more columns
1905-
(quoteRule < QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED || quoteRule <= topQuoteRule) && // for test 1834 where every line contains a correctly quoted field contain sep
1906-
(topNumFields <= 1 || sep != ' '))) {
1902+
bool blockHasQuote = false;
1903+
if (quote && lastncol == 1) {
1904+
for (const char *scan = thisBlockStart; scan < ch; scan++) {
1905+
if (*scan == quote) {
1906+
blockHasQuote = true;
1907+
break;
1908+
}
1909+
}
1910+
}
1911+
bool singleColumnCandidate = (lastncol == 1 && thisBlockLines >= 2 && blockHasQuote && quoteRule < QUOTE_RULE_IGNORE_QUOTES);
1912+
// more contiguous rows than the current best; only allow 1-column wins while we still have no multi-column pick
1913+
bool betterLines = thisBlockLines > topNumLines && (lastncol > 1 || (singleColumnCandidate && topNumFields <= 1));
1914+
// first multi-column candidate after only single-column options so far
1915+
bool promoteOverSingle = (topNumFields <= 1 && lastncol > topNumFields && thisBlockLines >= 2);
1916+
// tie on number of lines: prefer the sep yielding more fields (the "more lines wins even with fewer fields, so long as number of fields >= 2" rule is betterLines above)
1917+
bool betterTie = (thisBlockLines == topNumLines &&
1918+
lastncol > topNumFields && // when number of lines is tied, choose the sep which separates it into more columns
1919+
(quoteRule < QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED || quoteRule <= topQuoteRule) && // for test 1834 where every line contains a correctly quoted field containing sep
1920+
(topNumFields <= 1 || sep != ' '));
1921+
if (betterLines || promoteOverSingle || betterTie) {
19071922
topNumLines = thisBlockLines;
19081923
topNumFields = lastncol;
1909-
topSep = sep;
1924+
topSep = singleColumnCandidate ? 127 : sep; // treat consistent single-column quoted blocks as single-column input (#7366)
19101925
topQuoteRule = quoteRule;
19111926
firstJumpEnd = ch;
19121927
topStart = thisBlockStart;

src/idatetime.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ SEXP convertDate(SEXP x, SEXP type)
145145
SEXP ans = PROTECT(allocVector(INTSXP, n));
146146
int *ansp = INTEGER(ans);
147147

148-
SEXP opt = GetOption(install("datatable.week"), R_NilValue);
148+
SEXP opt = GetOption1(install("datatable.week"));
149149
const char *mode = isString(opt) && length(opt) == 1 ? CHAR(STRING_ELT(opt, 0)) : "default";
150150

151151
bool use_sequential = !strcmp(mode, "sequential");

0 commit comments

Comments
 (0)