You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
if ((*ch==' '&&stripWhite) || (*ch=='\0'&&ch<eof))
505
516
while(*++ch==' '|| (*ch=='\0'&&ch<eof)); // if sep==' ' the space would have been skipped already and we wouldn't be on space now.
506
517
const char *fieldStart=ch;
507
-
if (*ch!=quote||quoteRule==3||quote=='\0') {
518
+
if (*ch!=quote||quoteRule==QUOTE_RULE_IGNORE_QUOTES||quote=='\0') {
508
519
// Most common case. Unambiguously not quoted. Simply search for sep|eol. If field contains sep|eol then it should have been quoted and we do not try to heal that.
509
520
while(!end_of_field(ch)) ch++; // sep, \r, \n or eof will end
if (ch==eof && quoteRule!=2) { target->off--; target->len++; } // test 1324 where final field has open quote but not ending quote; include the open quote like quote rule 2
593
+
if (ch==eof && quoteRule!=QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED) { target->off--; target->len++; } // test 1324 where final field has open quote but not ending quote; include the open quote like QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED
583
594
while(target->len>0&& ((ch[-1] ==' '&&stripWhite) ||ch[-1] =='\0')) { target->len--; ch--; } // test 1551.6; trailing whitespace in field [67,V37] == "\"\"A\"\" ST "
584
595
}
585
596
// Does end-of-field correspond to end-of-possible-NA?
@@ -1341,7 +1352,7 @@ int freadMain(freadMainArgs _args)
1341
1352
if (verbose) DTPRINT(_(" Using %d threads (omp_get_max_threads()=%d, nth=%d)\n"), nth, maxth, args.nth);
1342
1353
}
1343
1354
1344
-
const uint64_t ui64=NA_FLOAT64_I64;
1355
+
static const uint64_t ui64=NA_FLOAT64_I64;
1345
1356
memcpy(&NA_FLOAT64, &ui64, 8);
1346
1357
1347
1358
const int64_t nrowLimit=args.nrowLimit;
@@ -1378,7 +1389,9 @@ int freadMain(freadMainArgs _args)
1378
1389
}
1379
1390
disabled_parsers[CT_BOOL8_N] = !args.logical01;
1380
1391
disabled_parsers[CT_BOOL8_Y] = !args.logicalYN;
1381
-
disabled_parsers[CT_ISO8601_DATE] =disabled_parsers[CT_ISO8601_TIME] =args.oldNoDateTime; // temporary new option in v1.13.0; see NEWS
1392
+
disabled_parsers[CT_ISO8601_DATE] =args.oldNoDateTime; // temporary new option in v1.13.0; see NEWS
// when top is tied the first wins, so do all seps for the first quoteRule, then all seps for the second quoteRule, etc
1742
1755
for (int s=0; s<nseps; s++) {
@@ -1746,7 +1759,7 @@ int freadMain(freadMainArgs _args)
1746
1759
// if (verbose) DTPRINT(_(" Trying sep='%c' with quoteRule %d ...\n"), sep, quoteRule);
1747
1760
1748
1761
if (fill) {
1749
-
if (quoteRule>1 && quote) continue; // turn off self-healing quote rule when filling
1762
+
if (quoteRule>QUOTE_RULE_EMBEDDED_QUOTES_ESCAPED && quote) continue; // turn off self-healing quote rule when filling
1750
1763
int firstRowNcol=countfields(&ch);
1751
1764
int thisncol=0, maxncol=firstRowNcol, thisRow=0;
1752
1765
while (ch<eof&&++thisRow<jumpLines) { // TODO: rename 'jumpLines' to 'jumpRows'
@@ -1801,7 +1814,7 @@ int freadMain(freadMainArgs _args)
1801
1814
if ((thisBlockLines>topNumLines&&lastncol>1) ||// more lines wins even with fewer fields, so long as number of fields >= 2
1802
1815
(thisBlockLines==topNumLines&&
1803
1816
lastncol>topNumFields&&// when number of lines is tied, choose the sep which separates it into more columns
1804
-
(quoteRule<2||quoteRule <= topQuoteRule) &&// for test 1834 where every line contains a correctly quoted field contain sep
1817
+
(quoteRule<QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED||quoteRule <= topQuoteRule) &&// for test 1834 where every line contains a correctly quoted field contain sep
1805
1818
(topNumFields <= 1||sep!=' '))) {
1806
1819
topNumLines=thisBlockLines;
1807
1820
topNumFields=lastncol;
@@ -1828,8 +1841,8 @@ int freadMain(freadMainArgs _args)
1828
1841
ASSERT(topSep==127, "Single column input has topSep=%d", topSep);
1829
1842
sep=topSep;
1830
1843
// no self healing quote rules, as we don't have >1 field to disambiguate
1831
-
// choose quote rule 0 or 1 based on for which 100 rows gets furthest into file
1832
-
for (quoteRule=0; quoteRule <= 1; quoteRule++) { // #loop_counter_not_local_scope_ok
1844
+
// choose QUOTE_RULE_EMBEDDED_QUOTES_DOUBLED or QUOTE_RULE_EMBEDDED_QUOTES_ESCAPED based on for which 100 rows gets furthest into file
1845
+
for (quoteRule=QUOTE_RULE_EMBEDDED_QUOTES_DOUBLED; quoteRule <= QUOTE_RULE_EMBEDDED_QUOTES_ESCAPED; quoteRule++) { // #loop_counter_not_local_scope_ok
1833
1846
int thisRow=0, thisncol=0;
1834
1847
ch=pos;
1835
1848
while (ch<eof&&++thisRow<jumpLines&& (thisncol=countfields(&ch)) >= 0) {};
@@ -1843,7 +1856,7 @@ int freadMain(freadMainArgs _args)
1843
1856
}
1844
1857
1845
1858
quoteRule=topQuoteRule;
1846
-
if (quoteRule>1 && quote) {
1859
+
if (quoteRule>QUOTE_RULE_EMBEDDED_QUOTES_ESCAPED && quote) {
1847
1860
DTWARN(_("Found and resolved improper quoting in first %d rows. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), jumpLines);
1848
1861
// TODO: include line number and text in warning. Could loop again with the standard quote rule to find the line that fails.
1849
1862
}
@@ -1890,8 +1903,8 @@ int freadMain(freadMainArgs _args)
1890
1903
DTPRINT(_(" File copy in RAM took %.3f seconds.\n"), time_taken);
1891
1904
else if (tt>0.5) // # nocov
1892
1905
DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // # nocov. not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded
if (verbose) DTPRINT(_(" 1-column file ends with 2 or more end-of-line. Restoring last eol using extra byte in cow page.\n"));
1897
1910
eof++;
@@ -2241,7 +2254,7 @@ int freadMain(freadMainArgs _args)
2241
2254
if (type[j] <tmpType[j]) {
2242
2255
if (strcmp(typeName[tmpType[j]], typeName[type[j]]) !=0) {
2243
2256
DTWARN(_("Attempt to override column %d%s%.*s%s of inherent type '%s' down to '%s' ignored. Only overrides to a higher type are currently supported. If this was intended, please coerce to the lower type afterwards."),
0 commit comments