You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
if ((*ch==' '&&stripWhite) || (*ch=='\0'&&ch<eof))
505
515
while(*++ch==' '|| (*ch=='\0'&&ch<eof)); // if sep==' ' the space would have been skipped already and we wouldn't be on space now.
506
516
const char *fieldStart = ch;
507
-
if (*ch!=quote||quoteRule==3||quote=='\0') {
517
+
if (*ch!=quote||quoteRule==QUOTE_RULE_IGNORE_QUOTES||quote=='\0') {
508
518
// Most common case. Unambiguously not quoted. Simply search for sep|eol. If field contains sep|eol then it should have been quoted and we do not try to heal that.
509
519
while(!end_of_field(ch)) ch++; // sep, \r, \n or eof will end
if (ch==eof && quoteRule!=2) { target->off--; target->len++; } // test 1324 where final field has open quote but not ending quote; include the open quote like quote rule 2
592
+
if (ch==eof && quoteRule!=QUOTE_RULE_HYBRID) { target->off--; target->len++; } // test 1324 where final field has open quote but not ending quote; include the open quote like quote rule 2
583
593
while(target->len>0&& ((ch[-1] ==' '&&stripWhite) ||ch[-1] =='\0')) { target->len--; ch--; } // test 1551.6; trailing whitespace in field [67,V37] == "\"\"A\"\" ST "
584
594
}
585
595
// Does end-of-field correspond to end-of-possible-NA?
// when top is tied the first wins, so do all seps for the first quoteRule, then all seps for the second quoteRule, etc
1742
1754
for (int s=0; s<nseps; s++) {
@@ -1746,7 +1758,7 @@ int freadMain(freadMainArgs _args)
1746
1758
// if (verbose) DTPRINT(_(" Trying sep='%c' with quoteRule %d ...\n"), sep, quoteRule);
1747
1759
1748
1760
if (fill) {
1749
-
if (quoteRule>1 && quote) continue; // turn off self-healing quote rule when filling
1761
+
if (quoteRule>QUOTE_RULE_EMBEDDED_QUOTES_ESCAPED && quote) continue; // turn off self-healing quote rule when filling
1750
1762
int firstRowNcol=countfields(&ch);
1751
1763
int thisncol=0, maxncol=firstRowNcol, thisRow=0;
1752
1764
while (ch<eof&&++thisRow<jumpLines) { // TODO: rename 'jumpLines' to 'jumpRows'
@@ -1801,7 +1813,7 @@ int freadMain(freadMainArgs _args)
1801
1813
if ((thisBlockLines>topNumLines&&lastncol>1) ||// more lines wins even with fewer fields, so long as number of fields >= 2
1802
1814
(thisBlockLines==topNumLines&&
1803
1815
lastncol>topNumFields&&// when number of lines is tied, choose the sep which separates it into more columns
1804
-
(quoteRule<2||quoteRule <= topQuoteRule) &&// for test 1834 where every line contains a correctly quoted field contain sep
1816
+
(quoteRule<QUOTE_RULE_HYBRID||quoteRule <= topQuoteRule) &&// for test 1834 where every line contains a correctly quoted field contain sep
1805
1817
(topNumFields <= 1||sep!=' '))) {
1806
1818
topNumLines=thisBlockLines;
1807
1819
topNumFields=lastncol;
@@ -1829,10 +1841,10 @@ int freadMain(freadMainArgs _args)
1829
1841
sep=topSep;
1830
1842
// no self healing quote rules, as we don't have >1 field to disambiguate
1831
1843
// choose quote rule 0 or 1 based on for which 100 rows gets furthest into file
1832
-
for (quoteRule=0; quoteRule <= 1; quoteRule++) { // #loop_counter_not_local_scope_ok
1844
+
for (quoteRule=QUOTE_RULE_EMBEDDED_QUOTES_DOUBLED; quoteRule <= QUOTE_RULE_EMBEDDED_QUOTES_ESCAPED; quoteRule++) { // #loop_counter_not_local_scope_ok
1833
1845
int thisRow=0, thisncol=0;
1834
1846
ch=pos;
1835
-
while (ch<eof&&++thisRow<jumpLines&& (thisncol=countfields(&ch)) >= 0) {};
1847
+
while (ch<eof&&++thisRow<jumpLines&& (thisncol=countfields(&ch)) >= 0);
1836
1848
if (thisncol<0) continue; // invalid file; e.g. unescaped quote inside quoted field
1837
1849
if (!firstJumpEnd||ch>firstJumpEnd) {
1838
1850
firstJumpEnd=ch;
@@ -1843,7 +1855,7 @@ int freadMain(freadMainArgs _args)
1843
1855
}
1844
1856
1845
1857
quoteRule=topQuoteRule;
1846
-
if (quoteRule>1 && quote) {
1858
+
if (quoteRule>QUOTE_RULE_EMBEDDED_QUOTES_ESCAPED && quote) {
1847
1859
DTWARN(_("Found and resolved improper quoting in first %d rows. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), jumpLines);
1848
1860
// TODO: include line number and text in warning. Could loop again with the standard quote rule to find the line that fails.
1849
1861
}
@@ -1890,8 +1902,8 @@ int freadMain(freadMainArgs _args)
1890
1902
DTPRINT(_(" File copy in RAM took %.3f seconds.\n"), time_taken);
1891
1903
else if (tt>0.5) // # nocov
1892
1904
DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // # nocov. not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded
if (sampleLines>allocnrow) INTERNAL_STOP("sampleLines(%"PRId64") > allocnrow(%"PRId64")", sampleLines, allocnrow); // # nocov
2156
2170
}
2157
-
if (sampleLines>allocnrow) INTERNAL_STOP("sampleLines(%"PRId64") > allocnrow(%"PRId64")", sampleLines, allocnrow); // # nocov
2158
2171
}
2159
2172
if (nrowLimit<allocnrow) {
2160
2173
if (verbose) DTPRINT(_(" Alloc limited to lower nrows=%"PRId64" passed in.\n"), nrowLimit);
@@ -2241,7 +2254,7 @@ int freadMain(freadMainArgs _args)
2241
2254
if (type[j] <tmpType[j]) {
2242
2255
if (strcmp(typeName[tmpType[j]], typeName[type[j]]) !=0) {
2243
2256
DTWARN(_("Attempt to override column %d%s%.*s%s of inherent type '%s' down to '%s' ignored. Only overrides to a higher type are currently supported. If this was intended, please coerce to the lower type afterwards."),
0 commit comments