Skip to content

Commit 587a9ad

Browse files
committed
implemented suggested changes
1 parent 067bab1 commit 587a9ad

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

src/fread.c

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ static char quote, dec;
3434
static int linesForDecDot; // when dec='auto', track the balance of fields in favor of dec='.' vs dec=',', ties go to '.'
3535
static bool eol_one_r; // only true very rarely for \r-only files
3636

37-
enum quoteRule
37+
enum quote_rule_t
3838
{
3939
// Fields may be quoted, any quote inside the field is doubled. This is
4040
// the CSV standard. For example: <<...,"hello ""world""",...>>
@@ -50,7 +50,7 @@ enum quoteRule
5050
// to mark the end of the field iff it is followed by the field separator.
5151
// Under this rule eol characters cannot appear inside the field.
5252
// For example: <<...,"hello "world"",...>>
53-
QUOTE_RULE_HYBRID,
53+
QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED,
5454

5555
// Fields are not quoted at all. Any quote characters appearing anywhere
5656
// inside the field will be treated as any other regular characters.
@@ -60,7 +60,7 @@ enum quoteRule
6060
QUOTE_RULE_COUNT
6161
};
6262

63-
static enum quoteRule quoteRule;
63+
static enum quote_rule_t quoteRule;
6464
static const char* const* NAstrings;
6565
static bool any_number_like_NAstrings = false;
6666
static bool blank_is_a_NAstring = false;
@@ -89,7 +89,7 @@ const char typeName[NUMTYPE][10] = { "drop", "bool8", "bool8", "bool8", "bool8",
8989
int8_t typeSize[NUMTYPE] = { 0, 1, 1, 1, 1, 1, 1, 4, 8, 8, 8, 8, 4, 8, 8 };
9090

9191
// In AIX, NAN and INFINITY don't qualify as constant literals. Refer: PR #3043
92-
// So we assign them through below init function.
92+
// So we assign them through the init_const_literals function below.
9393
static double NAND;
9494
static double INFD;
9595

@@ -530,9 +530,9 @@ static void Field(FieldParseContext *ctx)
530530
return;
531531
}
532532
// else *ch==quote (we don't mind that quoted fields are a little slower e.g. no desire to save switch)
533-
// the field is quoted and quotes are correctly escaped (quoteRule 0 and 1)
534-
// or the field is quoted but quotes are not escaped (quoteRule 2)
535-
// or the field is not quoted but the data contains a quote at the start (quoteRule 2 too)
533+
// the field is quoted and quotes are correctly escaped (QUOTE_RULE_EMBEDDED_QUOTES_DOUBLED and QUOTE_RULE_EMBEDDED_QUOTES_ESCAPED)
534+
// or the field is quoted but quotes are not escaped (QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED)
535+
// or the field is not quoted but the data contains a quote at the start (QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED too)
536536
// What if this string signifies an NA? Will find out after we're done parsing quotes
537537
const char *field_after_NA = end_NA_string(fieldStart);
538538
fieldStart++; // step over opening quote
@@ -551,7 +551,7 @@ static void Field(FieldParseContext *ctx)
551551
if (*ch == quote) break;
552552
}
553553
break;
554-
case QUOTE_RULE_HYBRID:
554+
case QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED:
555555
// (i) quoted (perhaps because the source system knows sep is present) but any quotes were not escaped at all,
556556
// so look for ", to define the end. (There might not be any quotes present to worry about, anyway).
557557
// (ii) not-quoted but there is a quote at the beginning so it should have been; look for , at the end
@@ -590,7 +590,7 @@ static void Field(FieldParseContext *ctx)
590590
*ctx->ch = ch;
591591
} else {
592592
*ctx->ch = ch;
593-
if (ch == eof && quoteRule != QUOTE_RULE_HYBRID) { target->off--; target->len++; } // test 1324 where final field has open quote but not ending quote; include the open quote like QUOTE_RULE_HYBRID
593+
if (ch == eof && quoteRule != QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED) { target->off--; target->len++; } // test 1324 where final field has open quote but not ending quote; include the open quote like QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED
594594
while(target->len > 0 && ((ch[-1] == ' ' && stripWhite) || ch[-1] == '\0')) { target->len--; ch--; } // test 1551.6; trailing whitespace in field [67,V37] == "\"\"A\"\" ST "
595595
}
596596
// Does end-of-field correspond to end-of-possible-NA?
@@ -1745,7 +1745,7 @@ int freadMain(freadMainArgs _args)
17451745
}
17461746
int topNumLines = 0; // the most number of lines with the same number of fields, so far
17471747
int topNumFields = 1; // how many fields that was, to resolve ties
1748-
enum quoteRule topQuoteRule = -1; // which quote rule that was
1748+
enum quote_rule_t topQuoteRule = -1; // which quote rule that was
17491749
int topSkip = 0; // how many rows to auto-skip
17501750
const char *topStart = NULL;
17511751

@@ -1814,7 +1814,7 @@ int freadMain(freadMainArgs _args)
18141814
if ((thisBlockLines > topNumLines && lastncol > 1) || // more lines wins even with fewer fields, so long as number of fields >= 2
18151815
(thisBlockLines == topNumLines &&
18161816
lastncol > topNumFields && // when number of lines is tied, choose the sep which separates it into more columns
1817-
(quoteRule < QUOTE_RULE_HYBRID || quoteRule <= topQuoteRule) && // for test 1834 where every line contains a correctly quoted field contain sep
1817+
(quoteRule < QUOTE_RULE_EMBEDDED_QUOTES_NOT_ESCAPED || quoteRule <= topQuoteRule) && // for test 1834 where every line contains a correctly quoted field contain sep
18181818
(topNumFields <= 1 || sep != ' '))) {
18191819
topNumLines = thisBlockLines;
18201820
topNumFields = lastncol;

0 commit comments

Comments
 (0)