Skip to content

Commit d2b3ea7

Browse files
authored
additional formatting improvements for fread (#7106)
1 parent 131af20 commit d2b3ea7

File tree

1 file changed

+26
-27
lines changed

1 file changed

+26
-27
lines changed

src/fread.c

Lines changed: 26 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -286,7 +286,7 @@ static inline bool end_of_field(const char *ch) {
286286
// default, and therefore characters in the range 0x80-0xFF are negative.
287287
// We use eol() because that looks at eol_one_r inside it w.r.t. \r
288288
// \0 (maybe more than one) before eof are part of field and do not end it; eol() returns false for \0 but the ch==eof will return true for the \0 at eof.
289-
return *ch==sep || ((uint8_t)*ch <= 13 && (ch == eof || eol(&ch)));
289+
return *ch == sep || ((uint8_t)*ch <= 13 && (ch == eof || eol(&ch)));
290290
}
291291

292292
static inline const char *end_NA_string(const char *start) {
@@ -297,7 +297,7 @@ static inline const char *end_NA_string(const char *start) {
297297
const char *ch1 = start;
298298
const char *ch2 = *nastr;
299299
while (*ch1 == *ch2 && *ch2 != '\0') { ch1++; ch2++; }
300-
if (*ch2=='\0' && ch1 > mostConsumed) mostConsumed = ch1;
300+
if (*ch2 == '\0' && ch1 > mostConsumed) mostConsumed = ch1;
301301
nastr++;
302302
}
303303
return mostConsumed;
@@ -320,7 +320,7 @@ static inline int countfields(const char **pch)
320320
if (sep == ' ') while (*ch == ' ') ch++; // multiple sep==' ' at the start does not mean sep
321321
skip_white(&ch);
322322
if (eol(&ch) || ch == eof) {
323-
*pch = ch+1;
323+
*pch = ch + 1;
324324
return 0;
325325
}
326326
int ncol = 1;
@@ -339,7 +339,7 @@ static inline int countfields(const char **pch)
339339
ch++; // Move onto end of line character
340340
}
341341
}
342-
if (*ch==sep) {
342+
if (*ch == sep) {
343343
ch++;
344344
ncol++;
345345
continue;
@@ -563,7 +563,7 @@ static void Field(FieldParseContext *ctx)
563563
}
564564
target->len = (int32_t)(ch - fieldStart);
565565
target->off = (int32_t)(fieldStart - ctx->anchor);
566-
if (*ch==quote) { // quote=='\0' (user set quote="") would have returned earlier above in the same branch as quoteRule 3
566+
if (*ch == quote) { // quote=='\0' (user set quote="") would have returned earlier above in the same branch as quoteRule 3
567567
ch++;
568568
skip_white(&ch);
569569
*ctx->ch = ch;
@@ -917,7 +917,7 @@ static void parse_double_hexadecimal(FieldParseContext *ctx)
917917
E = 10 * E + digit;
918918
ch++;
919919
}
920-
E = 1023 + (Eneg? -E : E) - subnormal;
920+
E = 1023 + (Eneg ? -E : E) - subnormal;
921921
if (subnormal ? E : (E < 1 || E > 2046)) return;
922922

923923
*((uint64_t*)target) = (neg << 63) | (E << 52) | (acc);
@@ -976,7 +976,7 @@ static void parse_iso8601_date_core(const char **pch, int32_t *target)
976976
return;
977977

978978
*target =
979-
(year / 400 - 4)*cumDaysCycleYears[400] + // days to beginning of 400-year cycle
979+
(year / 400 - 4) * cumDaysCycleYears[400] + // days to beginning of 400-year cycle
980980
cumDaysCycleYears[year % 400] + // days to beginning of year within 400-year cycle
981981
(isLeapYear ? cumDaysCycleMonthsLeap[month - 1] : cumDaysCycleMonthsNorm[month - 1]) + // days to beginning of month within year
982982
day - 1; // day within month (subtract 1: 1970-01-01 -> 0)
@@ -1060,7 +1060,7 @@ static void parse_iso8601_timestamp(FieldParseContext *ctx)
10601060
}
10611061

10621062
// cast upfront needed to prevent silent overflow
1063-
*target = 86400*(double)date + 3600 * (hour - tz_hour) + 60 * (minute - tz_minute) + second;
1063+
*target = 86400 * (double)date + 3600 * (hour - tz_hour) + 60 * (minute - tz_minute) + second;
10641064

10651065
*ctx->ch = ch;
10661066
}
@@ -1317,7 +1317,7 @@ int freadMain(freadMainArgs _args) {
13171317
} else {
13181318
const char *ch = *nastr;
13191319
size_t nchar = strlen(ch);
1320-
if (isspace(ch[0]) || isspace(ch[nchar-1]))
1320+
if (isspace(ch[0]) || isspace(ch[nchar - 1]))
13211321
STOP(_("freadMain: NAstring <<%s>> has whitespace at the beginning or end"), ch);
13221322
if (strcmp(ch,"T") == 0 || strcmp(ch,"F") == 0 ||
13231323
strcmp(ch,"TRUE") == 0 || strcmp(ch,"FALSE") == 0 ||
@@ -1429,18 +1429,18 @@ int freadMain(freadMainArgs _args) {
14291429
HANDLE hFile = INVALID_HANDLE_VALUE;
14301430
int attempts = 0;
14311431
while(hFile == INVALID_HANDLE_VALUE && attempts < 5) {
1432-
hFile = CreateFile(fnam, GENERIC_READ, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
1432+
hFile = CreateFile(fnam, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
14331433
// FILE_SHARE_WRITE is required otherwise if the file is open in Excel, CreateFile fails. Should be ok now.
14341434
if (hFile == INVALID_HANDLE_VALUE) {
1435-
if (GetLastError() == ERROR_FILE_NOT_FOUND) STOP(_("File not found: %s"),fnam);
1435+
if (GetLastError() == ERROR_FILE_NOT_FOUND) STOP(_("File not found: %s"), fnam);
14361436
if (attempts < 4) Sleep(250); // 250ms
14371437
}
14381438
attempts++;
14391439
// Looped retry to avoid ephemeral locks by system utilities as recommended here : http://support.microsoft.com/kb/316609
14401440
}
14411441
if (hFile == INVALID_HANDLE_VALUE) STOP(_("Unable to open file after %d attempts (error %lu): %s"), attempts, GetLastError(), fnam);
14421442
LARGE_INTEGER liFileSize;
1443-
if (GetFileSizeEx(hFile, &liFileSize)==0) { CloseHandle(hFile); STOP(_("GetFileSizeEx failed (returned 0) on file: %s"), fnam); }
1443+
if (GetFileSizeEx(hFile, &liFileSize) == 0) { CloseHandle(hFile); STOP(_("GetFileSizeEx failed (returned 0) on file: %s"), fnam); }
14441444
if (liFileSize.QuadPart > SIZE_MAX) {
14451445
CloseHandle(hFile); // # nocov
14461446
STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str(liFileSize.QuadPart), fnam); // # nocov
@@ -1719,7 +1719,7 @@ int freadMain(freadMainArgs _args) {
17191719
topQuoteRule = quoteRule;
17201720
firstJumpEnd = ch; // to know how many bytes jump 0 is, for nrow estimate later (a less-good estimate when fill=true since line lengths vary more)
17211721
if (verbose) {
1722-
DTPRINT((unsigned)sep<32
1722+
DTPRINT((unsigned)sep < 32
17231723
? _(" sep=%#02x with %d fields using quote rule %d\n")
17241724
: _(" sep='%c' with %d fields using quote rule %d\n"),
17251725
sep, topNumFields, quoteRule);
@@ -1749,7 +1749,7 @@ int freadMain(freadMainArgs _args) {
17491749
prevLineStart = NULL; lineStart = ch; thisRow++;
17501750
thisncol = countfields(&ch);
17511751
}
1752-
if (thisncol>0) {
1752+
if (thisncol > 0) {
17531753
lastncol = thisncol;
17541754
thisBlockLines = 1;
17551755
thisBlockPrevStart = prevLineStart; // remember previous line start in case it has column names to be filled
@@ -1768,7 +1768,7 @@ int freadMain(freadMainArgs _args) {
17681768
firstJumpEnd = ch;
17691769
topStart = thisBlockStart;
17701770
prevStart = thisBlockPrevStart; // only used when line prior to contiguous block has a wrong number of column names to be filled
1771-
topSkip = thisRow-thisBlockLines;
1771+
topSkip = thisRow - thisBlockLines;
17721772
if (topSkip < 0) topSkip = 0; // inelegant but will do for now to pass single row input such as test 890
17731773
if (verbose) {
17741774
DTPRINT((unsigned)sep < 32
@@ -1954,7 +1954,7 @@ int freadMain(freadMainArgs _args) {
19541954
if (verbose)
19551955
DTPRINT(thisNcol < ncol ? _(" A line with too-few fields (%d/%d) was found on line %d of sample jump %d. %s\n")
19561956
: _(" A line with too-many fields (%d/%d) was found on line %d of sample jump %d. %s\n"),
1957-
thisNcol, ncol, jumpLine, jump, jump>0 ? _("Most likely this jump landed awkwardly so type bumps here will be skipped.") : "");
1957+
thisNcol, ncol, jumpLine, jump, jump > 0 ? _("Most likely this jump landed awkwardly so type bumps here will be skipped.") : "");
19581958
bumped = false;
19591959
if (jump == 0) lastRowEnd = eof; // to prevent the end from being tested; e.g. a short file with blank line within first 100 like test 976
19601960
break;
@@ -2224,7 +2224,7 @@ int freadMain(freadMainArgs _args) {
22242224
if (verbose) {
22252225
DTPRINT(_("[10] Allocate memory for the datatable\n"));
22262226
DTPRINT(_(" Allocating %d column slots (%d - %d dropped) with %"PRId64" rows\n"),
2227-
ncol-ndrop, ncol, ndrop, allocnrow);
2227+
ncol - ndrop, ncol, ndrop, allocnrow);
22282228
}
22292229
size_t DTbytes = allocateDT(type, size, ncol, ndrop, allocnrow);
22302230
double tAlloc = wallclock();
@@ -2506,7 +2506,7 @@ int freadMain(freadMainArgs _args) {
25062506
// check this line has the correct number of fields. If not, don't apply the bump from this invalid line. Instead fall through to myStopEarly below.
25072507
const char *tt = fieldStart;
25082508
int fieldsRemaining = countfields(&tt);
2509-
if (j+fieldsRemaining != ncol) break;
2509+
if (j + fieldsRemaining != ncol) break;
25102510
checkedNumberOfFields = true;
25112511
}
25122512
if (thisType <= TOGGLE_BUMP(NUMTYPE)) {
@@ -2523,7 +2523,7 @@ int freadMain(freadMainArgs _args) {
25232523
_("Column %d%s%.*s%s bumped from '%s' to '%s' due to <<%.*s>> on row %"PRId64"\n"),
25242524
j + 1, colNames ? " <<" : "", colNames ? (colNames[j].len) : 0, colNames ? (colNamesAnchor + colNames[j].off) : "", colNames ? ">>" : "",
25252525
typeName[IGNORE_BUMP(joldType)], typeName[IGNORE_BUMP(thisType)],
2526-
(int)(tch-fieldStart), fieldStart, (int64_t)(ctx.DTi+myNrow));
2526+
(int)(tch - fieldStart), fieldStart, (int64_t)(ctx.DTi + myNrow));
25272527
if (len > 1000) len = 1000;
25282528
if (len > 0) {
25292529
typeBumpMsg = realloc(typeBumpMsg, typeBumpMsgSize + len + 1);
@@ -2554,7 +2554,7 @@ int freadMain(freadMainArgs _args) {
25542554
if (tch != eof) tch++;
25552555
myNrow++;
25562556
}
2557-
if (verbose) { double now = wallclock(); thRead += now-tLast; tLast = now; }
2557+
if (verbose) { double now = wallclock(); thRead += now - tLast; tLast = now; }
25582558
ctx.anchor = thisJumpStart;
25592559
ctx.nRows = myNrow;
25602560
postprocessBuffer(&ctx);
@@ -2596,7 +2596,7 @@ int freadMain(freadMainArgs _args) {
25962596
if (quoteRuleBumpedCh == NULL) {
25972597
// for warning message if the quote rule bump does in fact manage to heal it, e.g. test 1881
25982598
quoteRuleBumpedCh = tLineStart;
2599-
quoteRuleBumpedLine = row1line+DTi;
2599+
quoteRuleBumpedLine = row1line + DTi;
26002600
}
26012601
restartTeam = true;
26022602
jump0 = jump; // this jump will restart from headPos, not from its beginning, e.g. test 1453
@@ -2644,13 +2644,12 @@ int freadMain(freadMainArgs _args) {
26442644
dropFill = malloc(sizeof(*dropFill) * ndropFill);
26452645
if (!dropFill)
26462646
STOP(_("Failed to allocate %zu bytes for '%s'."), sizeof(*dropFill) * ndropFill, "dropFill"); // # nocov
2647-
int i=0;
2648-
for (int j = max_col; j < ncol; j++) {
2647+
for (int i = 0, j = max_col; j < ncol; j++, i++) {
26492648
type[j] = CT_DROP;
26502649
size[j] = 0;
26512650
ndrop++;
26522651
nNonStringCols--;
2653-
dropFill[i++] = j;
2652+
dropFill[i] = j;
26542653
}
26552654
dropFilledCols(dropFill, ndropFill);
26562655
}
@@ -2761,10 +2760,10 @@ int freadMain(freadMainArgs _args) {
27612760
int tt = countfields(&ch);
27622761
if (fill > 0) {
27632762
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>"),
2764-
DTi+row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {}, 500));
2763+
DTi + row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {}, 500));
27652764
} else {
27662765
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>"),
2767-
DTi+row1line, ncol, tt, strlim(skippedFooter, (char[500]) {}, 500));
2766+
DTi + row1line, ncol, tt, strlim(skippedFooter, (char[500]) {}, 500));
27682767
}
27692768
}
27702769
}
@@ -2777,7 +2776,7 @@ int freadMain(freadMainArgs _args) {
27772776
DTPRINT("=============================\n"); // # notranslate
27782777
if (tTot < 0.000001) tTot = 0.000001; // to avoid nan% output in some trivially small tests where tot==0.000s
27792778
DTPRINT(_("%8.3fs (%3.0f%%) Memory map %.3fGB file\n"), tMap - t0, 100.0 * (tMap - t0) / tTot, 1.0 * fileSize / (1024 * 1024 * 1024));
2780-
DTPRINT(_("%8.3fs (%3.0f%%) sep="), tLayout-tMap, 100.0 * (tLayout - tMap) / tTot);
2779+
DTPRINT(_("%8.3fs (%3.0f%%) sep="), tLayout - tMap, 100.0 * (tLayout - tMap) / tTot);
27812780
DTPRINT(sep == '\t' ? "'\\t'" : (sep == '\n' ? "'\\n'" : "'%c'"), sep); // # notranslate
27822781
DTPRINT(_(" ncol=%d and header detection\n"), ncol);
27832782
DTPRINT(_("%8.3fs (%3.0f%%) Column type detection using %"PRId64" sample rows\n"),

0 commit comments

Comments
 (0)