Skip to content

Commit 75542b5

Browse files
committed
scrapped and redone
1 parent 7807adb commit 75542b5

File tree

1 file changed

+53
-57
lines changed

1 file changed

+53
-57
lines changed

src/fread.c

Lines changed: 53 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -222,9 +222,10 @@ static const char* strlim(const char *ch, char buf[static 500], size_t limit) {
222222

223223
static const char *typeLetter = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
224224

225-
static char *typesAsString(char str[static 101], int ncol) {
225+
static char *typesAsString(int ncol) {
226226
int nLetters = strlen(typeLetter);
227227
if (NUMTYPE > nLetters) INTERNAL_STOP("NUMTYPE(%d) > nLetters(%d)", NUMTYPE, nLetters); // # nocov
228+
static char str[101];
228229
int i = 0;
229230
if (ncol <= 100) {
230231
for (; i < ncol; i++) str[i] = typeLetter[IGNORE_BUMP(type[i])];
@@ -404,9 +405,10 @@ double wallclock(void)
404405
* multiple threads at the same time, or hold on to the value returned for
405406
* extended periods of time.
406407
*/
407-
static const char* filesize_to_str(char output[static 100], const uint64_t fsize)
408+
static const char* filesize_to_str(const uint64_t fsize)
408409
{
409-
static const char suffixes[] = {'T', 'G', 'M', 'K'};
410+
static const char suffixes[] = { 'T', 'G', 'M', 'K' };
411+
static char output[100];
410412
for (int i = 0; i <= sizeof(suffixes); i++) {
411413
int shift = (sizeof(suffixes) - i) * 10;
412414
if ((fsize >> shift) == 0) continue;
@@ -416,18 +418,18 @@ static const char* filesize_to_str(char output[static 100], const uint64_t fsize
416418
}
417419
if (ndigits == 0 || (fsize == (fsize >> shift << shift))) {
418420
if (i < sizeof(suffixes)) {
419-
snprintf(output, 100, "%"PRIu64"%ciB (%"PRIu64" bytes)", // # notranslate
420-
(fsize >> shift), suffixes[i], fsize);
421+
snprintf(output, sizeof(output), "%"PRIu64"%ciB (%"PRIu64" bytes)", // # notranslate
422+
fsize >> shift, suffixes[i], fsize);
421423
return output;
422424
}
423425
} else {
424-
snprintf(output, 100, "%.*f%ciB (%"PRIu64" bytes)", // # notranslate
426+
snprintf(output, sizeof(output), "%.*f%ciB (%"PRIu64" bytes)", // # notranslate
425427
ndigits, (double)fsize / (1LL << shift), suffixes[i], fsize);
426428
return output;
427429
}
428430
}
429431
if (fsize == 1) return "1 byte";
430-
snprintf(output, 100, "%"PRIu64" bytes", fsize); // # notranslate
432+
snprintf(output, sizeof(output), "%"PRIu64" bytes", fsize); // # notranslate
431433
return output;
432434
}
433435

@@ -1405,11 +1407,11 @@ int freadMain(freadMainArgs _args) {
14051407
}
14061408
if (stat_buf.st_size > SIZE_MAX) {
14071409
close(fd); // # nocov
1408-
STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str((char[100]) {}, stat_buf.st_size), fnam); // # nocov
1410+
STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str(stat_buf.st_size), fnam); // # nocov
14091411
}
14101412
fileSize = (size_t) stat_buf.st_size;
14111413
if (fileSize == 0) {close(fd); STOP(_("File is empty: %s"), fnam);}
1412-
if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str((char[100]) {}, fileSize));
1414+
if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str(fileSize));
14131415

14141416
// No MAP_POPULATE for faster nrows=10 and to make possible earlier progress bar in row count stage
14151417
// Mac doesn't appear to support MAP_POPULATE anyway (failed on CRAN when I tried).
@@ -1441,20 +1443,20 @@ int freadMain(freadMainArgs _args) {
14411443
if (GetFileSizeEx(hFile, &liFileSize)==0) { CloseHandle(hFile); STOP(_("GetFileSizeEx failed (returned 0) on file: %s"), fnam); }
14421444
if (liFileSize.QuadPart > SIZE_MAX) {
14431445
CloseHandle(hFile); // # nocov
1444-
STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str((char[100]) {}, liFileSize.QuadPart), fnam); // # nocov
1446+
STOP(_("File size [%s] exceeds the address space: %s"), filesize_to_str(liFileSize.QuadPart), fnam); // # nocov
14451447
}
14461448
fileSize = (size_t)liFileSize.QuadPart;
1447-
if (fileSize == 0) { CloseHandle(hFile); STOP(_("File is empty: %s"), fnam); }
1448-
if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str((char[100]) {}, fileSize));
1449-
HANDLE hMap=CreateFileMapping(hFile, NULL, PAGE_WRITECOPY, 0, 0, NULL);
1449+
if (fileSize==0) { CloseHandle(hFile); STOP(_("File is empty: %s"), fnam); }
1450+
if (verbose) DTPRINT(_(" File opened, size = %s.\n"), filesize_to_str(fileSize));
1451+
HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_WRITECOPY, 0, 0, NULL);
14501452
if (hMap == NULL) { CloseHandle(hFile); STOP(_("This is Windows, CreateFileMapping returned error %lu for file %s"), GetLastError(), fnam); }
1451-
mmp = MapViewOfFile(hMap, FILE_MAP_COPY, 0, 0, fileSize); // fileSize must be <= hilo passed to CreateFileMapping above.
1453+
mmp = MapViewOfFile(hMap,FILE_MAP_COPY,0,0,fileSize); // fileSize must be <= hilo passed to CreateFileMapping above.
14521454
CloseHandle(hMap); // we don't need to keep the file open; the MapView keeps an internal reference;
14531455
CloseHandle(hFile); // see https://msdn.microsoft.com/en-us/library/windows/desktop/aa366537(v=vs.85).aspx
14541456
if (mmp == NULL) {
14551457
#endif
14561458
int nbit = 8 * sizeof(char *); // #nocov
1457-
STOP(_("Opened %s file ok but could not memory map it. This is a %dbit process. %s."), filesize_to_str((char[100]) {}, fileSize), nbit, // # nocov
1459+
STOP(_("Opened %s file ok but could not memory map it. This is a %dbit process. %s."), filesize_to_str(fileSize), nbit, // # nocov
14581460
nbit <= 32 ? _("Please upgrade to 64bit") : _("There is probably not enough contiguous virtual memory available")); // # nocov
14591461
}
14601462
sof = (const char*) mmp;
@@ -1550,7 +1552,7 @@ int freadMain(freadMainArgs _args) {
15501552
// # nocov start
15511553
if (!verbose)
15521554
DTPRINT(_("%s. Attempt to copy file in RAM failed."), msg);
1553-
STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str((char[100]) {}, fileSize));
1555+
STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str(fileSize));
15541556
// # nocov end
15551557
}
15561558
if (verbose)
@@ -1826,33 +1828,31 @@ int freadMain(freadMainArgs _args) {
18261828
DTPRINT(_(" Quote rule picked = %d\n"), quoteRule);
18271829
DTPRINT(_(" fill=%s and the most number of columns found is %d\n"), fill ? "true" : "false", ncol);
18281830
}
1829-
}
1830-
1831-
if (ncol < 1 || row1line < 1) INTERNAL_STOP("ncol==%d line==%d after detecting sep, ncol and first line", ncol, row1line); // # nocov
1832-
int tt = countfields(&ch);
1833-
ch = pos; // move back to start of line since countfields() moved to next
1834-
if (!fill && tt!=ncol) INTERNAL_STOP("first line has field count %d but expecting %d", tt, ncol); // # nocov
1835-
if (verbose) {
1836-
DTPRINT(_(" Detected %d columns on line %d. This line is either column names or first data row. Line starts as: <<%s>>\n"),
1837-
tt, row1line, strlim(pos, (char[500]) {}, 30));
1838-
DTPRINT(_(" Quote rule picked = %d\n"), quoteRule);
1839-
DTPRINT(_(" fill=%s and the most number of columns found is %d\n"), fill?"true":"false", ncol);
1840-
}
1841-
1842-
if (ncol == 1 && lastEOLreplaced && (eof[-1]=='\n' || eof[-1]=='\r')) {
1843-
// Multiple newlines at the end are significant in the case of 1-column files only (multiple NA at the end)
1844-
if (fileSize % 4096 == 0) {
1845-
const char *msg = _("This file is very unusual: it's one single column, ends with 2 or more end-of-line (representing several NA at the end), and the file size is a multiple of 4096, too");
1846-
if (verbose)
1847-
DTPRINT(_(" Copying file in RAM. %s\n"), msg);
1848-
ASSERT(mmp_copy == NULL, "mmp has already been copied due to abrupt non-eol ending, so it does not end with 2 or more eol.%s", ""/*dummy arg for macro*/); // #nocov
1849-
double time_taken = copyFile(fileSize);
1850-
if (time_taken == -1.0) {
1851-
// # nocov start
1852-
if (!verbose)
1853-
DTPRINT(_("%s. Attempt to copy file in RAM failed."), msg);
1854-
STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str((char[100]) {}, fileSize));
1855-
// # nocov end
1831+
1832+
if (ncol == 1 && lastEOLreplaced && (eof[-1] == '\n' || eof[-1] == '\r')) {
1833+
// Multiple newlines at the end are significant in the case of 1-column files only (multiple NA at the end)
1834+
if (fileSize % 4096 == 0) {
1835+
const char *msg = _("This file is very unusual: it's one single column, ends with 2 or more end-of-line (representing several NA at the end), and the file size is a multiple of 4096, too");
1836+
if (verbose)
1837+
DTPRINT(_(" Copying file in RAM. %s\n"), msg);
1838+
ASSERT(mmp_copy == NULL, "mmp has already been copied due to abrupt non-eol ending, so it does not end with 2 or more eol.%s", ""/*dummy arg for macro*/); // #nocov
1839+
double time_taken = copyFile(fileSize);
1840+
if (time_taken == -1.0) {
1841+
// # nocov start
1842+
if (!verbose)
1843+
DTPRINT(_("%s. Attempt to copy file in RAM failed."), msg);
1844+
STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str(fileSize));
1845+
// # nocov end
1846+
}
1847+
if (verbose)
1848+
DTPRINT(_(" File copy in RAM took %.3f seconds.\n"), time_taken);
1849+
else if (tt > 0.5) // # nocov
1850+
DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // # nocov. not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded
1851+
pos = sof + (pos - (const char *)mmp);
1852+
firstJumpEnd = sof + (firstJumpEnd - (const char *)mmp);
1853+
} else {
1854+
if (verbose) DTPRINT(_(" 1-column file ends with 2 or more end-of-line. Restoring last eol using extra byte in cow page.\n"));
1855+
eof++;
18561856
}
18571857
*const_cast(eof - 1) = eol_one_r ? '\r' : '\n';
18581858
*const_cast(eof) = '\0';
@@ -1986,10 +1986,8 @@ int freadMain(freadMainArgs _args) {
19861986
ASSERT(jump > 0, "jump(%d)>0", jump);
19871987
memcpy(type, tmpType, ncol);
19881988
}
1989-
19901989
if (verbose && (bumped || jump == 0 || jump == nJumps - 1)) {
1991-
DTPRINT(_(" Type codes (jump %03d) : %s Quote rule %d\n"), jump, typesAsString((char[101]) {}, ncol), quoteRule);
1992-
1990+
DTPRINT(_(" Type codes (jump %03d) : %s Quote rule %d\n"), jump, typesAsString(ncol), quoteRule);
19931991
}
19941992
}
19951993

@@ -2084,7 +2082,7 @@ int freadMain(freadMainArgs _args) {
20842082
type[j] = tmpType[j];
20852083
}
20862084
}
2087-
if (verbose && bumped) DTPRINT(_(" Type codes (first row) : %s Quote rule %d\n"), typesAsString((char[101]) {}, ncol), quoteRule);
2085+
if (verbose && bumped) DTPRINT(_(" Type codes (first row) : %s Quote rule %d\n"), typesAsString(ncol), quoteRule);
20882086
}
20892087

20902088
estnrow = 1;
@@ -2216,8 +2214,7 @@ int freadMain(freadMainArgs _args) {
22162214
rowSize8 += (size[j] & 8);
22172215
if (type[j] == CT_STRING) nStringCols++; else nNonStringCols++;
22182216
}
2219-
2220-
if (verbose) DTPRINT(_(" After %d type and %d drop user overrides : %s\n"), nUserBumped, ndrop, typesAsString((char[101]) {}, ncol));
2217+
if (verbose) DTPRINT(_(" After %d type and %d drop user overrides : %s\n"), nUserBumped, ndrop, typesAsString(ncol));
22212218
tColType = wallclock();
22222219
}
22232220

@@ -2680,11 +2677,11 @@ int freadMain(freadMainArgs _args) {
26802677
}
26812678
// else nrowLimit applied and stopped early normally
26822679
}
2683-
2680+
26842681
// tell progress meter to finish up; e.g. write final newline
26852682
// if there's a reread, the progress meter will start again from 0
26862683
if (args.showProgress) progress(100, 0);
2687-
2684+
26882685
if (firstTime) {
26892686
tReread = tRead = wallclock();
26902687

@@ -2694,7 +2691,7 @@ int freadMain(freadMainArgs _args) {
26942691
for (int i = 0; i < ncol; i++) typeCounts[IGNORE_BUMP(type[i])]++;
26952692

26962693
if (nTypeBump) {
2697-
if (verbose) DTPRINT(_(" %d out-of-sample type bumps: %s\n"), nTypeBump, typesAsString((char[101]) {}, ncol));
2694+
if (verbose) DTPRINT(_(" %d out-of-sample type bumps: %s\n"), nTypeBump, typesAsString(ncol));
26982695
rowSize1 = rowSize4 = rowSize8 = 0;
26992696
nStringCols = 0;
27002697
nNonStringCols = 0;
@@ -2732,7 +2729,7 @@ int freadMain(freadMainArgs _args) {
27322729
}
27332730
double tTot = tReread - t0; // tReread==tRead when there was no reread
27342731
if (verbose) DTPRINT(_("Read %"PRIu64" rows x %d columns from %s file in %02d:%06.3f wall clock time\n"),
2735-
(uint64_t)DTi, ncol - ndrop, filesize_to_str((char[100]) {}, fileSize), (int)tTot / 60, fmod(tTot, 60.0));
2732+
(uint64_t)DTi, ncol - ndrop, filesize_to_str(fileSize), (int)tTot / 60, fmod(tTot, 60.0));
27362733

27372734
//*********************************************************************************************
27382735
// [12] Finalize the datatable
@@ -2758,22 +2755,21 @@ int freadMain(freadMainArgs _args) {
27582755
while (ch < eof && isspace(*ch)) ch++;
27592756
if (ch == eof) {
27602757
DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[500]) {}, 500));
2761-
27622758
}
27632759
else {
27642760
ch = headPos;
27652761
int tt = countfields(&ch);
27662762
if (fill > 0) {
27672763
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>"),
2768-
DTi + row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {}, 500));
2764+
DTi+row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {}, 500));
27692765
} else {
27702766
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>"),
2771-
DTi + row1line, ncol, tt, strlim(skippedFooter, (char[500]) {}, 500));
2767+
DTi+row1line, ncol, tt, strlim(skippedFooter, (char[500]) {}, 500));
27722768
}
27732769
}
27742770
}
27752771
}
2776-
if (quoteRuleBumpedCh!=NULL && quoteRuleBumpedCh<headPos) {
2772+
if (quoteRuleBumpedCh != NULL && quoteRuleBumpedCh<headPos) {
27772773
DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRId64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[500]) {}, 500));
27782774
}
27792775

0 commit comments

Comments
 (0)