Skip to content

Commit c240e4a

Browse files
Merge branch 'master' into staticBufferRemoval
2 parents 21a2dba + f3f7ce0 commit c240e4a

File tree

1 file changed

+44
-44
lines changed

1 file changed

+44
-44
lines changed

src/fread.c

Lines changed: 44 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -1361,7 +1361,7 @@ int freadMain(freadMainArgs _args) {
13611361
else
13621362
DTPRINT(_(" None of the NAstrings look like numbers.\n"));
13631363
}
1364-
if (args.skipNrow >= 0) DTPRINT(_(" skip num lines = %"PRId64"\n"), (int64_t)args.skipNrow);
1364+
if (args.skipNrow >= 0) DTPRINT(_(" skip num lines = %"PRId64"\n"), args.skipNrow);
13651365
if (args.skipString) DTPRINT(_(" skip to string = <<%s>>\n"), args.skipString);
13661366
DTPRINT(_(" show progress = %d\n"), args.showProgress);
13671367
DTPRINT(_(" 0/1 column will be read as %s\n"), args.logical01? "boolean" : "integer");
@@ -1614,8 +1614,8 @@ int freadMain(freadMainArgs _args) {
16141614
pos = ch;
16151615
ch = sof;
16161616
while (ch<pos) row1line+=(*ch++=='\n');
1617-
if (verbose) DTPRINT(_("Found skip='%s' on line %"PRIu64". Taking this to be header row or first row of data.\n"),
1618-
args.skipString, (uint64_t)row1line);
1617+
if (verbose) DTPRINT(_("Found skip='%s' on line %d. Taking this to be header row or first row of data.\n"),
1618+
args.skipString, row1line);
16191619
ch = pos;
16201620
}
16211621
else if (args.skipNrow >= 0) {
@@ -1628,12 +1628,12 @@ int freadMain(freadMainArgs _args) {
16281628
}
16291629
}
16301630
if (ch > sof && verbose)
1631-
DTPRINT(_(" Skipped to line %"PRIu64" in the file"), (uint64_t)row1line);
1631+
DTPRINT(_(" Skipped to line %d in the file"), row1line);
16321632
if (ch>=eof)
16331633
STOP(Pl_(row1line,
1634-
"skip=%"PRIu64" but the input only has %"PRIu64" line",
1635-
"skip=%"PRIu64" but the input only has %"PRIu64" lines"),
1636-
(uint64_t)args.skipNrow, (uint64_t)row1line);
1634+
"skip=%"PRId64" but the input only has %d line",
1635+
"skip=%"PRId64" but the input only has %d lines"),
1636+
args.skipNrow, row1line);
16371637
pos = ch;
16381638
}
16391639

@@ -1918,12 +1918,12 @@ int freadMain(freadMainArgs _args) {
19181918
}
19191919
if (verbose) {
19201920
if (nrowLimit<INT64_MAX) {
1921-
DTPRINT(_(" Number of sampling jump points = %d because nrow limit (%"PRIu64") supplied\n"), nJumps, (uint64_t)nrowLimit);
1921+
DTPRINT(_(" Number of sampling jump points = %d because nrow limit (%"PRId64") supplied\n"), nJumps, nrowLimit);
19221922
} else if (jump0size==0) {
19231923
DTPRINT(_(" Number of sampling jump points = %d because jump0size==0\n"), nJumps);
19241924
} else {
1925-
DTPRINT(_(" Number of sampling jump points = %d because (%ld bytes from row 1 to eof) / (2 * %ld jump0size) == %ld\n"),
1926-
nJumps, (long int)sz, (long int)jump0size, (long int)(sz/(2*jump0size)));
1925+
DTPRINT(_(" Number of sampling jump points = %d because (%td bytes from row 1 to eof) / (2 * %td jump0size) == %td\n"),
1926+
nJumps, sz, jump0size, sz/(2*jump0size));
19271927
}
19281928
}
19291929
nJumps++; // the extra sample at the very end (up to eof) is sampled and format checked but not jumped to when reading
@@ -2102,7 +2102,7 @@ int freadMain(freadMainArgs _args) {
21022102
bytesRead=0; // Bytes in the data section (i.e. excluding column names, header and footer, if any)
21032103

21042104
if (sampleLines <= jumpLines) {
2105-
if (verbose) DTPRINT(_(" All rows were sampled since file is small so we know nrow=%"PRIu64" exactly\n"), (uint64_t)sampleLines);
2105+
if (verbose) DTPRINT(_(" All rows were sampled since file is small so we know nrow=%"PRId64" exactly\n"), sampleLines);
21062106
estnrow = allocnrow = sampleLines;
21072107
} else {
21082108
bytesRead = eof - firstRowStart;
@@ -2115,19 +2115,19 @@ int freadMain(freadMainArgs _args) {
21152115
// blank lines have length 1 so for fill=true apply a +100% maximum. It'll be grown if needed.
21162116
if (verbose) {
21172117
DTPRINT(" =====\n"); // # notranslate
2118-
DTPRINT(_(" Sampled %"PRIu64" rows (handled \\n inside quoted fields) at %d jump points\n"), (uint64_t)sampleLines, nJumps);
2119-
DTPRINT(_(" Bytes from first data row on line %d to the end of last row: %"PRIu64"\n"), row1line, (uint64_t)bytesRead);
2118+
DTPRINT(_(" Sampled %"PRId64" rows (handled \\n inside quoted fields) at %d jump points\n"), sampleLines, nJumps);
2119+
DTPRINT(_(" Bytes from first data row on line %d to the end of last row: %td\n"), row1line, bytesRead);
21202120
DTPRINT(_(" Line length: mean=%.2f sd=%.2f min=%d max=%d\n"), meanLineLen, sd, minLen, maxLen);
2121-
DTPRINT(_(" Estimated number of rows: %"PRIu64" / %.2f = %"PRIu64"\n"), (uint64_t)bytesRead, meanLineLen, (uint64_t)estnrow);
2122-
DTPRINT(_(" Initial alloc = %"PRIu64" rows (%"PRIu64" + %d%%) using bytes/max(mean-2*sd,min) clamped between [1.1*estn, 2.0*estn]\n"),
2123-
(uint64_t)allocnrow, (uint64_t)estnrow, (int)(100.0*allocnrow/estnrow-100.0));
2121+
DTPRINT(_(" Estimated number of rows: %td / %.2f = %"PRId64"\n"), bytesRead, meanLineLen, estnrow);
2122+
DTPRINT(_(" Initial alloc = %"PRId64" rows (%"PRId64" + %d%%) using bytes/max(mean-2*sd,min) clamped between [1.1*estn, 2.0*estn]\n"),
2123+
allocnrow, estnrow, (int)(100.0*allocnrow/estnrow-100.0));
21242124
DTPRINT(" =====\n"); // # notranslate
21252125
} else {
2126-
if (sampleLines > allocnrow) INTERNAL_STOP("sampleLines(%"PRIu64") > allocnrow(%"PRIu64")", (uint64_t)sampleLines, (uint64_t)allocnrow); // # nocov
2126+
if (sampleLines > allocnrow) INTERNAL_STOP("sampleLines(%"PRId64") > allocnrow(%"PRId64")", sampleLines, allocnrow); // # nocov
21272127
}
21282128
}
21292129
if (nrowLimit < allocnrow) {
2130-
if (verbose) DTPRINT(_(" Alloc limited to lower nrows=%"PRIu64" passed in.\n"), (uint64_t)nrowLimit);
2130+
if (verbose) DTPRINT(_(" Alloc limited to lower nrows=%"PRId64" passed in.\n"), nrowLimit);
21312131
estnrow = allocnrow = nrowLimit;
21322132
}
21332133
}
@@ -2203,7 +2203,7 @@ int freadMain(freadMainArgs _args) {
22032203
rowSize8 = 0;
22042204
size = malloc(sizeof(*size) * ncol); // TODO: remove size[] when we implement Pasha's idea to += size inside processor
22052205
if (!size)
2206-
STOP(_("Failed to allocate %d bytes for '%s': %s"), (int)(sizeof(*size) * ncol), "size", strerror(errno)); // # nocov
2206+
STOP(_("Failed to allocate %zu bytes for '%s': %s"), sizeof(*size) * ncol, "size", strerror(errno)); // # nocov
22072207
nStringCols = 0;
22082208
nNonStringCols = 0;
22092209
for (int j=0; j<ncol; j++) {
@@ -2234,8 +2234,8 @@ int freadMain(freadMainArgs _args) {
22342234
//*********************************************************************************************
22352235
if (verbose) {
22362236
DTPRINT(_("[10] Allocate memory for the datatable\n"));
2237-
DTPRINT(_(" Allocating %d column slots (%d - %d dropped) with %"PRIu64" rows\n"),
2238-
ncol-ndrop, ncol, ndrop, (uint64_t)allocnrow);
2237+
DTPRINT(_(" Allocating %d column slots (%d - %d dropped) with %"PRId64" rows\n"),
2238+
ncol-ndrop, ncol, ndrop, allocnrow);
22392239
}
22402240
size_t DTbytes = allocateDT(type, size, ncol, ndrop, allocnrow);
22412241
double tAlloc = wallclock();
@@ -2263,7 +2263,7 @@ int freadMain(freadMainArgs _args) {
22632263
// For the 44GB file with 12875 columns, the max line len is 108,497. We may want each chunk to write to its
22642264
// own page (4k) of the final column, hence 1000 rows of the smallest type (4 byte int) is just
22652265
// under 4096 to leave space for R's header + malloc's header.
2266-
size_t chunkBytes = umax((size_t)(1000*meanLineLen), 1ULL/*MB*/ *1024*1024);
2266+
size_t chunkBytes = umax((uint64_t)(1000*meanLineLen), 1ULL/*MB*/ *1024*1024);
22672267
// Index of the first jump to read. May be modified if we ever need to restart
22682268
// reading from the middle of the file.
22692269
int jump0 = 0;
@@ -2282,22 +2282,22 @@ int freadMain(freadMainArgs _args) {
22822282
ASSERT(nJumps==1 /*when nrowLimit supplied*/ || nJumps==2 /*small files*/, "nJumps (%d) != 1|2", nJumps);
22832283
nJumps=1;
22842284
}
2285-
int64_t initialBuffRows = (int64_t)allocnrow / nJumps;
2285+
int64_t initialBuffRows = allocnrow / nJumps;
22862286

22872287
// Catch initialBuffRows==0 when max_nrows is small, seg fault #2243
22882288
// Rather than 10, maybe 1 would work too but then 1.5 grow factor * 1 would still be 1. This clamp
22892289
// should only engage when max_nrows is supplied, and supplied small too, so doesn't matter too much.
22902290
if (initialBuffRows < 10) initialBuffRows = 10;
22912291

2292-
if (initialBuffRows > INT32_MAX) STOP(_("Buffer size %"PRId64" is too large\n"), (int64_t)initialBuffRows);
2292+
if (initialBuffRows > INT32_MAX) STOP(_("Buffer size %"PRId64" is too large\n"), initialBuffRows);
22932293
nth = imin(nJumps, nth);
22942294

22952295
if (verbose) DTPRINT(_("[11] Read the data\n"));
22962296
while(true){ // we'll return here to reread any columns with out-of-sample type exceptions, or dirty jumps
22972297
restartTeam = false;
22982298
if (verbose)
2299-
DTPRINT(" jumps=[%d..%d), chunk_size=%"PRIu64", total_size=%"PRIu64"\n", jump0, nJumps, (uint64_t)chunkBytes, (uint64_t)(eof-pos)); // # notranslate
2300-
ASSERT(allocnrow <= nrowLimit, "allocnrow(%"PRIu64") <= nrowLimit(%"PRIu64")", (uint64_t)allocnrow, (uint64_t)nrowLimit);
2299+
DTPRINT(" jumps=[%d..%d), chunk_size=%zu, total_size=%td\n", jump0, nJumps, chunkBytes, eof-pos); // # notranslate
2300+
ASSERT(allocnrow <= nrowLimit, "allocnrow(%"PRId64") <= nrowLimit(%"PRId64")", allocnrow, nrowLimit);
23012301
#pragma omp parallel num_threads(nth)
23022302
{
23032303
int me = omp_get_thread_num();
@@ -2526,10 +2526,10 @@ int freadMain(freadMainArgs _args) {
25262526
if (verbose) {
25272527
char temp[1001];
25282528
int len = snprintf(temp, 1000,
2529-
_("Column %d%s%.*s%s bumped from '%s' to '%s' due to <<%.*s>> on row %"PRIu64"\n"),
2529+
_("Column %d%s%.*s%s bumped from '%s' to '%s' due to <<%.*s>> on row %"PRId64"\n"),
25302530
j+1, colNames?" <<":"", colNames?(colNames[j].len):0, colNames?(colNamesAnchor+colNames[j].off):"", colNames?">>":"",
25312531
typeName[IGNORE_BUMP(joldType)], typeName[IGNORE_BUMP(thisType)],
2532-
(int)(tch-fieldStart), fieldStart, (uint64_t)(ctx.DTi+myNrow));
2532+
(int)(tch-fieldStart), fieldStart, (int64_t)(ctx.DTi+myNrow));
25332533
if (len > 1000) len = 1000;
25342534
if (len > 0) {
25352535
typeBumpMsg = realloc(typeBumpMsg, typeBumpMsgSize + len + 1);
@@ -2576,7 +2576,7 @@ int freadMain(freadMainArgs _args) {
25762576
}
25772577
else if (headPos!=thisJumpStart && nrowLimit>0) { // do not care for dirty jumps since we do not read data and only want to know types
25782578
// # nocov start
2579-
snprintf(internalErr, internalErrSize, "invalid head position. jump=%d, headPos=%p, thisJumpStart=%p, sof=%p", jump, (void*)headPos, (void*)thisJumpStart, (void*)sof); // # notranslate
2579+
snprintf(internalErr, internalErrSize, "invalid head position. jump=%d, headPos=%p, thisJumpStart=%p, sof=%p", jump, headPos, thisJumpStart, sof); // # notranslate
25802580
stopTeam = true;
25812581
// # nocov end
25822582
}
@@ -2649,7 +2649,7 @@ int freadMain(freadMainArgs _args) {
26492649
}
26502650
dropFill = malloc(sizeof(*dropFill) * ndropFill);
26512651
if (!dropFill)
2652-
STOP(_("Failed to allocate %d bytes for '%s'."), (int)(sizeof(*dropFill) * ndropFill), "dropFill"); // # nocov
2652+
STOP(_("Failed to allocate %zu bytes for '%s'."), sizeof(*dropFill) * ndropFill, "dropFill"); // # nocov
26532653
int i=0;
26542654
for (int j=max_col; j<ncol; ++j) {
26552655
type[j] = CT_DROP;
@@ -2670,15 +2670,15 @@ int freadMain(freadMainArgs _args) {
26702670
if (extraAllocRows && nrowLimit>0) { // no allocating needed for nrows=0
26712671
allocnrow += extraAllocRows;
26722672
if (allocnrow > nrowLimit) allocnrow = nrowLimit;
2673-
if (verbose) DTPRINT(_(" Too few rows allocated. Allocating additional %"PRIu64" rows (now nrows=%"PRIu64") and continue reading from jump %d\n"),
2674-
(uint64_t)extraAllocRows, (uint64_t)allocnrow, jump0);
2673+
if (verbose) DTPRINT(_(" Too few rows allocated. Allocating additional %"PRId64" rows (now nrows=%"PRId64") and continue reading from jump %d\n"),
2674+
extraAllocRows, allocnrow, jump0);
26752675
allocateDT(type, size, ncol, ncol - nStringCols - nNonStringCols, allocnrow);
26762676
extraAllocRows = 0;
26772677
continue;
26782678
}
26792679
if (restartTeam && nrowLimit>0) { // no restarting needed for nrows=0 since we discard read data anyway
26802680
if (verbose) DTPRINT(_(" Restarting team from jump %d. nSwept==%d quoteRule==%d\n"), jump0, nSwept, quoteRule);
2681-
ASSERT(nSwept>0 || quoteRuleBumpedCh!=NULL, "team restart but nSwept==%d and quoteRuleBumpedCh==%p", nSwept, (void *)quoteRuleBumpedCh); // # nocov
2681+
ASSERT(nSwept>0 || quoteRuleBumpedCh!=NULL, "team restart but nSwept==%d and quoteRuleBumpedCh==%p", nSwept, quoteRuleBumpedCh); // # nocov
26822682
continue;
26832683
}
26842684
// else nrowLimit applied and stopped early normally
@@ -2763,17 +2763,17 @@ int freadMain(freadMainArgs _args) {
27632763
ch = headPos;
27642764
int tt = countfields(&ch);
27652765
if (fill>0) {
2766-
DTWARN(_("Stopped early on line %"PRIu64". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>"),
2767-
(uint64_t)DTi+row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {}, 500));
2766+
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>"),
2767+
DTi+row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {}, 500));
27682768
} else {
2769-
DTWARN(_("Stopped early on line %"PRIu64". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>"),
2770-
(uint64_t)DTi+row1line, ncol, tt, strlim(skippedFooter, (char[500]) {}, 500));
2769+
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>"),
2770+
DTi+row1line, ncol, tt, strlim(skippedFooter, (char[500]) {}, 500));
27712771
}
27722772
}
27732773
}
27742774
}
27752775
if (quoteRuleBumpedCh!=NULL && quoteRuleBumpedCh<headPos) {
2776-
DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRIu64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), (uint64_t)quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[500]) {}, 500));
2776+
DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRId64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[500]) {}, 500));
27772777
}
27782778

27792779
if (verbose) {
@@ -2783,14 +2783,14 @@ int freadMain(freadMainArgs _args) {
27832783
DTPRINT(_("%8.3fs (%3.0f%%) sep="), tLayout-tMap, 100.0*(tLayout-tMap)/tTot);
27842784
DTPRINT(sep=='\t' ? "'\\t'" : (sep=='\n' ? "'\\n'" : "'%c'"), sep); // # notranslate
27852785
DTPRINT(_(" ncol=%d and header detection\n"), ncol);
2786-
DTPRINT(_("%8.3fs (%3.0f%%) Column type detection using %"PRIu64" sample rows\n"),
2787-
tColType-tLayout, 100.0*(tColType-tLayout)/tTot, (uint64_t)sampleLines);
2788-
DTPRINT(_("%8.3fs (%3.0f%%) Allocation of %"PRIu64" rows x %d cols (%.3fGB) of which %"PRIu64" (%3.0f%%) rows used\n"),
2789-
tAlloc-tColType, 100.0*(tAlloc-tColType)/tTot, (uint64_t)allocnrow, ncol, DTbytes/(1024.0*1024*1024), (uint64_t)DTi, 100.0*DTi/allocnrow);
2786+
DTPRINT(_("%8.3fs (%3.0f%%) Column type detection using %"PRId64" sample rows\n"),
2787+
tColType-tLayout, 100.0*(tColType-tLayout)/tTot, sampleLines);
2788+
DTPRINT(_("%8.3fs (%3.0f%%) Allocation of %"PRId64" rows x %d cols (%.3fGB) of which %"PRId64" (%3.0f%%) rows used\n"),
2789+
tAlloc-tColType, 100.0*(tAlloc-tColType)/tTot, allocnrow, ncol, DTbytes/(1024.0*1024*1024), DTi, 100.0*DTi/allocnrow);
27902790
thRead/=nth; thPush/=nth;
27912791
double thWaiting = tReread-tAlloc-thRead-thPush;
2792-
DTPRINT(_("%8.3fs (%3.0f%%) Reading %d chunks (%d swept) of %.3fMB (each chunk %d rows) using %d threads\n"),
2793-
tReread-tAlloc, 100.0*(tReread-tAlloc)/tTot, nJumps, nSwept, (double)chunkBytes/(1024*1024), (int)(DTi/nJumps), nth);
2792+
DTPRINT(_("%8.3fs (%3.0f%%) Reading %d chunks (%d swept) of %.3fMB (each chunk %"PRId64" rows) using %d threads\n"),
2793+
tReread-tAlloc, 100.0*(tReread-tAlloc)/tTot, nJumps, nSwept, (double)chunkBytes/(1024*1024), DTi/nJumps, nth);
27942794
DTPRINT(_(" + %8.3fs (%3.0f%%) Parse to row-major thread buffers (grown %d times)\n"), thRead, 100.0*thRead/tTot, buffGrown);
27952795
DTPRINT(_(" + %8.3fs (%3.0f%%) Transpose\n"), thPush, 100.0*thPush/tTot);
27962796
DTPRINT(_(" + %8.3fs (%3.0f%%) Waiting\n"), thWaiting, 100.0*thWaiting/tTot);

0 commit comments

Comments
 (0)