@@ -1361,7 +1361,7 @@ int freadMain(freadMainArgs _args) {
13611361 else
13621362 DTPRINT (_ (" None of the NAstrings look like numbers.\n" ));
13631363 }
1364- if (args .skipNrow >= 0 ) DTPRINT (_ (" skip num lines = %" PRId64 "\n" ), ( int64_t ) args .skipNrow );
1364+ if (args .skipNrow >= 0 ) DTPRINT (_ (" skip num lines = %" PRId64 "\n" ), args .skipNrow );
13651365 if (args .skipString ) DTPRINT (_ (" skip to string = <<%s>>\n" ), args .skipString );
13661366 DTPRINT (_ (" show progress = %d\n" ), args .showProgress );
13671367 DTPRINT (_ (" 0/1 column will be read as %s\n" ), args .logical01 ? "boolean" : "integer" );
@@ -1614,8 +1614,8 @@ int freadMain(freadMainArgs _args) {
16141614 pos = ch ;
16151615 ch = sof ;
16161616 while (ch < pos ) row1line += (* ch ++ == '\n' );
1617- if (verbose ) DTPRINT (_ ("Found skip='%s' on line %" PRIu64 " . Taking this to be header row or first row of data.\n" ),
1618- args .skipString , ( uint64_t ) row1line );
1617+ if (verbose ) DTPRINT (_ ("Found skip='%s' on line %d . Taking this to be header row or first row of data.\n" ),
1618+ args .skipString , row1line );
16191619 ch = pos ;
16201620 }
16211621 else if (args .skipNrow >= 0 ) {
@@ -1628,12 +1628,12 @@ int freadMain(freadMainArgs _args) {
16281628 }
16291629 }
16301630 if (ch > sof && verbose )
1631- DTPRINT (_ (" Skipped to line %" PRIu64 " in the file" ), ( uint64_t ) row1line );
1631+ DTPRINT (_ (" Skipped to line %d in the file" ), row1line );
16321632 if (ch >=eof )
16331633 STOP (Pl_ (row1line ,
1634- "skip=%" PRIu64 " but the input only has %" PRIu64 " line" ,
1635- "skip=%" PRIu64 " but the input only has %" PRIu64 " lines" ),
1636- ( uint64_t ) args .skipNrow , ( uint64_t ) row1line );
1634+ "skip=%" PRId64 " but the input only has %d line" ,
1635+ "skip=%" PRId64 " but the input only has %d lines" ),
1636+ args .skipNrow , row1line );
16371637 pos = ch ;
16381638 }
16391639
@@ -1918,12 +1918,12 @@ int freadMain(freadMainArgs _args) {
19181918 }
19191919 if (verbose ) {
19201920 if (nrowLimit < INT64_MAX ) {
1921- DTPRINT (_ (" Number of sampling jump points = %d because nrow limit (%" PRIu64 ") supplied\n" ), nJumps , ( uint64_t ) nrowLimit );
1921+ DTPRINT (_ (" Number of sampling jump points = %d because nrow limit (%" PRId64 ") supplied\n" ), nJumps , nrowLimit );
19221922 } else if (jump0size == 0 ) {
19231923 DTPRINT (_ (" Number of sampling jump points = %d because jump0size==0\n" ), nJumps );
19241924 } else {
1925- DTPRINT (_ (" Number of sampling jump points = %d because (%ld bytes from row 1 to eof) / (2 * %ld jump0size) == %ld \n" ),
1926- nJumps , ( long int ) sz , ( long int ) jump0size , ( long int )( sz /(2 * jump0size ) ));
1925+ DTPRINT (_ (" Number of sampling jump points = %d because (%td bytes from row 1 to eof) / (2 * %td jump0size) == %td \n" ),
1926+ nJumps , sz , jump0size , sz /(2 * jump0size ));
19271927 }
19281928 }
19291929 nJumps ++ ; // the extra sample at the very end (up to eof) is sampled and format checked but not jumped to when reading
@@ -2102,7 +2102,7 @@ int freadMain(freadMainArgs _args) {
21022102 bytesRead = 0 ; // Bytes in the data section (i.e. excluding column names, header and footer, if any)
21032103
21042104 if (sampleLines <= jumpLines ) {
2105- if (verbose ) DTPRINT (_ (" All rows were sampled since file is small so we know nrow=%" PRIu64 " exactly\n" ), ( uint64_t ) sampleLines );
2105+ if (verbose ) DTPRINT (_ (" All rows were sampled since file is small so we know nrow=%" PRId64 " exactly\n" ), sampleLines );
21062106 estnrow = allocnrow = sampleLines ;
21072107 } else {
21082108 bytesRead = eof - firstRowStart ;
@@ -2115,19 +2115,19 @@ int freadMain(freadMainArgs _args) {
21152115 // blank lines have length 1 so for fill=true apply a +100% maximum. It'll be grown if needed.
21162116 if (verbose ) {
21172117 DTPRINT (" =====\n" ); // # notranslate
2118- DTPRINT (_ (" Sampled %" PRIu64 " rows (handled \\n inside quoted fields) at %d jump points\n" ), ( uint64_t ) sampleLines , nJumps );
2119- DTPRINT (_ (" Bytes from first data row on line %d to the end of last row: %" PRIu64 " \n" ), row1line , ( uint64_t ) bytesRead );
2118+ DTPRINT (_ (" Sampled %" PRId64 " rows (handled \\n inside quoted fields) at %d jump points\n" ), sampleLines , nJumps );
2119+ DTPRINT (_ (" Bytes from first data row on line %d to the end of last row: %td \n" ), row1line , bytesRead );
21202120 DTPRINT (_ (" Line length: mean=%.2f sd=%.2f min=%d max=%d\n" ), meanLineLen , sd , minLen , maxLen );
2121- DTPRINT (_ (" Estimated number of rows: %" PRIu64 " / %.2f = %"PRIu64 "\n" ), ( uint64_t ) bytesRead , meanLineLen , ( uint64_t ) estnrow );
2122- DTPRINT (_ (" Initial alloc = %" PRIu64 " rows (%" PRIu64 " + %d%%) using bytes/max(mean-2*sd,min) clamped between [1.1*estn, 2.0*estn]\n" ),
2123- ( uint64_t ) allocnrow , ( uint64_t ) estnrow , (int )(100.0 * allocnrow /estnrow - 100.0 ));
2121+ DTPRINT (_ (" Estimated number of rows: %td / %.2f = %" PRId64 "\n" ), bytesRead , meanLineLen , estnrow );
2122+ DTPRINT (_ (" Initial alloc = %" PRId64 " rows (%" PRId64 " + %d%%) using bytes/max(mean-2*sd,min) clamped between [1.1*estn, 2.0*estn]\n" ),
2123+ allocnrow , estnrow , (int )(100.0 * allocnrow /estnrow - 100.0 ));
21242124 DTPRINT (" =====\n" ); // # notranslate
21252125 } else {
2126- if (sampleLines > allocnrow ) INTERNAL_STOP ("sampleLines(%" PRIu64 ") > allocnrow(%" PRIu64 ")" , ( uint64_t ) sampleLines , ( uint64_t ) allocnrow ); // # nocov
2126+ if (sampleLines > allocnrow ) INTERNAL_STOP ("sampleLines(%" PRId64 ") > allocnrow(%" PRId64 ")" , sampleLines , allocnrow ); // # nocov
21272127 }
21282128 }
21292129 if (nrowLimit < allocnrow ) {
2130- if (verbose ) DTPRINT (_ (" Alloc limited to lower nrows=%" PRIu64 " passed in.\n" ), ( uint64_t ) nrowLimit );
2130+ if (verbose ) DTPRINT (_ (" Alloc limited to lower nrows=%" PRId64 " passed in.\n" ), nrowLimit );
21312131 estnrow = allocnrow = nrowLimit ;
21322132 }
21332133 }
@@ -2203,7 +2203,7 @@ int freadMain(freadMainArgs _args) {
22032203 rowSize8 = 0 ;
22042204 size = malloc (sizeof (* size ) * ncol ); // TODO: remove size[] when we implement Pasha's idea to += size inside processor
22052205 if (!size )
2206- STOP (_ ("Failed to allocate %d bytes for '%s': %s" ), ( int )( sizeof (* size ) * ncol ) , "size" , strerror (errno )); // # nocov
2206+ STOP (_ ("Failed to allocate %zu bytes for '%s': %s" ), sizeof (* size ) * ncol , "size" , strerror (errno )); // # nocov
22072207 nStringCols = 0 ;
22082208 nNonStringCols = 0 ;
22092209 for (int j = 0 ; j < ncol ; j ++ ) {
@@ -2234,8 +2234,8 @@ int freadMain(freadMainArgs _args) {
22342234 //*********************************************************************************************
22352235 if (verbose ) {
22362236 DTPRINT (_ ("[10] Allocate memory for the datatable\n" ));
2237- DTPRINT (_ (" Allocating %d column slots (%d - %d dropped) with %" PRIu64 " rows\n" ),
2238- ncol - ndrop , ncol , ndrop , ( uint64_t ) allocnrow );
2237+ DTPRINT (_ (" Allocating %d column slots (%d - %d dropped) with %" PRId64 " rows\n" ),
2238+ ncol - ndrop , ncol , ndrop , allocnrow );
22392239 }
22402240 size_t DTbytes = allocateDT (type , size , ncol , ndrop , allocnrow );
22412241 double tAlloc = wallclock ();
@@ -2263,7 +2263,7 @@ int freadMain(freadMainArgs _args) {
22632263 // For the 44GB file with 12875 columns, the max line len is 108,497. We may want each chunk to write to its
22642264 // own page (4k) of the final column, hence 1000 rows of the smallest type (4 byte int) is just
22652265 // under 4096 to leave space for R's header + malloc's header.
2266- size_t chunkBytes = umax ((size_t )(1000 * meanLineLen ), 1ULL /*MB*/ * 1024 * 1024 );
2266+ size_t chunkBytes = umax ((uint64_t )(1000 * meanLineLen ), 1ULL /*MB*/ * 1024 * 1024 );
22672267 // Index of the first jump to read. May be modified if we ever need to restart
22682268 // reading from the middle of the file.
22692269 int jump0 = 0 ;
@@ -2282,22 +2282,22 @@ int freadMain(freadMainArgs _args) {
22822282 ASSERT (nJumps == 1 /*when nrowLimit supplied*/ || nJumps == 2 /*small files*/ , "nJumps (%d) != 1|2" , nJumps );
22832283 nJumps = 1 ;
22842284 }
2285- int64_t initialBuffRows = ( int64_t ) allocnrow / nJumps ;
2285+ int64_t initialBuffRows = allocnrow / nJumps ;
22862286
22872287 // Catch initialBuffRows==0 when max_nrows is small, seg fault #2243
22882288 // Rather than 10, maybe 1 would work too but then 1.5 grow factor * 1 would still be 1. This clamp
22892289 // should only engage when max_nrows is supplied, and supplied small too, so doesn't matter too much.
22902290 if (initialBuffRows < 10 ) initialBuffRows = 10 ;
22912291
2292- if (initialBuffRows > INT32_MAX ) STOP (_ ("Buffer size %" PRId64 " is too large\n" ), ( int64_t ) initialBuffRows );
2292+ if (initialBuffRows > INT32_MAX ) STOP (_ ("Buffer size %" PRId64 " is too large\n" ), initialBuffRows );
22932293 nth = imin (nJumps , nth );
22942294
22952295 if (verbose ) DTPRINT (_ ("[11] Read the data\n" ));
22962296 while (true){ // we'll return here to reread any columns with out-of-sample type exceptions, or dirty jumps
22972297 restartTeam = false;
22982298 if (verbose )
2299- DTPRINT (" jumps=[%d..%d), chunk_size=%" PRIu64 " , total_size=%" PRIu64 " \n" , jump0 , nJumps , ( uint64_t ) chunkBytes , ( uint64_t )( eof - pos ) ); // # notranslate
2300- ASSERT (allocnrow <= nrowLimit , "allocnrow(%" PRIu64 ") <= nrowLimit(%" PRIu64 ")" , ( uint64_t ) allocnrow , ( uint64_t ) nrowLimit );
2299+ DTPRINT (" jumps=[%d..%d), chunk_size=%zu , total_size=%td \n" , jump0 , nJumps , chunkBytes , eof - pos ); // # notranslate
2300+ ASSERT (allocnrow <= nrowLimit , "allocnrow(%" PRId64 ") <= nrowLimit(%" PRId64 ")" , allocnrow , nrowLimit );
23012301 #pragma omp parallel num_threads(nth)
23022302 {
23032303 int me = omp_get_thread_num ();
@@ -2526,10 +2526,10 @@ int freadMain(freadMainArgs _args) {
25262526 if (verbose ) {
25272527 char temp [1001 ];
25282528 int len = snprintf (temp , 1000 ,
2529- _ ("Column %d%s%.*s%s bumped from '%s' to '%s' due to <<%.*s>> on row %" PRIu64 "\n" ),
2529+ _ ("Column %d%s%.*s%s bumped from '%s' to '%s' due to <<%.*s>> on row %" PRId64 "\n" ),
25302530 j + 1 , colNames ?" <<" :"" , colNames ?(colNames [j ].len ):0 , colNames ?(colNamesAnchor + colNames [j ].off ):"" , colNames ?">>" :"" ,
25312531 typeName [IGNORE_BUMP (joldType )], typeName [IGNORE_BUMP (thisType )],
2532- (int )(tch - fieldStart ), fieldStart , (uint64_t )(ctx .DTi + myNrow ));
2532+ (int )(tch - fieldStart ), fieldStart , (int64_t )(ctx .DTi + myNrow ));
25332533 if (len > 1000 ) len = 1000 ;
25342534 if (len > 0 ) {
25352535 typeBumpMsg = realloc (typeBumpMsg , typeBumpMsgSize + len + 1 );
@@ -2576,7 +2576,7 @@ int freadMain(freadMainArgs _args) {
25762576 }
25772577 else if (headPos != thisJumpStart && nrowLimit > 0 ) { // do not care for dirty jumps since we do not read data and only want to know types
25782578 // # nocov start
2579- snprintf (internalErr , internalErrSize , "invalid head position. jump=%d, headPos=%p, thisJumpStart=%p, sof=%p" , jump , ( void * ) headPos , ( void * ) thisJumpStart , ( void * ) sof ); // # notranslate
2579+ snprintf (internalErr , internalErrSize , "invalid head position. jump=%d, headPos=%p, thisJumpStart=%p, sof=%p" , jump , headPos , thisJumpStart , sof ); // # notranslate
25802580 stopTeam = true;
25812581 // # nocov end
25822582 }
@@ -2649,7 +2649,7 @@ int freadMain(freadMainArgs _args) {
26492649 }
26502650 dropFill = malloc (sizeof (* dropFill ) * ndropFill );
26512651 if (!dropFill )
2652- STOP (_ ("Failed to allocate %d bytes for '%s'." ), ( int )( sizeof (* dropFill ) * ndropFill ) , "dropFill" ); // # nocov
2652+ STOP (_ ("Failed to allocate %zu bytes for '%s'." ), sizeof (* dropFill ) * ndropFill , "dropFill" ); // # nocov
26532653 int i = 0 ;
26542654 for (int j = max_col ; j < ncol ; ++ j ) {
26552655 type [j ] = CT_DROP ;
@@ -2670,15 +2670,15 @@ int freadMain(freadMainArgs _args) {
26702670 if (extraAllocRows && nrowLimit > 0 ) { // no allocating needed for nrows=0
26712671 allocnrow += extraAllocRows ;
26722672 if (allocnrow > nrowLimit ) allocnrow = nrowLimit ;
2673- if (verbose ) DTPRINT (_ (" Too few rows allocated. Allocating additional %" PRIu64 " rows (now nrows=%" PRIu64 ") and continue reading from jump %d\n" ),
2674- ( uint64_t ) extraAllocRows , ( uint64_t ) allocnrow , jump0 );
2673+ if (verbose ) DTPRINT (_ (" Too few rows allocated. Allocating additional %" PRId64 " rows (now nrows=%" PRId64 ") and continue reading from jump %d\n" ),
2674+ extraAllocRows , allocnrow , jump0 );
26752675 allocateDT (type , size , ncol , ncol - nStringCols - nNonStringCols , allocnrow );
26762676 extraAllocRows = 0 ;
26772677 continue ;
26782678 }
26792679 if (restartTeam && nrowLimit > 0 ) { // no restarting needed for nrows=0 since we discard read data anyway
26802680 if (verbose ) DTPRINT (_ (" Restarting team from jump %d. nSwept==%d quoteRule==%d\n" ), jump0 , nSwept , quoteRule );
2681- ASSERT (nSwept > 0 || quoteRuleBumpedCh != NULL , "team restart but nSwept==%d and quoteRuleBumpedCh==%p" , nSwept , ( void * ) quoteRuleBumpedCh ); // # nocov
2681+ ASSERT (nSwept > 0 || quoteRuleBumpedCh != NULL , "team restart but nSwept==%d and quoteRuleBumpedCh==%p" , nSwept , quoteRuleBumpedCh ); // # nocov
26822682 continue ;
26832683 }
26842684 // else nrowLimit applied and stopped early normally
@@ -2763,17 +2763,17 @@ int freadMain(freadMainArgs _args) {
27632763 ch = headPos ;
27642764 int tt = countfields (& ch );
27652765 if (fill > 0 ) {
2766- DTWARN (_ ("Stopped early on line %" PRIu64 ". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>" ),
2767- ( uint64_t ) DTi + row1line , ncol , tt , tt , strlim (skippedFooter , (char [500 ]) {}, 500 ));
2766+ DTWARN (_ ("Stopped early on line %" PRId64 ". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>" ),
2767+ DTi + row1line , ncol , tt , tt , strlim (skippedFooter , (char [500 ]) {}, 500 ));
27682768 } else {
2769- DTWARN (_ ("Stopped early on line %" PRIu64 ". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>" ),
2770- ( uint64_t ) DTi + row1line , ncol , tt , strlim (skippedFooter , (char [500 ]) {}, 500 ));
2769+ DTWARN (_ ("Stopped early on line %" PRId64 ". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>" ),
2770+ DTi + row1line , ncol , tt , strlim (skippedFooter , (char [500 ]) {}, 500 ));
27712771 }
27722772 }
27732773 }
27742774 }
27752775 if (quoteRuleBumpedCh != NULL && quoteRuleBumpedCh < headPos ) {
2776- DTWARN (_ ("Found and resolved improper quoting out-of-sample. First healed line %" PRIu64 ": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning." ), ( uint64_t ) quoteRuleBumpedLine , strlim (quoteRuleBumpedCh , (char [500 ]) {}, 500 ));
2776+ DTWARN (_ ("Found and resolved improper quoting out-of-sample. First healed line %" PRId64 ": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning." ), quoteRuleBumpedLine , strlim (quoteRuleBumpedCh , (char [500 ]) {}, 500 ));
27772777 }
27782778
27792779 if (verbose ) {
@@ -2783,14 +2783,14 @@ int freadMain(freadMainArgs _args) {
27832783 DTPRINT (_ ("%8.3fs (%3.0f%%) sep=" ), tLayout - tMap , 100.0 * (tLayout - tMap )/tTot );
27842784 DTPRINT (sep == '\t' ? "'\\t'" : (sep == '\n' ? "'\\n'" : "'%c'" ), sep ); // # notranslate
27852785 DTPRINT (_ (" ncol=%d and header detection\n" ), ncol );
2786- DTPRINT (_ ("%8.3fs (%3.0f%%) Column type detection using %" PRIu64 " sample rows\n" ),
2787- tColType - tLayout , 100.0 * (tColType - tLayout )/tTot , ( uint64_t ) sampleLines );
2788- DTPRINT (_ ("%8.3fs (%3.0f%%) Allocation of %" PRIu64 " rows x %d cols (%.3fGB) of which %" PRIu64 " (%3.0f%%) rows used\n" ),
2789- tAlloc - tColType , 100.0 * (tAlloc - tColType )/tTot , ( uint64_t ) allocnrow , ncol , DTbytes /(1024.0 * 1024 * 1024 ), ( uint64_t ) DTi , 100.0 * DTi /allocnrow );
2786+ DTPRINT (_ ("%8.3fs (%3.0f%%) Column type detection using %" PRId64 " sample rows\n" ),
2787+ tColType - tLayout , 100.0 * (tColType - tLayout )/tTot , sampleLines );
2788+ DTPRINT (_ ("%8.3fs (%3.0f%%) Allocation of %" PRId64 " rows x %d cols (%.3fGB) of which %" PRId64 " (%3.0f%%) rows used\n" ),
2789+ tAlloc - tColType , 100.0 * (tAlloc - tColType )/tTot , allocnrow , ncol , DTbytes /(1024.0 * 1024 * 1024 ), DTi , 100.0 * DTi /allocnrow );
27902790 thRead /=nth ; thPush /=nth ;
27912791 double thWaiting = tReread - tAlloc - thRead - thPush ;
2792- DTPRINT (_ ("%8.3fs (%3.0f%%) Reading %d chunks (%d swept) of %.3fMB (each chunk %d rows) using % d threads\n" ),
2793- tReread - tAlloc , 100.0 * (tReread - tAlloc )/tTot , nJumps , nSwept , (double )chunkBytes /(1024 * 1024 ), ( int )( DTi /nJumps ) , nth );
2792+ DTPRINT (_ ("%8.3fs (%3.0f%%) Reading %d chunks (%d swept) of %.3fMB (each chunk %" PRId64 " rows) using% d threads\n" ),
2793+ tReread - tAlloc , 100.0 * (tReread - tAlloc )/tTot , nJumps , nSwept , (double )chunkBytes /(1024 * 1024 ), DTi /nJumps , nth );
27942794 DTPRINT (_ (" + %8.3fs (%3.0f%%) Parse to row-major thread buffers (grown %d times)\n" ), thRead , 100.0 * thRead /tTot , buffGrown );
27952795 DTPRINT (_ (" + %8.3fs (%3.0f%%) Transpose\n" ), thPush , 100.0 * thPush /tTot );
27962796 DTPRINT (_ (" + %8.3fs (%3.0f%%) Waiting\n" ), thWaiting , 100.0 * thWaiting /tTot );
0 commit comments