@@ -222,9 +222,10 @@ static const char* strlim(const char *ch, char buf[static 500], size_t limit) {
222222
223223static const char * typeLetter = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ;
224224
225- static char * typesAsString (char str [ static 101 ], int ncol ) {
225+ static char * typesAsString (int ncol ) {
226226 int nLetters = strlen (typeLetter );
227227 if (NUMTYPE > nLetters ) INTERNAL_STOP ("NUMTYPE(%d) > nLetters(%d)" , NUMTYPE , nLetters ); // # nocov
228+ static char str [101 ];
228229 int i = 0 ;
229230 if (ncol <= 100 ) {
230231 for (; i < ncol ; i ++ ) str [i ] = typeLetter [IGNORE_BUMP (type [i ])];
@@ -404,9 +405,10 @@ double wallclock(void)
404405 * multiple threads at the same time, or hold on to the value returned for
405406 * extended periods of time.
406407 */
407- static const char * filesize_to_str (char output [ static 100 ], const uint64_t fsize )
408+ static const char * filesize_to_str (const uint64_t fsize )
408409{
409- static const char suffixes [] = {'T' , 'G' , 'M' , 'K' };
410+ static const char suffixes [] = { 'T' , 'G' , 'M' , 'K' };
411+ static char output [100 ];
410412 for (int i = 0 ; i <= sizeof (suffixes ); i ++ ) {
411413 int shift = (sizeof (suffixes ) - i ) * 10 ;
412414 if ((fsize >> shift ) == 0 ) continue ;
@@ -416,18 +418,18 @@ static const char* filesize_to_str(char output[static 100], const uint64_t fsize
416418 }
417419 if (ndigits == 0 || (fsize == (fsize >> shift << shift ))) {
418420 if (i < sizeof (suffixes )) {
419- snprintf (output , 100 , "%" PRIu64 "%ciB (%" PRIu64 " bytes)" , // # notranslate
420- ( fsize >> shift ) , suffixes [i ], fsize );
421+ snprintf (output , sizeof ( output ) , "%" PRIu64 "%ciB (%" PRIu64 " bytes)" , // # notranslate
422+ fsize >> shift , suffixes [i ], fsize );
421423 return output ;
422424 }
423425 } else {
424- snprintf (output , 100 , "%.*f%ciB (%" PRIu64 " bytes)" , // # notranslate
426+ snprintf (output , sizeof ( output ) , "%.*f%ciB (%" PRIu64 " bytes)" , // # notranslate
425427 ndigits , (double )fsize / (1LL << shift ), suffixes [i ], fsize );
426428 return output ;
427429 }
428430 }
429431 if (fsize == 1 ) return "1 byte" ;
430- snprintf (output , 100 , "%" PRIu64 " bytes" , fsize ); // # notranslate
432+ snprintf (output , sizeof ( output ) , "%" PRIu64 " bytes" , fsize ); // # notranslate
431433 return output ;
432434}
433435
@@ -1405,11 +1407,11 @@ int freadMain(freadMainArgs _args) {
14051407 }
14061408 if (stat_buf .st_size > SIZE_MAX ) {
14071409 close (fd ); // # nocov
1408- STOP (_ ("File size [%s] exceeds the address space: %s" ), filesize_to_str (( char [ 100 ]) {}, stat_buf .st_size ), fnam ); // # nocov
1410+ STOP (_ ("File size [%s] exceeds the address space: %s" ), filesize_to_str (stat_buf .st_size ), fnam ); // # nocov
14091411 }
14101412 fileSize = (size_t ) stat_buf .st_size ;
14111413 if (fileSize == 0 ) {close (fd ); STOP (_ ("File is empty: %s" ), fnam );}
1412- if (verbose ) DTPRINT (_ (" File opened, size = %s.\n" ), filesize_to_str (( char [ 100 ]) {}, fileSize ));
1414+ if (verbose ) DTPRINT (_ (" File opened, size = %s.\n" ), filesize_to_str (fileSize ));
14131415
14141416 // No MAP_POPULATE for faster nrows=10 and to make possible earlier progress bar in row count stage
14151417 // Mac doesn't appear to support MAP_POPULATE anyway (failed on CRAN when I tried).
@@ -1441,20 +1443,20 @@ int freadMain(freadMainArgs _args) {
14411443 if (GetFileSizeEx (hFile , & liFileSize )== 0 ) { CloseHandle (hFile ); STOP (_ ("GetFileSizeEx failed (returned 0) on file: %s" ), fnam ); }
14421444 if (liFileSize .QuadPart > SIZE_MAX ) {
14431445 CloseHandle (hFile ); // # nocov
1444- STOP (_ ("File size [%s] exceeds the address space: %s" ), filesize_to_str (( char [ 100 ]) {}, liFileSize .QuadPart ), fnam ); // # nocov
1446+ STOP (_ ("File size [%s] exceeds the address space: %s" ), filesize_to_str (liFileSize .QuadPart ), fnam ); // # nocov
14451447 }
14461448 fileSize = (size_t )liFileSize .QuadPart ;
1447- if (fileSize == 0 ) { CloseHandle (hFile ); STOP (_ ("File is empty: %s" ), fnam ); }
1448- if (verbose ) DTPRINT (_ (" File opened, size = %s.\n" ), filesize_to_str (( char [ 100 ]) {}, fileSize ));
1449- HANDLE hMap = CreateFileMapping (hFile , NULL , PAGE_WRITECOPY , 0 , 0 , NULL );
1449+ if (fileSize == 0 ) { CloseHandle (hFile ); STOP (_ ("File is empty: %s" ), fnam ); }
1450+ if (verbose ) DTPRINT (_ (" File opened, size = %s.\n" ), filesize_to_str (fileSize ));
1451+ HANDLE hMap = CreateFileMapping (hFile , NULL , PAGE_WRITECOPY , 0 , 0 , NULL );
14501452 if (hMap == NULL ) { CloseHandle (hFile ); STOP (_ ("This is Windows, CreateFileMapping returned error %lu for file %s" ), GetLastError (), fnam ); }
1451- mmp = MapViewOfFile (hMap , FILE_MAP_COPY , 0 , 0 , fileSize ); // fileSize must be <= hilo passed to CreateFileMapping above.
1453+ mmp = MapViewOfFile (hMap ,FILE_MAP_COPY ,0 , 0 , fileSize ); // fileSize must be <= hilo passed to CreateFileMapping above.
14521454 CloseHandle (hMap ); // we don't need to keep the file open; the MapView keeps an internal reference;
14531455 CloseHandle (hFile ); // see https://msdn.microsoft.com/en-us/library/windows/desktop/aa366537(v=vs.85).aspx
14541456 if (mmp == NULL ) {
14551457 #endif
14561458 int nbit = 8 * sizeof (char * ); // #nocov
1457- STOP (_ ("Opened %s file ok but could not memory map it. This is a %dbit process. %s." ), filesize_to_str (( char [ 100 ]) {}, fileSize ), nbit , // # nocov
1459+ STOP (_ ("Opened %s file ok but could not memory map it. This is a %dbit process. %s." ), filesize_to_str (fileSize ), nbit , // # nocov
14581460 nbit <= 32 ? _ ("Please upgrade to 64bit" ) : _ ("There is probably not enough contiguous virtual memory available" )); // # nocov
14591461 }
14601462 sof = (const char * ) mmp ;
@@ -1550,7 +1552,7 @@ int freadMain(freadMainArgs _args) {
15501552 // # nocov start
15511553 if (!verbose )
15521554 DTPRINT (_ ("%s. Attempt to copy file in RAM failed." ), msg );
1553- STOP (_ ("Unable to allocate %s of contiguous virtual RAM." ), filesize_to_str (( char [ 100 ]) {}, fileSize ));
1555+ STOP (_ ("Unable to allocate %s of contiguous virtual RAM." ), filesize_to_str (fileSize ));
15541556 // # nocov end
15551557 }
15561558 if (verbose )
@@ -1826,33 +1828,31 @@ int freadMain(freadMainArgs _args) {
18261828 DTPRINT (_ (" Quote rule picked = %d\n" ), quoteRule );
18271829 DTPRINT (_ (" fill=%s and the most number of columns found is %d\n" ), fill ? "true" : "false" , ncol );
18281830 }
1829- }
1830-
1831- if (ncol < 1 || row1line < 1 ) INTERNAL_STOP ("ncol==%d line==%d after detecting sep, ncol and first line" , ncol , row1line ); // # nocov
1832- int tt = countfields (& ch );
1833- ch = pos ; // move back to start of line since countfields() moved to next
1834- if (!fill && tt != ncol ) INTERNAL_STOP ("first line has field count %d but expecting %d" , tt , ncol ); // # nocov
1835- if (verbose ) {
1836- DTPRINT (_ (" Detected %d columns on line %d. This line is either column names or first data row. Line starts as: <<%s>>\n" ),
1837- tt , row1line , strlim (pos , (char [500 ]) {}, 30 ));
1838- DTPRINT (_ (" Quote rule picked = %d\n" ), quoteRule );
1839- DTPRINT (_ (" fill=%s and the most number of columns found is %d\n" ), fill ?"true" :"false" , ncol );
1840- }
1841-
1842- if (ncol == 1 && lastEOLreplaced && (eof [-1 ]== '\n' || eof [-1 ]== '\r' )) {
1843- // Multiple newlines at the end are significant in the case of 1-column files only (multiple NA at the end)
1844- if (fileSize % 4096 == 0 ) {
1845- const char * msg = _ ("This file is very unusual: it's one single column, ends with 2 or more end-of-line (representing several NA at the end), and the file size is a multiple of 4096, too" );
1846- if (verbose )
1847- DTPRINT (_ (" Copying file in RAM. %s\n" ), msg );
1848- ASSERT (mmp_copy == NULL , "mmp has already been copied due to abrupt non-eol ending, so it does not end with 2 or more eol.%s" , "" /*dummy arg for macro*/ ); // #nocov
1849- double time_taken = copyFile (fileSize );
1850- if (time_taken == -1.0 ) {
1851- // # nocov start
1852- if (!verbose )
1853- DTPRINT (_ ("%s. Attempt to copy file in RAM failed." ), msg );
1854- STOP (_ ("Unable to allocate %s of contiguous virtual RAM." ), filesize_to_str ((char [100 ]) {}, fileSize ));
1855- // # nocov end
1831+
1832+ if (ncol == 1 && lastEOLreplaced && (eof [-1 ] == '\n' || eof [-1 ] == '\r' )) {
1833+ // Multiple newlines at the end are significant in the case of 1-column files only (multiple NA at the end)
1834+ if (fileSize % 4096 == 0 ) {
1835+ const char * msg = _ ("This file is very unusual: it's one single column, ends with 2 or more end-of-line (representing several NA at the end), and the file size is a multiple of 4096, too" );
1836+ if (verbose )
1837+ DTPRINT (_ (" Copying file in RAM. %s\n" ), msg );
1838+ ASSERT (mmp_copy == NULL , "mmp has already been copied due to abrupt non-eol ending, so it does not end with 2 or more eol.%s" , "" /*dummy arg for macro*/ ); // #nocov
1839+ double time_taken = copyFile (fileSize );
1840+ if (time_taken == -1.0 ) {
1841+ // # nocov start
1842+ if (!verbose )
1843+ DTPRINT (_ ("%s. Attempt to copy file in RAM failed." ), msg );
1844+ STOP (_ ("Unable to allocate %s of contiguous virtual RAM." ), filesize_to_str (fileSize ));
1845+ // # nocov end
1846+ }
1847+ if (verbose )
1848+ DTPRINT (_ (" File copy in RAM took %.3f seconds.\n" ), time_taken );
1849+ else if (time_taken > 0.5 ) // # nocov. gate on the measured copy duration (seconds) that the message reports
1850+ DTPRINT (_ ("Avoidable file copy in RAM took %.3f seconds. %s.\n" ), time_taken , msg ); // # nocov. not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded
1851+ pos = sof + (pos - (const char * )mmp );
1852+ firstJumpEnd = sof + (firstJumpEnd - (const char * )mmp );
1853+ } else {
1854+ if (verbose ) DTPRINT (_ (" 1-column file ends with 2 or more end-of-line. Restoring last eol using extra byte in cow page.\n" ));
1855+ eof ++ ;
18561856 }
18571857 * const_cast (eof - 1 ) = eol_one_r ? '\r' : '\n' ;
18581858 * const_cast (eof ) = '\0' ;
@@ -1986,10 +1986,8 @@ int freadMain(freadMainArgs _args) {
19861986 ASSERT (jump > 0 , "jump(%d)>0" , jump );
19871987 memcpy (type , tmpType , ncol );
19881988 }
1989-
19901989 if (verbose && (bumped || jump == 0 || jump == nJumps - 1 )) {
1991- DTPRINT (_ (" Type codes (jump %03d) : %s Quote rule %d\n" ), jump , typesAsString ((char [101 ]) {}, ncol ), quoteRule );
1992-
1990+ DTPRINT (_ (" Type codes (jump %03d) : %s Quote rule %d\n" ), jump , typesAsString (ncol ), quoteRule );
19931991 }
19941992 }
19951993
@@ -2084,7 +2082,7 @@ int freadMain(freadMainArgs _args) {
20842082 type [j ] = tmpType [j ];
20852083 }
20862084 }
2087- if (verbose && bumped ) DTPRINT (_ (" Type codes (first row) : %s Quote rule %d\n" ), typesAsString (( char [ 101 ]) {}, ncol ), quoteRule );
2085+ if (verbose && bumped ) DTPRINT (_ (" Type codes (first row) : %s Quote rule %d\n" ), typesAsString (ncol ), quoteRule );
20882086 }
20892087
20902088 estnrow = 1 ;
@@ -2216,8 +2214,7 @@ int freadMain(freadMainArgs _args) {
22162214 rowSize8 += (size [j ] & 8 );
22172215 if (type [j ] == CT_STRING ) nStringCols ++ ; else nNonStringCols ++ ;
22182216 }
2219-
2220- if (verbose ) DTPRINT (_ (" After %d type and %d drop user overrides : %s\n" ), nUserBumped , ndrop , typesAsString ((char [101 ]) {}, ncol ));
2217+ if (verbose ) DTPRINT (_ (" After %d type and %d drop user overrides : %s\n" ), nUserBumped , ndrop , typesAsString (ncol ));
22212218 tColType = wallclock ();
22222219 }
22232220
@@ -2680,11 +2677,11 @@ int freadMain(freadMainArgs _args) {
26802677 }
26812678 // else nrowLimit applied and stopped early normally
26822679 }
2683-
2680+
26842681 // tell progress meter to finish up; e.g. write final newline
26852682 // if there's a reread, the progress meter will start again from 0
26862683 if (args .showProgress ) progress (100 , 0 );
2687-
2684+
26882685 if (firstTime ) {
26892686 tReread = tRead = wallclock ();
26902687
@@ -2694,7 +2691,7 @@ int freadMain(freadMainArgs _args) {
26942691 for (int i = 0 ; i < ncol ; i ++ ) typeCounts [IGNORE_BUMP (type [i ])]++ ;
26952692
26962693 if (nTypeBump ) {
2697- if (verbose ) DTPRINT (_ (" %d out-of-sample type bumps: %s\n" ), nTypeBump , typesAsString (( char [ 101 ]) {}, ncol ));
2694+ if (verbose ) DTPRINT (_ (" %d out-of-sample type bumps: %s\n" ), nTypeBump , typesAsString (ncol ));
26982695 rowSize1 = rowSize4 = rowSize8 = 0 ;
26992696 nStringCols = 0 ;
27002697 nNonStringCols = 0 ;
@@ -2732,7 +2729,7 @@ int freadMain(freadMainArgs _args) {
27322729 }
27332730 double tTot = tReread - t0 ; // tReread==tRead when there was no reread
27342731 if (verbose ) DTPRINT (_ ("Read %" PRIu64 " rows x %d columns from %s file in %02d:%06.3f wall clock time\n" ),
2735- (uint64_t )DTi , ncol - ndrop , filesize_to_str (( char [ 100 ]) {}, fileSize ), (int )tTot / 60 , fmod (tTot , 60.0 ));
2732+ (uint64_t )DTi , ncol - ndrop , filesize_to_str (fileSize ), (int )tTot / 60 , fmod (tTot , 60.0 ));
27362733
27372734 //*********************************************************************************************
27382735 // [12] Finalize the datatable
@@ -2758,22 +2755,21 @@ int freadMain(freadMainArgs _args) {
27582755 while (ch < eof && isspace (* ch )) ch ++ ;
27592756 if (ch == eof ) {
27602757 DTWARN (_ ("Discarded single-line footer: <<%s>>" ), strlim (skippedFooter , (char [500 ]) {}, 500 ));
2761-
27622758 }
27632759 else {
27642760 ch = headPos ;
27652761 int tt = countfields (& ch );
27662762 if (fill > 0 ) {
27672763 DTWARN (_ ("Stopped early on line %" PRId64 ". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>" ),
2768- DTi + row1line , ncol , tt , tt , strlim (skippedFooter , (char [500 ]) {}, 500 ));
2764+ DTi + row1line , ncol , tt , tt , strlim (skippedFooter , (char [500 ]) {}, 500 ));
27692765 } else {
27702766 DTWARN (_ ("Stopped early on line %" PRId64 ". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>" ),
2771- DTi + row1line , ncol , tt , strlim (skippedFooter , (char [500 ]) {}, 500 ));
2767+ DTi + row1line , ncol , tt , strlim (skippedFooter , (char [500 ]) {}, 500 ));
27722768 }
27732769 }
27742770 }
27752771 }
2776- if (quoteRuleBumpedCh != NULL && quoteRuleBumpedCh < headPos ) {
2772+ if (quoteRuleBumpedCh != NULL && quoteRuleBumpedCh < headPos ) {
27772773 DTWARN (_ ("Found and resolved improper quoting out-of-sample. First healed line %" PRId64 ": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning." ), quoteRuleBumpedLine , strlim (quoteRuleBumpedCh , (char [500 ]) {}, 500 ));
27782774 }
27792775
0 commit comments