@@ -77,7 +77,8 @@ SEXP freadR(
7777 SEXP encodingArg ,
7878 SEXP keepLeadingZerosArgs ,
7979 SEXP noTZasUTC
80- ) {
80+ )
81+ {
8182 verbose = LOGICAL (verboseArg )[0 ];
8283 warningsAreErrors = LOGICAL (warnings2errorsArg )[0 ];
8384
@@ -227,14 +228,16 @@ SEXP freadR(
227228 return DT ;
228229}
229230
230- static void applyDrop (SEXP items , int8_t * type , int ncol , int dropSource ) {
231+ static void applyDrop (SEXP items , int8_t * type , int ncol , int dropSource )
232+ {
231233 if (!length (items )) return ;
232- SEXP itemsInt = PROTECT (isString (items ) ? chmatch (items , colNamesSxp , NA_INTEGER ) : coerceVector (items , INTSXP ));
233- const int * itemsD = INTEGER (itemsInt ), n = LENGTH (itemsInt );
234+ const SEXP itemsInt = PROTECT (isString (items ) ? chmatch (items , colNamesSxp , NA_INTEGER ) : coerceVector (items , INTSXP ));
235+ const int * const itemsD = INTEGER (itemsInt );
236+ const int n = LENGTH (itemsInt );
234237 for (int j = 0 ; j < n ; j ++ ) {
235- int k = itemsD [j ];
238+ const int k = itemsD [j ];
236239 if (k == NA_INTEGER || k < 1 || k > ncol ) {
237- static char buff [50 ];
240+ char buff [50 ];
238241 if (dropSource == -1 ) snprintf (buff , sizeof (buff ), "drop[%d]" , j + 1 ); // # notranslate
239242 else snprintf (buff , sizeof (buff ), "colClasses[[%d]][%d]" , dropSource + 1 , j + 1 ); // # notranslate
240243 if (k == NA_INTEGER ) {
@@ -258,7 +261,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
258261{
259262 // use typeSize superfluously to avoid not-used warning; otherwise could move typeSize from fread.h into fread.c
260263 if (typeSize [CT_BOOL8_N ] != 1 ) internal_error (__func__ , "typeSize[CT_BOOL8_N] != 1" ); // # nocov
261- if (typeSize [CT_STRING ] != 8 ) internal_error (__func__ , "typeSize[CT_STRING] != 1 " ); // # nocov
264+ if (typeSize [CT_STRING ] != 8 ) internal_error (__func__ , "typeSize[CT_STRING] != 8 " ); // # nocov
262265 colNamesSxp = R_NilValue ;
263266 SET_VECTOR_ELT (RCHK , 1 , colNamesSxp = allocVector (STRSXP , ncol ));
264267 for (int i = 0 ; i < ncol ; i ++ ) {
@@ -300,7 +303,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
300303 SET_VECTOR_ELT (RCHK , 3 , selectRank = allocVector (INTSXP , ncol ));
301304 int * selectRankD = INTEGER (selectRank ), rank = 1 ;
302305 for (int i = 0 ; i < n ; i ++ ) {
303- int k = selectInts [i ];
306+ const int k = selectInts [i ];
304307 if (k == NA_INTEGER ) continue ; // missing column name warned above and skipped
305308 if (k < 0 ) STOP (_ ("Column number %d (select[%d]) is negative but should be in the range [1,ncol=%d]. Consider drop= for column exclusion." ), k , i + 1 , ncol );
306309 if (k == 0 ) STOP (_ ("select = 0 (select[%d]) has no meaning. All values of select should be in the range [1,ncol=%d]." ), i + 1 , ncol );
@@ -335,7 +338,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
335338 if (type [i ] == CT_DROP ) continue ; // user might have specified the type of all columns including those dropped with drop=
336339 const SEXP tt = STRING_ELT (colClassesSxp , i & mask ); // mask recycles colClassesSxp when it's length-1
337340 if (tt == NA_STRING || tt == R_BlankString ) continue ; // user is ok with inherent type for this column
338- int w = INTEGER (typeEnum_idx )[i & mask ];
341+ const int w = INTEGER (typeEnum_idx )[i & mask ];
339342 if (tt == char_POSIXct ) {
340343 // from v1.13.0, POSIXct is a built in type, but if the built-in doesn't support (e.g. test 1743.25 has missing tzone) then we still dispatch to as.POSIXct afterwards
341344 if (type [i ] != CT_ISO8601_TIME ) {
@@ -354,8 +357,8 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
354357 for (int i = 0 ; i < n ; i ++ ) {
355358 SEXP tt = STRING_ELT (colClassesSxp , i );
356359 if (tt == NA_STRING || tt == R_BlankString ) continue ;
357- int w = INTEGER (typeEnum_idx )[i ];
358- int y = selectInts [i ];
360+ const int w = INTEGER (typeEnum_idx )[i ];
361+ const int y = selectInts [i ];
359362 if (y == NA_INTEGER ) continue ;
360363 if (tt == char_POSIXct ) {
361364 if (type [y - 1 ] != CT_ISO8601_TIME ) {
@@ -391,7 +394,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
391394 else itemsInt = PROTECT (coerceVector (items , INTSXP ));
392395 // UNPROTECTed directly just after this for loop. No nprotect++ here is correct.
393396 for (int j = 0 ; j < LENGTH (items ); j ++ ) {
394- int colIdx = INTEGER (itemsInt )[j ]; // NB: 1-based
397+ const int colIdx = INTEGER (itemsInt )[j ]; // NB: 1-based
395398 if (colIdx == NA_INTEGER ) {
396399 if (isString (items ))
397400 DTWARN (_ ("Column name '%s' (colClasses[[%d]][%d]) not found" ), CHAR (STRING_ELT (items , j )), i + 1 , j + 1 );
@@ -432,18 +435,19 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
432435 }
433436 UNPROTECT (1 ); // typeRName_sxp
434437 }
435- UNPROTECT (nprotect );
438+ if ( nprotect ) UNPROTECT (nprotect );
436439 if (readInt64As != CT_INT64 ) {
437440 for (int i = 0 ; i < ncol ; i ++ ) if (type [i ] == CT_INT64 ) type [i ] = readInt64As ;
438441 }
439442 return true;
440443}
441444
442- size_t allocateDT (int8_t * typeArg , int8_t * sizeArg , int ncolArg , int ndrop , size_t allocNrow ) {
445+ size_t allocateDT (int8_t * typeArg , int8_t * sizeArg , int ncolArg , int ndrop , size_t allocNrow )
446+ {
443447 // save inputs for use by pushBuffer
444448 size = sizeArg ;
445449 type = typeArg ;
446- int newDT = (ncol == 0 );
450+ const bool newDT = (ncol == 0 );
447451 if (newDT ) {
448452 ncol = ncolArg ;
449453 dtnrows = allocNrow ;
@@ -453,20 +457,28 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size
453457 if (colClassesAs ) setAttrib (DT , sym_colClassesAs , colClassesAs );
454458 } else {
455459 int nprotect = 0 ;
456- SEXP tt , ss = R_NilValue ;
457- setAttrib (DT , R_NamesSymbol , tt = PROTECT (allocVector (STRSXP , ncol - ndrop ))); nprotect ++ ;
460+ SEXP tt = PROTECT (allocVector (STRSXP , ncol - ndrop ));
461+ setAttrib (DT , R_NamesSymbol , tt ); nprotect ++ ;
462+
463+ SEXP ss ;
458464 if (colClassesAs ) {
459- setAttrib (DT , sym_colClassesAs , ss = PROTECT (allocVector (STRSXP , ncol - ndrop ))); nprotect ++ ;
465+ ss = PROTECT (allocVector (STRSXP , ncol - ndrop )); nprotect ++ ;
466+ setAttrib (DT , sym_colClassesAs , ss );
467+ } else {
468+ ss = R_NilValue ;
460469 }
470+
461471 for (int i = 0 , resi = 0 ; i < ncol ; i ++ ) if (type [i ] != CT_DROP ) {
462472 if (colClassesAs ) SET_STRING_ELT (ss , resi , STRING_ELT (colClassesAs , i ));
463473 SET_STRING_ELT (tt , resi ++ , STRING_ELT (colNamesSxp , i ));
464474 }
475+
465476 UNPROTECT (nprotect );
466477 }
467478 if (selectRank ) {
468479 SEXP tt = PROTECT (allocVector (INTSXP , ncol - ndrop ));
469- int * ttD = INTEGER (tt ), * rankD = INTEGER (selectRank ), rank = 1 ;
480+ int * ttD = INTEGER (tt ), rank = 1 ;
481+ const int * rankD = INTEGER (selectRank );
470482 for (int i = 0 ; i < ncol ; i ++ ) if (type [i ] != CT_DROP ) ttD [rankD [i ] - 1 ] = rank ++ ;
471483 SET_VECTOR_ELT (RCHK , 3 , selectRank = tt );
472484 // selectRank now holds the order not the rank (so its name is now misleading). setFinalNRow passes it to setcolorder
@@ -494,13 +506,13 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size
494506 for (int i = 0 , resi = 0 ; i < ncol ; i ++ ) {
495507 if (type [i ] == CT_DROP ) continue ;
496508 SEXP col = VECTOR_ELT (DT , resi );
497- int oldIsInt64 = newDT ? 0 : INHERITS (col , char_integer64 );
498- int newIsInt64 = type [i ] == CT_INT64 ;
499- int typeChanged = (type [i ] > 0 ) && (newDT || TYPEOF (col ) != typeSxp [type [i ]] || oldIsInt64 != newIsInt64 );
500- int nrowChanged = (allocNrow != dtnrows );
509+ const bool oldIsInt64 = newDT ? 0 : INHERITS (col , char_integer64 );
510+ const bool newIsInt64 = type [i ] == CT_INT64 ;
511+ const bool typeChanged = (type [i ] > 0 ) && (newDT || TYPEOF (col ) != typeSxp [type [i ]] || oldIsInt64 != newIsInt64 );
512+ const bool nrowChanged = (allocNrow != dtnrows );
501513 if (typeChanged || nrowChanged ) {
502- SEXP thiscol = typeChanged ? allocVector (typeSxp [type [i ]], allocNrow ) // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1
503- : growVector ( col , allocNrow );
514+ SEXP thiscol = typeChanged ? allocVector (typeSxp [type [i ]], allocNrow ) : growVector ( col , allocNrow ); // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1
515+
504516 SET_VECTOR_ELT (DT , resi , thiscol );
505517 if (type [i ] == CT_INT64 ) {
506518 SEXP tt = PROTECT (ScalarString (char_integer64 ));
@@ -530,7 +542,8 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size
530542 return DTbytes ;
531543}
532544
533- void setFinalNrow (size_t nrow ) {
545+ void setFinalNrow (size_t nrow )
546+ {
534547 if (selectRank ) setcolorder (DT , selectRank ); // selectRank was changed to contain order (not rank) in allocateDT above
535548 if (length (DT )) {
536549 if (nrow == dtnrows )
@@ -545,9 +558,10 @@ void setFinalNrow(size_t nrow) {
545558 R_FlushConsole (); // # 2481. Just a convenient place; nothing per se to do with setFinalNrow()
546559}
547560
548- void dropFilledCols (int * dropArg , int ndelete ) {
561+ void dropFilledCols (int * dropArg , int ndelete )
562+ {
549563 dropFill = dropArg ;
550- int ndt = length (DT );
564+ const int ndt = length (DT );
551565 for (int i = 0 ; i < ndelete ; i ++ ) {
552566 SET_VECTOR_ELT (DT , dropFill [i ], R_NilValue );
553567 SET_STRING_ELT (colNamesSxp , dropFill [i ], NA_STRING );
@@ -562,13 +576,13 @@ void pushBuffer(ThreadLocalFreadParsingContext *ctx)
562576 const void * buff4 = ctx -> buff4 ;
563577 const void * buff1 = ctx -> buff1 ;
564578 const char * anchor = ctx -> anchor ;
565- int nRows = ( int ) ctx -> nRows ;
566- size_t DTi = ctx -> DTi ;
567- int rowSize8 = ( int ) ctx -> rowSize8 ;
568- int rowSize4 = ( int ) ctx -> rowSize4 ;
569- int rowSize1 = ( int ) ctx -> rowSize1 ;
570- int nStringCols = ctx -> nStringCols ;
571- int nNonStringCols = ctx -> nNonStringCols ;
579+ const size_t nRows = ctx -> nRows ;
580+ const size_t DTi = ctx -> DTi ;
581+ const size_t rowSize8 = ctx -> rowSize8 ;
582+ const size_t rowSize4 = ctx -> rowSize4 ;
583+ const size_t rowSize1 = ctx -> rowSize1 ;
584+ const int nStringCols = ctx -> nStringCols ;
585+ const int nNonStringCols = ctx -> nNonStringCols ;
572586
573587 // Do all the string columns first so as to minimize and concentrate the time inside the single critical.
574588 // While the string columns are happening other threads before me can be copying their non-string buffers to the
@@ -581,14 +595,14 @@ void pushBuffer(ThreadLocalFreadParsingContext *ctx)
581595 #pragma omp critical
582596 {
583597 int off8 = 0 ;
584- int cnt8 = rowSize8 / 8 ;
585- lenOff * buff8_lenoffs = (lenOff * ) buff8 ;
598+ const int cnt8 = rowSize8 / 8 ;
599+ const lenOff * buff8_lenoffs = (const lenOff * )buff8 ;
586600 for (int j = 0 , resj = -1 , done = 0 ; done < nStringCols && j < ncol ; j ++ ) {
587601 if (type [j ] == CT_DROP ) continue ;
588602 resj ++ ;
589603 if (type [j ] == CT_STRING ) {
590604 SEXP dest = VECTOR_ELT (DT , resj );
591- lenOff * source = buff8_lenoffs + off8 ;
605+ const lenOff * source = buff8_lenoffs + off8 ;
592606 for (int i = 0 ; i < nRows ; i ++ ) {
593607 int strLen = source -> len ;
594608 if (strLen <= 0 ) {
@@ -600,7 +614,7 @@ void pushBuffer(ThreadLocalFreadParsingContext *ctx)
600614 while (c < strLen && str [c ]) c ++ ;
601615 if (c < strLen ) {
602616 // embedded nul found; any at the beginning or the end of the field should have already been excluded but this will strip those too if present just in case
603- char * last = (char * )str + c ; // obtain write access to (const char *)anchor;
617+ char * last = (char * )str + c ; // obtain write access to (const char *)anchor;
604618 while (c < strLen ) {
605619 if (str [c ]) * last ++ = str [c ]; // cow page write: saves allocation and management of a temp that would need to thread-safe in future.
606620 c ++ ; // This is only thread accessing this region. For non-mmap direct input nul are not possible (R would not have accepted nul earlier).
@@ -613,55 +627,69 @@ void pushBuffer(ThreadLocalFreadParsingContext *ctx)
613627 }
614628 done ++ ; // if just one string col near the start, don't loop over the other 10,000 cols. TODO? start on first too
615629 }
616- off8 += (size [j ] == 8 );
630+
631+ if (size [j ] == 8 ) off8 ++ ;
617632 }
618633 }
619634 }
620635
621636 int off1 = 0 , off4 = 0 , off8 = 0 ;
622- for (int j = 0 , resj = -1 , done = 0 ; done < nNonStringCols && j < ncol ; j ++ ) {
637+ for (int j = 0 , resj = 0 , done = 0 ; done < nNonStringCols && j < ncol ; j ++ ) {
623638 if (type [j ] == CT_DROP ) continue ;
624- int thisSize = size [j ];
625- resj ++ ;
639+
626640 if (type [j ] != CT_STRING && type [j ] > 0 ) {
627- if (thisSize == 8 ) {
641+ switch (size [j ])
642+ {
643+ case 8 : {
628644 double * dest = REAL (VECTOR_ELT (DT , resj )) + DTi ;
629- const char * src8 = (char * )buff8 + off8 ;
645+ const char * src8 = (const char * )buff8 + off8 ;
630646 for (int i = 0 ; i < nRows ; i ++ ) {
631- * dest = * (double * )src8 ;
647+ * dest = * (double * )src8 ;
632648 src8 += rowSize8 ;
633649 dest ++ ;
634650 }
635- } else if (thisSize == 4 ) {
651+ break ;
652+ }
653+ case 4 : {
636654 int * dest = INTEGER (VECTOR_ELT (DT , resj )) + DTi ;
637- const char * src4 = (char * )buff4 + off4 ;
655+ const char * src4 = (const char * )buff4 + off4 ;
638656 // debug line for #3369 ... if (DTi>2638000) printf("freadR.c:460: thisSize==4, resj=%d, %"PRIu64", %d, %d, j=%d, done=%d\n", resj, (uint64_t)DTi, off4, rowSize4, j, done);
639657 for (int i = 0 ; i < nRows ; i ++ ) {
640- * dest = * (int * )src4 ;
658+ * dest = * (int * )src4 ;
641659 src4 += rowSize4 ;
642660 dest ++ ;
643661 }
644- } else if (thisSize == 1 ) {
662+ break ;
663+ }
664+ case 1 : {
645665 if (type [j ] > CT_BOOL8_Y ) STOP (_ ("Field size is 1 but the field is of type %d\n" ), type [j ]);
646666 int * dest = LOGICAL (VECTOR_ELT (DT , resj )) + DTi ;
647- const char * src1 = (char * )buff1 + off1 ;
667+ const char * src1 = (const char * )buff1 + off1 ;
648668 for (int i = 0 ; i < nRows ; i ++ ) {
649- int8_t v = * (int8_t * )src1 ;
669+ const int8_t v = * (int8_t * )src1 ;
650670 * dest = (v == INT8_MIN ? NA_INTEGER : v );
651671 src1 += rowSize1 ;
652672 dest ++ ;
653673 }
654- } else internal_error (__func__ , "unexpected field of size %d\n" , thisSize ); // # nocov
674+ break ;
675+ }
676+ default : { // # nocov
677+ internal_error (__func__ , "unexpected field of size %d\n" , size [j ]); // # nocov
678+ break ; // # nocov
679+ }
680+ }
655681 done ++ ;
656682 }
657683 off8 += (size [j ] & 8 );
658684 off4 += (size [j ] & 4 );
659685 off1 += (size [j ] & 1 );
686+ resj ++ ;
660687 }
661688}
662689
663690// # nocov start
664- void progress (int p , int eta ) {
691+ void progress (int p , int eta )
692+ {
665693 // called from thread 0 only
666694 // p between 0 and 100
667695 // eta in seconds
@@ -707,7 +735,8 @@ void progress(int p, int eta) {
707735}
708736// # nocov end
709737
710- void halt__ (bool warn , const char * format , ...) {
738+ void halt__ (bool warn , const char * format , ...)
739+ {
711740 // Solves: http://stackoverflow.com/questions/18597123/fread-data-table-locks-files
712741 // TODO: always include fnam in the STOP message. For log files etc.
713742 va_list args ;
0 commit comments