Skip to content

Commit 83b0de0

Browse files
committed
various constness, control flow, and object placement improvements in freadR
1 parent 9367451 commit 83b0de0

File tree

1 file changed

+84
-55
lines changed

1 file changed

+84
-55
lines changed

src/freadR.c

Lines changed: 84 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ SEXP freadR(
7777
SEXP encodingArg,
7878
SEXP keepLeadingZerosArgs,
7979
SEXP noTZasUTC
80-
) {
80+
)
81+
{
8182
verbose = LOGICAL(verboseArg)[0];
8283
warningsAreErrors = LOGICAL(warnings2errorsArg)[0];
8384

@@ -227,14 +228,16 @@ SEXP freadR(
227228
return DT;
228229
}
229230

230-
static void applyDrop(SEXP items, int8_t *type, int ncol, int dropSource) {
231+
static void applyDrop(SEXP items, int8_t *type, int ncol, int dropSource)
232+
{
231233
if (!length(items)) return;
232-
SEXP itemsInt = PROTECT(isString(items) ? chmatch(items, colNamesSxp, NA_INTEGER) : coerceVector(items, INTSXP));
233-
const int *itemsD = INTEGER(itemsInt), n=LENGTH(itemsInt);
234+
const SEXP itemsInt = PROTECT(isString(items) ? chmatch(items, colNamesSxp, NA_INTEGER) : coerceVector(items, INTSXP));
235+
const int* const itemsD = INTEGER(itemsInt);
236+
const int n = LENGTH(itemsInt);
234237
for (int j = 0; j < n; j++) {
235-
int k = itemsD[j];
238+
const int k = itemsD[j];
236239
if (k == NA_INTEGER || k < 1 || k > ncol) {
237-
static char buff[50];
240+
char buff[50];
238241
if (dropSource == -1) snprintf(buff, sizeof(buff), "drop[%d]", j + 1); // # notranslate
239242
else snprintf(buff, sizeof(buff), "colClasses[[%d]][%d]", dropSource + 1, j + 1); // # notranslate
240243
if (k == NA_INTEGER) {
@@ -258,7 +261,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
258261
{
259262
// use typeSize superfluously to avoid not-used warning; otherwise could move typeSize from fread.h into fread.c
260263
if (typeSize[CT_BOOL8_N] != 1) internal_error(__func__, "typeSize[CT_BOOL8_N] != 1"); // # nocov
261-
if (typeSize[CT_STRING] != 8) internal_error(__func__, "typeSize[CT_STRING] != 1"); // # nocov
264+
if (typeSize[CT_STRING] != 8) internal_error(__func__, "typeSize[CT_STRING] != 8"); // # nocov
262265
colNamesSxp = R_NilValue;
263266
SET_VECTOR_ELT(RCHK, 1, colNamesSxp = allocVector(STRSXP, ncol));
264267
for (int i = 0; i < ncol; i++) {
@@ -300,7 +303,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
300303
SET_VECTOR_ELT(RCHK, 3, selectRank = allocVector(INTSXP, ncol));
301304
int *selectRankD = INTEGER(selectRank), rank = 1;
302305
for (int i = 0; i < n; i++) {
303-
int k = selectInts[i];
306+
const int k = selectInts[i];
304307
if (k == NA_INTEGER) continue; // missing column name warned above and skipped
305308
if (k < 0) STOP(_("Column number %d (select[%d]) is negative but should be in the range [1,ncol=%d]. Consider drop= for column exclusion."), k, i + 1, ncol);
306309
if (k == 0) STOP(_("select = 0 (select[%d]) has no meaning. All values of select should be in the range [1,ncol=%d]."), i + 1, ncol);
@@ -335,7 +338,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
335338
if (type[i] == CT_DROP) continue; // user might have specified the type of all columns including those dropped with drop=
336339
const SEXP tt = STRING_ELT(colClassesSxp, i & mask); // mask recycles colClassesSxp when it's length-1
337340
if (tt == NA_STRING || tt == R_BlankString) continue; // user is ok with inherent type for this column
338-
int w = INTEGER(typeEnum_idx)[i & mask];
341+
const int w = INTEGER(typeEnum_idx)[i & mask];
339342
if (tt == char_POSIXct) {
340343
// from v1.13.0, POSIXct is a built in type, but if the built-in doesn't support (e.g. test 1743.25 has missing tzone) then we still dispatch to as.POSIXct afterwards
341344
if (type[i] != CT_ISO8601_TIME) {
@@ -354,8 +357,8 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
354357
for (int i = 0; i < n; i++) {
355358
SEXP tt = STRING_ELT(colClassesSxp, i);
356359
if (tt == NA_STRING || tt == R_BlankString) continue;
357-
int w = INTEGER(typeEnum_idx)[i];
358-
int y = selectInts[i];
360+
const int w = INTEGER(typeEnum_idx)[i];
361+
const int y = selectInts[i];
359362
if (y == NA_INTEGER) continue;
360363
if (tt == char_POSIXct) {
361364
if (type[y - 1] != CT_ISO8601_TIME) {
@@ -391,7 +394,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
391394
else itemsInt = PROTECT(coerceVector(items, INTSXP));
392395
// UNPROTECTed directly just after this for loop. No nprotect++ here is correct.
393396
for (int j = 0; j < LENGTH(items); j++) {
394-
int colIdx = INTEGER(itemsInt)[j]; // NB: 1-based
397+
const int colIdx = INTEGER(itemsInt)[j]; // NB: 1-based
395398
if (colIdx == NA_INTEGER) {
396399
if (isString(items))
397400
DTWARN(_("Column name '%s' (colClasses[[%d]][%d]) not found"), CHAR(STRING_ELT(items, j)), i + 1, j + 1);
@@ -432,18 +435,19 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
432435
}
433436
UNPROTECT(1); // typeRName_sxp
434437
}
435-
UNPROTECT(nprotect);
438+
if(nprotect) UNPROTECT(nprotect);
436439
if (readInt64As != CT_INT64) {
437440
for (int i = 0; i < ncol; i++) if (type[i] == CT_INT64) type[i] = readInt64As;
438441
}
439442
return true;
440443
}
441444

442-
size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size_t allocNrow) {
445+
size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size_t allocNrow)
446+
{
443447
// save inputs for use by pushBuffer
444448
size = sizeArg;
445449
type = typeArg;
446-
int newDT = (ncol == 0);
450+
const bool newDT = (ncol == 0);
447451
if (newDT) {
448452
ncol = ncolArg;
449453
dtnrows = allocNrow;
@@ -453,20 +457,28 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size
453457
if (colClassesAs) setAttrib(DT, sym_colClassesAs, colClassesAs);
454458
} else {
455459
int nprotect = 0;
456-
SEXP tt, ss = R_NilValue;
457-
setAttrib(DT, R_NamesSymbol, tt = PROTECT(allocVector(STRSXP, ncol - ndrop))); nprotect++;
460+
SEXP tt = PROTECT(allocVector(STRSXP, ncol - ndrop));
461+
setAttrib(DT, R_NamesSymbol, tt); nprotect++;
462+
463+
SEXP ss;
458464
if (colClassesAs) {
459-
setAttrib(DT, sym_colClassesAs, ss = PROTECT(allocVector(STRSXP, ncol - ndrop))); nprotect++;
465+
ss = PROTECT(allocVector(STRSXP, ncol - ndrop)); nprotect++;
466+
setAttrib(DT, sym_colClassesAs, ss);
467+
} else {
468+
ss = R_NilValue;
460469
}
470+
461471
for (int i = 0, resi = 0; i < ncol; i++) if (type[i] != CT_DROP) {
462472
if (colClassesAs) SET_STRING_ELT(ss, resi, STRING_ELT(colClassesAs, i));
463473
SET_STRING_ELT(tt, resi++, STRING_ELT(colNamesSxp, i));
464474
}
475+
465476
UNPROTECT(nprotect);
466477
}
467478
if (selectRank) {
468479
SEXP tt = PROTECT(allocVector(INTSXP, ncol - ndrop));
469-
int *ttD = INTEGER(tt), *rankD = INTEGER(selectRank), rank = 1;
480+
int *ttD = INTEGER(tt), rank = 1;
481+
const int *rankD = INTEGER(selectRank);
470482
for (int i = 0; i < ncol; i++) if (type[i] != CT_DROP) ttD[rankD[i] - 1] = rank++;
471483
SET_VECTOR_ELT(RCHK, 3, selectRank = tt);
472484
// selectRank now holds the order not the rank (so its name is now misleading). setFinalNRow passes it to setcolorder
@@ -494,13 +506,13 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size
494506
for (int i = 0, resi = 0; i < ncol; i++) {
495507
if (type[i] == CT_DROP) continue;
496508
SEXP col = VECTOR_ELT(DT, resi);
497-
int oldIsInt64 = newDT ? 0 : INHERITS(col, char_integer64);
498-
int newIsInt64 = type[i] == CT_INT64;
499-
int typeChanged = (type[i] > 0) && (newDT || TYPEOF(col) != typeSxp[type[i]] || oldIsInt64 != newIsInt64);
500-
int nrowChanged = (allocNrow != dtnrows);
509+
const bool oldIsInt64 = newDT ? 0 : INHERITS(col, char_integer64);
510+
const bool newIsInt64 = type[i] == CT_INT64;
511+
const bool typeChanged = (type[i] > 0) && (newDT || TYPEOF(col) != typeSxp[type[i]] || oldIsInt64 != newIsInt64);
512+
const bool nrowChanged = (allocNrow != dtnrows);
501513
if (typeChanged || nrowChanged) {
502-
SEXP thiscol = typeChanged ? allocVector(typeSxp[type[i]], allocNrow) // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1
503-
: growVector(col, allocNrow);
514+
SEXP thiscol = typeChanged ? allocVector(typeSxp[type[i]], allocNrow) : growVector(col, allocNrow); // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1
515+
504516
SET_VECTOR_ELT(DT, resi, thiscol);
505517
if (type[i] == CT_INT64) {
506518
SEXP tt = PROTECT(ScalarString(char_integer64));
@@ -530,7 +542,8 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size
530542
return DTbytes;
531543
}
532544

533-
void setFinalNrow(size_t nrow) {
545+
void setFinalNrow(size_t nrow)
546+
{
534547
if (selectRank) setcolorder(DT, selectRank); // selectRank was changed to contain order (not rank) in allocateDT above
535548
if (length(DT)) {
536549
if (nrow == dtnrows)
@@ -545,9 +558,10 @@ void setFinalNrow(size_t nrow) {
545558
R_FlushConsole(); // # 2481. Just a convenient place; nothing per se to do with setFinalNrow()
546559
}
547560

548-
void dropFilledCols(int* dropArg, int ndelete) {
561+
void dropFilledCols(int* dropArg, int ndelete)
562+
{
549563
dropFill = dropArg;
550-
int ndt = length(DT);
564+
const int ndt = length(DT);
551565
for (int i = 0; i < ndelete; i++) {
552566
SET_VECTOR_ELT(DT, dropFill[i], R_NilValue);
553567
SET_STRING_ELT(colNamesSxp, dropFill[i], NA_STRING);
@@ -562,13 +576,13 @@ void pushBuffer(ThreadLocalFreadParsingContext *ctx)
562576
const void *buff4 = ctx->buff4;
563577
const void *buff1 = ctx->buff1;
564578
const char *anchor = ctx->anchor;
565-
int nRows = (int) ctx->nRows;
566-
size_t DTi = ctx->DTi;
567-
int rowSize8 = (int) ctx->rowSize8;
568-
int rowSize4 = (int) ctx->rowSize4;
569-
int rowSize1 = (int) ctx->rowSize1;
570-
int nStringCols = ctx->nStringCols;
571-
int nNonStringCols = ctx->nNonStringCols;
579+
const size_t nRows = ctx->nRows;
580+
const size_t DTi = ctx->DTi;
581+
const size_t rowSize8 = ctx->rowSize8;
582+
const size_t rowSize4 = ctx->rowSize4;
583+
const size_t rowSize1 = ctx->rowSize1;
584+
const int nStringCols = ctx->nStringCols;
585+
const int nNonStringCols = ctx->nNonStringCols;
572586

573587
// Do all the string columns first so as to minimize and concentrate the time inside the single critical.
574588
// While the string columns are happening other threads before me can be copying their non-string buffers to the
@@ -581,14 +595,14 @@ void pushBuffer(ThreadLocalFreadParsingContext *ctx)
581595
#pragma omp critical
582596
{
583597
int off8 = 0;
584-
int cnt8 = rowSize8 / 8;
585-
lenOff *buff8_lenoffs = (lenOff*) buff8;
598+
const int cnt8 = rowSize8 / 8;
599+
const lenOff *buff8_lenoffs = (const lenOff*)buff8;
586600
for (int j = 0, resj = -1, done = 0; done < nStringCols && j < ncol; j++) {
587601
if (type[j] == CT_DROP) continue;
588602
resj++;
589603
if (type[j] == CT_STRING) {
590604
SEXP dest = VECTOR_ELT(DT, resj);
591-
lenOff *source = buff8_lenoffs + off8;
605+
const lenOff *source = buff8_lenoffs + off8;
592606
for (int i = 0; i < nRows; i++) {
593607
int strLen = source->len;
594608
if (strLen <= 0) {
@@ -600,7 +614,7 @@ void pushBuffer(ThreadLocalFreadParsingContext *ctx)
600614
while (c < strLen && str[c]) c++;
601615
if (c < strLen) {
602616
// embedded nul found; any at the beginning or the end of the field should have already been excluded but this will strip those too if present just in case
603-
char *last = (char *)str + c; // obtain write access to (const char *)anchor;
617+
char *last = (char*)str + c; // obtain write access to (const char *)anchor;
604618
while (c < strLen) {
605619
if (str[c]) *last++ = str[c]; // cow page write: saves allocation and management of a temp that would need to thread-safe in future.
606620
c++; // This is only thread accessing this region. For non-mmap direct input nul are not possible (R would not have accepted nul earlier).
@@ -613,55 +627,69 @@ void pushBuffer(ThreadLocalFreadParsingContext *ctx)
613627
}
614628
done++; // if just one string col near the start, don't loop over the other 10,000 cols. TODO? start on first too
615629
}
616-
off8 += (size[j] == 8);
630+
631+
if (size[j] == 8) off8++;
617632
}
618633
}
619634
}
620635

621636
int off1 = 0, off4 = 0, off8 = 0;
622-
for (int j = 0, resj = -1, done = 0; done < nNonStringCols && j < ncol; j++) {
637+
for (int j = 0, resj = 0, done = 0; done < nNonStringCols && j < ncol; j++) {
623638
if (type[j] == CT_DROP) continue;
624-
int thisSize = size[j];
625-
resj++;
639+
626640
if (type[j] != CT_STRING && type[j] > 0) {
627-
if (thisSize == 8) {
641+
switch(size[j])
642+
{
643+
case 8: {
628644
double *dest = REAL(VECTOR_ELT(DT, resj)) + DTi;
629-
const char *src8 = (char*)buff8 + off8;
645+
const char *src8 = (const char*)buff8 + off8;
630646
for (int i = 0; i < nRows; i++) {
631-
*dest = *(double *)src8;
647+
*dest = *(double*)src8;
632648
src8 += rowSize8;
633649
dest++;
634650
}
635-
} else if (thisSize == 4) {
651+
break;
652+
}
653+
case 4: {
636654
int *dest = INTEGER(VECTOR_ELT(DT, resj)) + DTi;
637-
const char *src4 = (char*)buff4 + off4;
655+
const char *src4 = (const char*)buff4 + off4;
638656
// debug line for #3369 ... if (DTi>2638000) printf("freadR.c:460: thisSize==4, resj=%d, %"PRIu64", %d, %d, j=%d, done=%d\n", resj, (uint64_t)DTi, off4, rowSize4, j, done);
639657
for (int i = 0; i < nRows; i++) {
640-
*dest = *(int *)src4;
658+
*dest = *(int*)src4;
641659
src4 += rowSize4;
642660
dest++;
643661
}
644-
} else if (thisSize == 1) {
662+
break;
663+
}
664+
case 1: {
645665
if (type[j] > CT_BOOL8_Y) STOP(_("Field size is 1 but the field is of type %d\n"), type[j]);
646666
int *dest = LOGICAL(VECTOR_ELT(DT, resj)) + DTi;
647-
const char *src1 = (char*)buff1 + off1;
667+
const char *src1 = (const char*)buff1 + off1;
648668
for (int i = 0; i < nRows; i++) {
649-
int8_t v = *(int8_t *)src1;
669+
const int8_t v = *(int8_t*)src1;
650670
*dest = (v == INT8_MIN ? NA_INTEGER : v);
651671
src1 += rowSize1;
652672
dest++;
653673
}
654-
} else internal_error(__func__, "unexpected field of size %d\n", thisSize); // # nocov
674+
break;
675+
}
676+
default: { // # nocov
677+
internal_error(__func__, "unexpected field of size %d\n", size[j]); // # nocov
678+
break; // # nocov
679+
}
680+
}
655681
done++;
656682
}
657683
off8 += (size[j] & 8);
658684
off4 += (size[j] & 4);
659685
off1 += (size[j] & 1);
686+
resj++;
660687
}
661688
}
662689

663690
// # nocov start
664-
void progress(int p, int eta) {
691+
void progress(int p, int eta)
692+
{
665693
// called from thread 0 only
666694
// p between 0 and 100
667695
// eta in seconds
@@ -707,7 +735,8 @@ void progress(int p, int eta) {
707735
}
708736
// # nocov end
709737

710-
void halt__(bool warn, const char *format, ...) {
738+
void halt__(bool warn, const char *format, ...)
739+
{
711740
// Solves: http://stackoverflow.com/questions/18597123/fread-data-table-locks-files
712741
// TODO: always include fnam in the STOP message. For log files etc.
713742
va_list args;

0 commit comments

Comments
 (0)