Skip to content

Commit 6a4bd54

Browse files
Refactor colClasses handling for readability (#6106)
Towards #6105. This PR is a simple refactor to reduce code nesting / improve readability. It makes the actual implementation of #6105, #6107, much simpler.
1 parent 35f2979 commit 6a4bd54

File tree

1 file changed

+28
-24
lines changed

1 file changed

+28
-24
lines changed

src/freadR.c

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
307307
else type[i]=CT_DROP;
308308
}
309309
}
310-
colClassesAs = NULL;
310+
colClassesAs = NULL; // any coercions we can't handle here in C are deferred to R (to handle with methods::as) via this attribute
311311
if (length(colClassesSxp)) {
312312
SEXP typeRName_sxp = PROTECT(allocVector(STRSXP, NUT));
313313
for (int i=0; i<NUT; i++) SET_STRING_ELT(typeRName_sxp, i, mkChar(typeRName[i]));
@@ -316,7 +316,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
316316
SET_STRING_ELT(typeRName_sxp, CT_ISO8601_DATE, R_BlankString);
317317
SET_STRING_ELT(typeRName_sxp, CT_ISO8601_TIME, R_BlankString);
318318
}
319-
SET_VECTOR_ELT(RCHK, 2, colClassesAs=allocVector(STRSXP, ncol)); // if any, this attached to the DT for R level to call as_ methods on
319+
SET_VECTOR_ELT(RCHK, 2, colClassesAs=allocVector(STRSXP, ncol));
320320
if (isString(colClassesSxp)) {
321321
SEXP typeEnum_idx = PROTECT(chmatch(colClassesSxp, typeRName_sxp, NUT));
322322
if (selectColClasses==false) {
@@ -375,45 +375,49 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
375375
}
376376

377377
for (int i=0; i<LENGTH(colClassesSxp); i++) {
378-
const int w = INTEGER(typeEnum_idx)[i];
379-
signed char thisType = typeEnum[w-1];
380-
if (thisType==CT_DROP) continue; // was dealt with earlier above
378+
const int colClassTypeIdx = INTEGER(typeEnum_idx)[i];
379+
signed char colClassType = typeEnum[colClassTypeIdx-1];
380+
if (colClassType == CT_DROP) continue; // was dealt with earlier above
381381
SEXP items = VECTOR_ELT(colClassesSxp,i);
382382
SEXP itemsInt;
383383
if (isString(items)) itemsInt = PROTECT(chmatch(items, colNamesSxp, NA_INTEGER));
384384
else itemsInt = PROTECT(coerceVector(items, INTSXP));
385385
// UNPROTECTed directly just after this for loop. No nprotect++ here is correct.
386386
for (int j=0; j<LENGTH(items); j++) {
387-
int k = INTEGER(itemsInt)[j];
388-
if (k==NA_INTEGER) {
387+
int colIdx = INTEGER(itemsInt)[j]; // NB: 1-based
388+
if (colIdx == NA_INTEGER) {
389389
if (isString(items))
390390
DTWARN(_("Column name '%s' (colClasses[[%d]][%d]) not found"), CHAR(STRING_ELT(items, j)), i+1, j+1);
391391
else
392392
DTWARN(_("colClasses[[%d]][%d] is NA"), i+1, j+1);
393+
continue;
394+
}
395+
if (colIdx<1 || colIdx>ncol) {
396+
DTWARN(_("Column number %d (colClasses[[%d]][%d]) is out of range [1,ncol=%d]"), colIdx, i+1, j+1, ncol);
397+
continue;
398+
}
399+
if (type[colIdx-1]<0) {
400+
DTWARN(_("Column %d ('%s') appears more than once in colClasses. The second time is colClasses[[%d]][%d]."), colIdx, CHAR(STRING_ELT(colNamesSxp,colIdx-1)), i+1, j+1);
401+
continue;
402+
}
403+
if (type[colIdx-1] == CT_DROP) {
404+
continue;
405+
}
406+
if (selectRankD) selectRankD[colIdx-1] = rank++;
407+
// NB: mark as negative to indicate 'seen'
408+
if (colClassType == CT_ISO8601_TIME && type[colIdx-1]!=CT_ISO8601_TIME) {
409+
type[colIdx-1] = -CT_STRING; // don't use in-built UTC parser, defer to character and as.POSIXct afterwards which reads in local time
410+
SET_STRING_ELT(colClassesAs, colIdx-1, STRING_ELT(listNames, i));
393411
} else {
394-
if (k>=1 && k<=ncol) {
395-
if (type[k-1]<0)
396-
DTWARN(_("Column %d ('%s') appears more than once in colClasses. The second time is colClasses[[%d]][%d]."), k, CHAR(STRING_ELT(colNamesSxp,k-1)), i+1, j+1);
397-
else if (type[k-1]!=CT_DROP) {
398-
if (thisType==CT_ISO8601_TIME && type[k-1]!=CT_ISO8601_TIME) {
399-
type[k-1] = -CT_STRING; // don't use in-built UTC parser, defer to character and as.POSIXct afterwards which reads in local time
400-
SET_STRING_ELT(colClassesAs, k-1, STRING_ELT(listNames,i));
401-
} else {
402-
type[k-1] = -thisType; // freadMain checks bump up only not down. Deliberately don't catch here to test freadMain; e.g. test 959
403-
if (w==NUT) SET_STRING_ELT(colClassesAs, k-1, STRING_ELT(listNames,i));
404-
}
405-
if (selectRankD) selectRankD[k-1] = rank++;
406-
}
407-
} else {
408-
DTWARN(_("Column number %d (colClasses[[%d]][%d]) is out of range [1,ncol=%d]"), k, i+1, j+1, ncol);
409-
}
412+
type[colIdx-1] = -colClassType; // freadMain checks bump up only not down. Deliberately don't catch here to test freadMain; e.g. test 959
413+
if (colClassTypeIdx == NUT) SET_STRING_ELT(colClassesAs, colIdx-1, STRING_ELT(listNames, i)); // unknown type --> defer to R
410414
}
411415
}
412416
UNPROTECT(1); // UNPROTECTing itemsInt inside loop to save protection stack
413417
}
414418
for (int i=0; i<ncol; i++) {
415419
if (type[i]<0) type[i] *= -1; // undo sign; was used to detect duplicates
416-
else if (selectColClasses) type[i] = CT_DROP; // reading will proceed in order of columns in file; reorder happens afterwards at R level
420+
else if (selectColClasses) type[i] = CT_DROP; // not seen --> drop; reading will proceed in order of columns in file; reorder happens afterwards at R level
417421
}
418422
UNPROTECT(2); // listNames and typeEnum_idx
419423
}

0 commit comments

Comments
 (0)