Skip to content

Commit 950120f

Browse files
committed
added type bump macros
1 parent 2a1f102 commit 950120f

File tree

1 file changed

+14
-11
lines changed

1 file changed

+14
-11
lines changed

src/fread.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ static void Field(FieldParseContext *ctx);
109109
// Map a character to its distance from '0', as an unsigned 8-bit-or-wider value.
// Characters below '0' wrap to large values because the result type is unsigned.
// Both the argument and the whole expansion are parenthesized so the macro is
// safe with compound arguments and inside larger expressions.
#define AS_DIGIT(x) ((uint_fast8_t)((x) - '0'))
110110
// True when x is one of the characters '0'..'9'. Relies on AS_DIGIT producing an
// unsigned value, so anything outside that range compares >= 10.
// The expansion is parenthesized so that uses such as !IS_DIGIT(c) or
// IS_DIGIT(c) == flag apply to the whole comparison rather than to part of it.
#define IS_DIGIT(x) (AS_DIGIT(x) < 10)
111111

112+
// Recover the non-negative parse-type index from a type code that may have been
// negated to mark an out-of-sample type bump.
#define GET_UNBUMPED(x) (abs(x))
113+
// Mark a parse type as bumped by negating it.
// The argument is parenthesized so a compound expression is negated as a whole
// (MAKE_BUMPED(a + b) is -(a + b), not -a + b) and a negative literal does not
// fuse with the minus sign into a decrement operator.
#define MAKE_BUMPED(x) (-(x))
114+
112115
//=================================================================================================
113116
//
114117
// Utility functions
@@ -220,11 +223,11 @@ static char *typesAsString(int ncol) {
220223
static char str[101];
221224
int i=0;
222225
if (ncol<=100) {
223-
for (; i<ncol; i++) str[i] = typeLetter[abs(type[i])]; // abs for out-of-sample type bumps (negative)
226+
for (; i<ncol; i++) str[i] = typeLetter[GET_UNBUMPED(type[i])]; // abs for out-of-sample type bumps (negative)
224227
} else {
225-
for (; i<80; i++) str[i] = typeLetter[abs(type[i])];
228+
for (; i<80; i++) str[i] = typeLetter[GET_UNBUMPED(type[i])];
226229
str[i++]='.'; str[i++]='.'; str[i++]='.';
227-
for (int j=ncol-10; j<ncol; j++) str[i++] = typeLetter[abs(type[j])];
230+
for (int j=ncol-10; j<ncol; j++) str[i++] = typeLetter[GET_UNBUMPED(type[j])];
228231
}
229232
str[i] = '\0';
230233
return str;
@@ -2405,7 +2408,7 @@ int freadMain(freadMainArgs _args) {
24052408
// DTPRINT(_("Field %d: '%.10s' as type %d (tch=%p)\n"), j+1, tch, type[j], tch);
24062409
fieldStart = tch;
24072410
int8_t thisType = type[j]; // fetch shared type once. Cannot read half-written byte is one reason type's type is single byte to avoid atomic read here.
2408-
fun[abs(thisType)](&fctx);
2411+
fun[GET_UNBUMPED(thisType)](&fctx);
24092412
if (*tch!=sep) break;
24102413
int8_t thisSize = size[j];
24112414
if (thisSize) ((char **) targets)[thisSize] += thisSize; // 'if' for when rereading to avoid undefined NULL+0
@@ -2455,7 +2458,7 @@ int freadMain(freadMainArgs _args) {
24552458
fieldStart = tch;
24562459
int8_t joldType = type[j];
24572460
int8_t thisType = joldType; // to know if it was bumped in (rare) out-of-sample type exceptions
2458-
int8_t absType = (int8_t)abs(thisType);
2461+
int8_t absType = (int8_t)GET_UNBUMPED(thisType);
24592462

24602463
while (absType < NUMTYPE) {
24612464
tch = fieldStart;
@@ -2468,7 +2471,7 @@ int freadMain(freadMainArgs _args) {
24682471
if (!end_of_field(tch)) tch = afterSpace; // else it is the field_end, we're on closing sep|eol and we'll let processor write appropriate NA as if field was empty
24692472
if (*tch==quote && quote) { quoted=true; tch++; }
24702473
} // else Field() handles NA inside it unlike other processors e.g. ,, is interpreted as "" or NA depending on option read inside Field()
2471-
fun[abs(thisType)](&fctx);
2474+
fun[GET_UNBUMPED(thisType)](&fctx);
24722475
if (quoted) { // quoted was only set to true with '&& quote' above (=> quote!='\0' now)
24732476
if (*tch==quote) tch++;
24742477
else goto typebump;
@@ -2487,7 +2490,7 @@ int freadMain(freadMainArgs _args) {
24872490
// sure a single re-read will definitely work.
24882491
typebump:
24892492
while (++absType<CT_STRING && disabled_parsers[absType]) {};
2490-
thisType = -absType;
2493+
thisType = MAKE_BUMPED(absType);
24912494
tch = fieldStart;
24922495
}
24932496

@@ -2499,7 +2502,7 @@ int freadMain(freadMainArgs _args) {
24992502
if (j+fieldsRemaining != ncol) break;
25002503
checkedNumberOfFields = true;
25012504
}
2502-
if (thisType <= -NUMTYPE) {
2505+
if (thisType <= MAKE_BUMPED(NUMTYPE)) {
25032506
break; // Improperly quoted char field needs to be healed below, other columns will be filled #5041 and #4774
25042507
}
25052508
#pragma omp critical
@@ -2512,7 +2515,7 @@ int freadMain(freadMainArgs _args) {
25122515
int len = snprintf(temp, 1000,
25132516
_("Column %d%s%.*s%s bumped from '%s' to '%s' due to <<%.*s>> on row %"PRIu64"\n"),
25142517
j+1, colNames?" <<":"", colNames?(colNames[j].len):0, colNames?(colNamesAnchor+colNames[j].off):"", colNames?">>":"",
2515-
typeName[abs(joldType)], typeName[abs(thisType)],
2518+
typeName[GET_UNBUMPED(joldType)], typeName[GET_UNBUMPED(thisType)],
25162519
(int)(tch-fieldStart), fieldStart, (uint64_t)(ctx.DTi+myNrow));
25172520
if (len > 1000) len = 1000;
25182521
if (len > 0) {
@@ -2678,7 +2681,7 @@ int freadMain(freadMainArgs _args) {
26782681
// if nTypeBump>0, not-bumped columns are about to be assigned parse type -CT_STRING for the reread, so we have to count
26792682
// parse types now (for log). We can't count final column types afterwards because many parse types map to the same column type.
26802683
for (int i=0; i<NUMTYPE; i++) typeCounts[i] = 0;
2681-
for (int i=0; i<ncol; i++) typeCounts[ abs(type[i]) ]++;
2684+
for (int i=0; i<ncol; i++) typeCounts[GET_UNBUMPED(type[i])]++;
26822685

26832686
if (nTypeBump) {
26842687
if (verbose) DTPRINT(_(" %d out-of-sample type bumps: %s\n"), nTypeBump, typesAsString(ncol));
@@ -2698,7 +2701,7 @@ int freadMain(freadMainArgs _args) {
26982701
} else if (type[j]>=1) {
26992702
// we'll skip over non-bumped columns in the rerun, whilst still incrementing resi (hence not CT_DROP)
27002703
// not -type[i] either because that would reprocess the contents of not-bumped columns wastefully
2701-
type[j] = -CT_STRING;
2704+
type[j] = MAKE_BUMPED(CT_STRING);
27022705
size[j] = 0;
27032706
}
27042707
}

0 commit comments

Comments
 (0)