Skip to content

Commit e5bbe96

Browse files
committed
separate helpers
1 parent 79508a3 commit e5bbe96

File tree

1 file changed

+30
-19
lines changed

1 file changed

+30
-19
lines changed

src/fread.c

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -306,16 +306,31 @@ static inline bool eol(const char **pch)
306306
}
307307

308308
/**
309-
* Walk to the start of the next line (or `eof` if none) by skipping the
310-
* current line's contents and its newline sequence.
309+
* Walk to the newline sequence terminating the current line and return a pointer to that sequence's final
310+
* character (or `eof` if none exists).
311311
*/
312-
static inline const char *skip_line(const char *ch, const char *eof) {
312+
static inline const char *skip_to_eol(const char *ch, const char *eof)
313+
{
313314
while (ch < eof && *ch != '\n' && *ch != '\r')
314315
ch++;
315-
if (ch < eof && eol(&ch)) ch++;
316+
if (ch < eof) {
317+
const char *tmp = ch;
318+
if (eol(&tmp)) ch = tmp;
319+
}
316320
return ch;
317321
}
318322

323+
/**
324+
* Skip past the current line (including its newline sequence) and return a pointer to the first character of the
325+
* next line, or `eof` if none exists.
326+
*/
327+
static inline const char *skip_to_nextline(const char *ch, const char *eof)
328+
{
329+
const char *lineEnd = skip_to_eol(ch, eof);
330+
if (lineEnd < eof) lineEnd++;
331+
return lineEnd;
332+
}
333+
319334
/**
320335
* Return True iff `ch` is a valid field terminator character: either a field
321336
* separator or a newline.
@@ -367,7 +382,7 @@ static inline int countfields(const char **pch)
367382
if (sep == ' ') while (*ch == ' ') ch++; // multiple sep==' ' at the start does not mean sep
368383
skip_white(&ch);
369384
if (commentChar && *ch == commentChar) {
370-
const char *next = skip_line(ch, eof);
385+
const char *next = skip_to_nextline(ch, eof);
371386
if (next < eof) {
372387
ch = next;
373388
continue; // rescan next line
@@ -390,7 +405,7 @@ static inline int countfields(const char **pch)
390405
while (ch < eof) {
391406
Field(&ctx);
392407
if (commentChar && *ch == commentChar) {
393-
ch = skip_line(ch, eof);
408+
ch = skip_to_nextline(ch, eof);
394409
*pch = ch;
395410
return ncol;
396411
}
@@ -422,7 +437,7 @@ static inline const char *nextGoodLine(const char *ch, int ncol)
422437
// If this doesn't return the true line start, no matter. The previous thread will run-on and
423438
// resolve it. A good guess is all we need here. Being wrong will just be a bit slower.
424439
// If there are no embedded newlines, all newlines are true, and this guess will never be wrong.
425-
ch = skip_line(ch, eof);
440+
ch = skip_to_nextline(ch, eof);
426441
if (ch == eof) return eof;
427442
const char *simpleNext = ch; // simply the first newline after the jump
428443
// if a better one can't be found, return this one (simpleNext). This will be the case when
@@ -431,7 +446,7 @@ static inline const char *nextGoodLine(const char *ch, int ncol)
431446
for (int attempts = 0; attempts < 5 && ch < eof; attempts++) {
432447
const char *ch2 = ch;
433448
if (countfields(&ch2) == ncol) return ch; // returns simpleNext here on first attempt, almost all the time
434-
ch = skip_line(ch, eof);
449+
ch = skip_to_nextline(ch, eof);
435450
}
436451
return simpleNext;
437452
}
@@ -2030,7 +2045,7 @@ int freadMain(freadMainArgs _args)
20302045
const char *lineStart = ch;
20312046
ch = skip_to_comment_or_nonwhite(ch);
20322047
if (ch < eof && *ch == commentChar) {
2033-
ch = skip_line(ch, eof);
2048+
ch = skip_to_nextline(ch, eof);
20342049
row1line++;
20352050
continue;
20362051
}
@@ -2248,7 +2263,7 @@ int freadMain(freadMainArgs _args)
22482263
while (ch < eof) {
22492264
ch = skip_to_comment_or_nonwhite(ch);
22502265
if (ch < eof && *ch == commentChar) {
2251-
ch = skip_line(ch, eof);
2266+
ch = skip_to_nextline(ch, eof);
22522267
} else break;
22532268
}
22542269
pos = ch;
@@ -2277,7 +2292,7 @@ int freadMain(freadMainArgs _args)
22772292
// skip leading whitespace to detect inline comment marker in header row
22782293
const char *commentPos = skip_to_comment_or_nonwhite(ch);
22792294
if (commentPos < eof && *commentPos == commentChar) {
2280-
ch = skip_line(commentPos, eof);
2295+
ch = skip_to_eol(commentPos, eof);
22812296
break; // stop header parsing after comment
22822297
}
22832298
}
@@ -2291,7 +2306,7 @@ int freadMain(freadMainArgs _args)
22912306
// fast-trim trailing comment text after the header names
22922307
const char *commentPos = skip_to_comment_or_nonwhite(ch);
22932308
if (commentPos < eof && *commentPos == commentChar) {
2294-
ch = skip_line(commentPos, eof);
2309+
ch = skip_to_eol(commentPos, eof);
22952310
}
22962311
}
22972312
if (ch == eof || *ch == '\0') {
@@ -2550,10 +2565,7 @@ int freadMain(freadMainArgs _args)
25502565
// treat lines whose first non-space character is the comment marker as empty
25512566
const char *afterWhite = skip_to_comment_or_nonwhite(tLineStart);
25522567
if (afterWhite < eof && *afterWhite == commentChar) {
2553-
const char *skip = afterWhite;
2554-
while (skip < eof && *skip != '\n' && *skip != '\r') skip++;
2555-
if (skip < eof && eol(&skip)) skip++;
2556-
tch = skip;
2568+
tch = skip_to_nextline(afterWhite, eof);
25572569
continue;
25582570
}
25592571
}
@@ -2704,8 +2716,7 @@ int freadMain(freadMainArgs _args)
27042716
if (commentChar) {
27052717
const char *commentPtr = skip_to_comment_or_nonwhite(tch);
27062718
if (commentPtr < eof && *commentPtr == commentChar) {
2707-
tch = commentPtr;
2708-
while (tch < eof && *tch != '\n' && *tch != '\r') tch++;
2719+
tch = skip_to_eol(commentPtr, eof);
27092720
break;
27102721
}
27112722
}
@@ -2919,7 +2930,7 @@ int freadMain(freadMainArgs _args)
29192930
} else {
29202931
const char *skippedFooter = ENC2NATIVE(ch);
29212932
// detect if it's a single line footer. Commonly the row count from SQL queries.
2922-
ch = skip_line(ch, eof);
2933+
ch = skip_to_nextline(ch, eof);
29232934
while (ch < eof && isspace(*ch)) ch++;
29242935
if (ch == eof) {
29252936
DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[500]) {0}, 500));

0 commit comments

Comments
 (0)