Skip to content

Commit 08d0f73

Browse files
authored
strlim: avoid one-byte stack overflow (#7408)
* strlim: avoid stack overflow Since the limit is up to 500 characters, make sure to allocate 501 bytes, including the terminator at the end, avoiding the stack overflow: data.table::fread(text = paste0( strrep("mary had a little lamb\n", 100), strrep("a", 500), "\n", "a") ) ==5358==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7ffd0f20c7e4 at pc 0x7f06826bb311 bp 0x7ffd0f20ad90 sp 0x7ffd0f20ad88 WRITE of size 1 at 0x7ffd0f20c7e4 thread T0 #0 0x7f06826bb310 in strlim /tmp/RtmpvbbbcM/R.INSTALLd607e4c6707/data.table/src/fread.c:235 #1 0x7f06826bb310 in freadMain /tmp/RtmpvbbbcM/R.INSTALLd607e4c6707/data.table/src/fread.c:2947 #2 0x7f06826c2d62 in freadR /tmp/RtmpvbbbcM/R.INSTALLd607e4c6707/data.table/src/freadR.c:229 * strlim(): limit and parametrise message size Truncate the messages silently in case of overflow. * Add a regression test
1 parent e4aef6d commit 08d0f73

File tree

2 files changed

+18
-10
lines changed

2 files changed

+18
-10
lines changed

inst/tests/tests.Rraw

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21858,3 +21858,10 @@ test(2344.04, key(DT[, .(V4 = c("b", "a"), V2, V5 = c("y", "x"), V1)]), c("V1",
2185821858

2185921859
# fread with quotes and single column #7366
2186021860
test(2345, fread('"this_that"\n"2025-01-01 00:00:01"'), data.table(this_that = as.POSIXct("2025-01-01 00:00:01", tz="UTC")))
21861+
21862+
# one-byte stack overflow in strlim() to be tested with sanitizers, #7408
21863+
text = paste0(
21864+
strrep("mary had a little lamb\n", 100),
21865+
strrep("a", 500), "\n", "a"
21866+
)
21867+
test(2346, data.table::fread(text = text), data.table(mary = rep("mary", 99), had = "had", a = "a", little = "little", lamb = "lamb"), warning = "First discarded non-empty line")

src/fread.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -219,15 +219,16 @@ static inline int64_t clamp_i64t(int64_t x, int64_t lower, int64_t upper)
219219
/**
220220
* Helper for error and warning messages to extract an input line starting at
221221
* `*ch` and until an end of line, but no longer than `limit` characters.
222-
* This function returns the string copied into an internal static buffer. Cannot
223-
* be called more than twice per single printf() invocation.
224-
* Parameter `limit` cannot exceed 500.
222+
* This function returns the string copied into a caller-allocated buffer (typically on the stack).
223+
* Parameter `limit` should not exceed STRLIM_BUF_SIZE-1 (500).
225224
* The data might contain % characters. Therefore, careful to ensure that if the msg
226225
* is constructed manually (using say snprintf) that warning(), stop()
227226
* and Rprintf() are all called as warning(_("%s"), msg) and not warning(msg).
228227
*/
229-
static const char* strlim(const char *ch, char buf[static 500], size_t limit)
228+
#define STRLIM_BUF_SIZE 501
229+
static const char* strlim(const char *ch, char buf[static STRLIM_BUF_SIZE], size_t limit)
230230
{
231+
if (limit >= STRLIM_BUF_SIZE) limit = STRLIM_BUF_SIZE-1;
231232
char *ch2 = buf;
232233
for (size_t width = 0; (*ch > '\r' || (*ch != '\0' && *ch != '\r' && *ch != '\n')) && width < limit; width++) {
233234
*ch2++ = *ch++;
@@ -1776,7 +1777,7 @@ int freadMain(freadMainArgs _args)
17761777
if (ch >= eof) STOP(_("Input is either empty, fully whitespace, or skip has been set after the last non-whitespace."));
17771778
if (verbose) {
17781779
if (lineStart > ch) DTPRINT(_(" Moved forward to first non-blank line (%d)\n"), row1line);
1779-
DTPRINT(_(" Positioned on line %d starting: <<%s>>\n"), row1line, strlim(lineStart, (char[500]) {0}, 30));
1780+
DTPRINT(_(" Positioned on line %d starting: <<%s>>\n"), row1line, strlim(lineStart, (char[STRLIM_BUF_SIZE]) {0}, 30));
17801781
}
17811782
ch = pos = lineStart;
17821783
}
@@ -1982,7 +1983,7 @@ int freadMain(freadMainArgs _args)
19821983
if (!fill && tt != ncol) INTERNAL_STOP("first line has field count %d but expecting %d", tt, ncol); // # nocov
19831984
if (verbose) {
19841985
DTPRINT(_(" Detected %d columns on line %d. This line is either column names or first data row. Line starts as: <<%s>>\n"),
1985-
tt, row1line, strlim(pos, (char[500]) {0}, 30));
1986+
tt, row1line, strlim(pos, (char[STRLIM_BUF_SIZE]) {0}, 30));
19861987
DTPRINT(_(" Quote rule picked = %d\n"), quoteRule);
19871988
DTPRINT(_(" fill=%s and the most number of columns found is %d\n"), fill ? "true" : "false", ncol);
19881989
}
@@ -2950,23 +2951,23 @@ int freadMain(freadMainArgs _args)
29502951
ch = skip_to_nextline(ch, eof);
29512952
while (ch < eof && isspace(*ch)) ch++;
29522953
if (ch == eof) {
2953-
DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[500]) {0}, 500));
2954+
DTWARN(_("Discarded single-line footer: <<%s>>"), strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500));
29542955
}
29552956
else {
29562957
ch = headPos;
29572958
int tt = countfields(&ch);
29582959
if (fill > 0) {
29592960
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=%d or even more based on your knowledge of the input file. Use fill=Inf for reading the whole file for detecting the number of fields. First discarded non-empty line: <<%s>>"),
2960-
DTi + row1line, ncol, tt, tt, strlim(skippedFooter, (char[500]) {0}, 500));
2961+
DTi + row1line, ncol, tt, tt, strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500));
29612962
} else {
29622963
DTWARN(_("Stopped early on line %"PRId64". Expected %d fields but found %d. Consider fill=TRUE. First discarded non-empty line: <<%s>>"),
2963-
DTi + row1line, ncol, tt, strlim(skippedFooter, (char[500]) {0}, 500));
2964+
DTi + row1line, ncol, tt, strlim(skippedFooter, (char[STRLIM_BUF_SIZE]) {0}, 500));
29642965
}
29652966
}
29662967
}
29672968
}
29682969
if (quoteRuleBumpedCh != NULL && quoteRuleBumpedCh < headPos) {
2969-
DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRId64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[500]) {0}, 500));
2970+
DTWARN(_("Found and resolved improper quoting out-of-sample. First healed line %"PRId64": <<%s>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), quoteRuleBumpedLine, strlim(quoteRuleBumpedCh, (char[STRLIM_BUF_SIZE]) {0}, 500));
29702971
}
29712972

29722973
if (verbose) {

0 commit comments

Comments
 (0)