Skip to content

Commit d954d48

Browse files
committed
Detect integer overflow when processing sizes, indices, integer values.
1 parent a6ca68a commit d954d48

File tree

3 files changed

+149
-22
lines changed

3 files changed

+149
-22
lines changed

include/eminem/Parser.hpp

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <thread>
1111
#include <mutex>
1212
#include <condition_variable>
13+
#include <limits>
1314

1415
#include "byteme/RawBufferReader.hpp"
1516
#include "byteme/PerByte.hpp"
@@ -569,13 +570,24 @@ class Parser {
569570
}
570571
};
571572

573+
constexpr Index max_limit = std::numeric_limits<Index>::max();
574+
constexpr Index max_limit_before_mult = max_limit / 10;
575+
constexpr Index max_limit_mod = max_limit % 10;
576+
572577
while (1) {
573578
char x = input.get();
574579
switch(x) {
575580
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
581+
{
582+
Index delta = x - '0';
583+
// Structuring the conditionals so that it's most likely to short-circuit after only testing the first one.
584+
if (output.index >= max_limit_before_mult && !(output.index == max_limit_before_mult && delta <= max_limit_mod)) {
585+
throw std::runtime_error("integer overflow in " + what() + " field on line " + std::to_string(overall_line_count + 1));
586+
}
587+
output.index *= 10;
588+
output.index += delta;
589+
}
576590
found = true;
577-
output.index *= 10;
578-
output.index += x - '0';
579591
break;
580592
case '\n':
581593
// This check only needs to be put here, as all blanks should be chomped before calling
@@ -745,6 +757,8 @@ class Parser {
745757
private:
746758
template<typename Type_>
747759
struct ParseInfo {
760+
ParseInfo() = default;
761+
ParseInfo(Type_ value, bool remaining) : value(value), remaining(remaining) {}
748762
Type_ value;
749763
bool remaining;
750764
};
@@ -1358,41 +1372,55 @@ class Parser {
13581372
public:
13591373
template<class Input2_>
13601374
ParseInfo<Type_> operator()(Input2_& input, Index overall_line_count) {
1375+
Type_ val = 0;
1376+
bool found = false;
1377+
13611378
bool negative = (input.get() == '-');
13621379
if (negative) {
13631380
if (!(input.advance())) {
13641381
throw std::runtime_error("premature termination of an integer on line " + std::to_string(overall_line_count + 1));
13651382
}
13661383
}
13671384

1368-
Type_ val = 0;
1369-
auto finish = [&](bool valid) -> ParseInfo<Type_> {
1370-
ParseInfo<Type_> output;
1371-
output.remaining = valid;
1372-
if (negative) {
1373-
val *= -1;
1374-
}
1375-
output.value = val;
1376-
return output;
1377-
};
1385+
constexpr Type_ upper_limit = std::numeric_limits<Type_>::max();
1386+
constexpr Type_ upper_limit_before_mult = upper_limit / 10;
1387+
constexpr Type_ upper_limit_mod = upper_limit % 10;
1388+
constexpr Type_ lower_limit = std::numeric_limits<Type_>::lowest();
1389+
constexpr Type_ lower_limit_before_mult = lower_limit / 10;
1390+
constexpr Type_ lower_limit_mod = -(lower_limit % 10);
13781391

1379-
bool found = false;
13801392
while (1) {
13811393
char x = input.get();
13821394
switch (x) {
13831395
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1384-
val *= 10;
1385-
val += x - '0';
1396+
{
1397+
Type_ delta = x - '0';
1398+
// We have to handle negative and positive cases separately as they overflow at different thresholds.
1399+
if (negative) {
1400+
// Structuring the conditionals so that it's most likely to short-circuit after only testing the first one.
1401+
if (val <= lower_limit_before_mult && !(val == lower_limit_before_mult && delta <= lower_limit_mod)) {
1402+
throw std::runtime_error("integer underflow on line " + std::to_string(overall_line_count + 1));
1403+
}
1404+
val *= 10;
1405+
val -= delta;
1406+
} else {
1407+
if (val >= upper_limit_before_mult && !(val == upper_limit_before_mult && delta <= upper_limit_mod)) {
1408+
throw std::runtime_error("integer overflow on line " + std::to_string(overall_line_count + 1));
1409+
}
1410+
val *= 10;
1411+
val += delta;
1412+
}
1413+
}
13861414
found = true;
13871415
break;
13881416
case ' ': case '\t':
13891417
if (!advance_and_chomp(input)) { // skipping past the current position before chomping.
1390-
return finish(false);
1418+
return ParseInfo<Type_>(val, false);
13911419
}
13921420
if (input.get() != '\n') {
13931421
throw std::runtime_error("more fields than expected on line " + std::to_string(overall_line_count + 1));
13941422
}
1395-
return finish(input.advance()); // move past the newline.
1423+
return ParseInfo<Type_>(val, input.advance()); // move past the newline.
13961424
case '\n':
13971425
// This check only needs to be put here, as all blanks should be chomped before calling
13981426
// this function; so we must start on a non-blank character. This starting character is either:
@@ -1402,7 +1430,7 @@ class Parser {
14021430
if (!found) {
14031431
throw std::runtime_error("empty integer field on line " + std::to_string(overall_line_count + 1));
14041432
}
1405-
return finish(input.advance()); // move past the newline.
1433+
return ParseInfo<Type_>(val, input.advance()); // move past the newline.
14061434
default:
14071435
throw std::runtime_error("expected an integer value on line " + std::to_string(overall_line_count + 1));
14081436
}
@@ -1412,7 +1440,7 @@ class Parser {
14121440
}
14131441
}
14141442

1415-
return finish(false);
1443+
return ParseInfo<Type_>(val, false);
14161444
}
14171445
};
14181446

tests/src/integer.cpp

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,14 +98,84 @@ TEST(ParserInteger, OtherType) {
9898
eminem::Parser parser(std::make_unique<byteme::PerByteSerial<char> >(std::move(reader)), {});
9999
parser.scan_preamble();
100100

101-
std::vector<uint16_t> observed;
102-
EXPECT_TRUE(parser.template scan_integer<uint16_t>([&](eminem::Index, eminem::Index, uint16_t val) {
101+
std::vector<std::uint16_t> observed;
102+
EXPECT_TRUE(parser.template scan_integer<std::uint16_t>([&](eminem::Index, eminem::Index, std::uint16_t val) {
103103
observed.push_back(val);
104104
}));
105-
std::vector<uint16_t> expected { 33, 666, 9 };
105+
std::vector<std::uint16_t> expected { 33, 666, 9 };
106106
EXPECT_EQ(observed, expected);
107107
}
108108

109+
TEST(ParserInteger, Overflow) {
110+
// Unsigned.
111+
{
112+
// First we check that it works as a positive control.
113+
std::string input = "%%MatrixMarket matrix coordinate integer general\n2 2 3\n1 1 255\n2 2 0\n2 1 -0\n";
114+
auto reader = std::make_unique<byteme::RawBufferReader>(reinterpret_cast<const unsigned char*>(input.data()), input.size());
115+
eminem::Parser parser(std::make_unique<byteme::PerByteSerial<char> >(std::move(reader)), {});
116+
parser.scan_preamble();
117+
118+
std::vector<std::uint8_t> observed;
119+
parser.scan_integer<std::uint8_t>([&](eminem::Index, eminem::Index, std::uint8_t val){
120+
observed.push_back(val);
121+
});
122+
std::vector<std::uint8_t> expected { 255, 0, 0 };
123+
EXPECT_EQ(observed, expected);
124+
125+
// Now testing for the failures.
126+
auto test_overflow = [&](std::string input) -> void {
127+
auto reader = std::make_unique<byteme::RawBufferReader>(reinterpret_cast<const unsigned char*>(input.data()), input.size());
128+
eminem::Parser parser(std::make_unique<byteme::PerByteSerial<char> >(std::move(reader)), {});
129+
parser.scan_preamble();
130+
EXPECT_ANY_THROW({
131+
try {
132+
parser.scan_integer<std::uint8_t>([&](eminem::Index, eminem::Index, std::uint8_t){});
133+
} catch (std::exception& e) {
134+
EXPECT_THAT(e.what(), ::testing::HasSubstr("flow"));
135+
throw;
136+
}
137+
});
138+
};
139+
test_overflow("%%MatrixMarket matrix coordinate integer general\n2 2 1\n1 1 256");
140+
test_overflow("%%MatrixMarket matrix coordinate integer general\n2 2 1\n1 1 1000");
141+
test_overflow("%%MatrixMarket matrix coordinate integer general\n2 2 1\n1 1 -1");
142+
}
143+
144+
// Signed.
145+
{
146+
// First we check that it works as a positive control.
147+
std::string input = "%%MatrixMarket matrix coordinate integer general\n2 2 3\n1 1 0\n2 2 127\n2 1 -128\n";
148+
auto reader = std::make_unique<byteme::RawBufferReader>(reinterpret_cast<const unsigned char*>(input.data()), input.size());
149+
eminem::Parser parser(std::make_unique<byteme::PerByteSerial<char> >(std::move(reader)), {});
150+
parser.scan_preamble();
151+
std::vector<std::int8_t> observed;
152+
parser.scan_integer<std::int8_t>([&](eminem::Index, eminem::Index, std::int8_t val){
153+
observed.push_back(val);
154+
});
155+
std::vector<std::int8_t> expected { 0, 127, -128 };
156+
EXPECT_EQ(observed, expected);
157+
158+
// Now testing for the failures.
159+
auto test_overflow = [&](std::string input) -> void {
160+
auto reader = std::make_unique<byteme::RawBufferReader>(reinterpret_cast<const unsigned char*>(input.data()), input.size());
161+
eminem::Parser parser(std::make_unique<byteme::PerByteSerial<char> >(std::move(reader)), {});
162+
parser.scan_preamble();
163+
EXPECT_ANY_THROW({
164+
try {
165+
parser.scan_integer<std::int8_t>([&](eminem::Index, eminem::Index, std::int8_t){});
166+
} catch (std::exception& e) {
167+
EXPECT_THAT(e.what(), ::testing::HasSubstr("flow"));
168+
throw;
169+
}
170+
});
171+
};
172+
test_overflow("%%MatrixMarket matrix coordinate integer general\n2 2 1\n1 1 128");
173+
test_overflow("%%MatrixMarket matrix coordinate integer general\n2 2 1\n1 1 1000");
174+
test_overflow("%%MatrixMarket matrix coordinate integer general\n2 2 1\n1 1 -129");
175+
test_overflow("%%MatrixMarket matrix coordinate integer general\n2 2 1\n1 1 -1000");
176+
}
177+
}
178+
109179
TEST(ParserInteger, QuitEarly) {
110180
std::string input = "%%MatrixMarket matrix coordinate integer general\n10 10 3\n1 2 33\n4 5 666\n7 8 9\n";
111181
auto reader = std::make_unique<byteme::RawBufferReader>(reinterpret_cast<const unsigned char*>(input.data()), input.size());

tests/src/preamble.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,35 @@ TEST(ParserPreamble, SizeErrors) {
236236
test_error("%%MatrixMarket matrix coordinate integer general\n2 -5 1\n", "unexpected character");
237237
}
238238

239+
TEST(ParserPreamble, SizeOverflowErrors) {
240+
auto limit = std::numeric_limits<eminem::Index>::max();
241+
242+
// No overflow at this point, as a negative control.
243+
auto limitstring = std::to_string(limit);
244+
{
245+
std::string payload = std::string("%%MatrixMarket matrix coordinate integer general\n2 ") + limitstring + std::string(" 0\n");
246+
auto reader = std::make_unique<byteme::RawBufferReader>(reinterpret_cast<const unsigned char*>(payload.data()), payload.size());
247+
eminem::Parser parser(std::make_unique<byteme::PerByteSerial<char> >(std::move(reader)), {});
248+
parser.scan_preamble();
249+
EXPECT_EQ(parser.get_ncols(), limit);
250+
}
251+
252+
// Obvious overflow if we increase it by 10-fold with an extra digit on the end.
253+
test_error("%%MatrixMarket matrix coordinate integer general\n1 " + limitstring + "0 0\n", "overflow");
254+
255+
// Manually adding one to the limit.
256+
auto extrastring = limitstring;
257+
for (auto it = extrastring.rbegin(); it != extrastring.rend(); ++it) {
258+
int digit = *it - '0';
259+
++digit;
260+
if (digit < 10) {
261+
*it = digit + '0';
262+
break;
263+
}
264+
}
265+
test_error("%%MatrixMarket matrix coordinate integer general\n1 " + extrastring + " 0\n", "overflow");
266+
}
267+
239268
/**********************************************
240269
**********************************************/
241270

0 commit comments

Comments
 (0)