Skip to content

Commit d18c788

Browse files
authored
Fix past-the-end array access on unsigned char builds (#260)
- There are some platforms, such as ARM architectures, where `char` is implemented as `unsigned char`. On these systems, the existing code that indexes the `ParseFlagMap` and `WhitespaceMap` arrays will access elements past the end of the arrays. - This PR fixes the bug by making the offset used to convert a `char` into an array index depend on how `char` is implemented. - The modifications to the single headers were made manually, because running `cmake --build build/ --target=generate_single_header` introduces far more changes than the ones intended by this PR.
1 parent 2303be9 commit d18c788

File tree

5 files changed

+76
-85
lines changed

5 files changed

+76
-85
lines changed

include/internal/basic_csv_parser.hpp

Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,24 +23,27 @@
2323

2424
namespace csv {
2525
namespace internals {
26+
27+
/** Helper constexpr function to initialize an array with all the elements set to value
28+
*/
29+
template<typename OutArray, typename T = typename OutArray::type>
30+
HEDLEY_CONST CONSTEXPR_17 OutArray arrayToDefault(T&& value)
31+
{
32+
OutArray a {};
33+
for (auto& e : a)
34+
e = value;
35+
return a;
36+
}
37+
2638
/** Create a vector v where each index i corresponds to the
2739
* ASCII number for a character and, v[i + 128] labels it according to
2840
* the CSVReader::ParseFlags enum
2941
*/
3042
HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter) {
31-
std::array<ParseFlags, 256> ret = {};
32-
for (int i = -128; i < 128; i++) {
33-
const int arr_idx = i + 128;
34-
char ch = char(i);
35-
36-
if (ch == delimiter)
37-
ret[arr_idx] = ParseFlags::DELIMITER;
38-
else if (ch == '\r' || ch == '\n')
39-
ret[arr_idx] = ParseFlags::NEWLINE;
40-
else
41-
ret[arr_idx] = ParseFlags::NOT_SPECIAL;
42-
}
43-
43+
auto ret = arrayToDefault<ParseFlagMap>(ParseFlags::NOT_SPECIAL);
44+
ret[delimiter + CHAR_OFFSET] = ParseFlags::DELIMITER;
45+
ret['\r' + CHAR_OFFSET] = ParseFlags::NEWLINE;
46+
ret['\n' + CHAR_OFFSET] = ParseFlags::NEWLINE;
4447
return ret;
4548
}
4649

@@ -50,7 +53,7 @@ namespace csv {
5053
*/
5154
HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter, char quote_char) {
5255
std::array<ParseFlags, 256> ret = make_parse_flags(delimiter);
53-
ret[(size_t)quote_char + 128] = ParseFlags::QUOTE;
56+
ret[quote_char + CHAR_OFFSET] = ParseFlags::QUOTE;
5457
return ret;
5558
}
5659

@@ -59,19 +62,10 @@ namespace csv {
5962
* c is a whitespace character
6063
*/
6164
HEDLEY_CONST CONSTEXPR_17 WhitespaceMap make_ws_flags(const char* ws_chars, size_t n_chars) {
62-
std::array<bool, 256> ret = {};
63-
for (int i = -128; i < 128; i++) {
64-
const int arr_idx = i + 128;
65-
char ch = char(i);
66-
ret[arr_idx] = false;
67-
68-
for (size_t j = 0; j < n_chars; j++) {
69-
if (ws_chars[j] == ch) {
70-
ret[arr_idx] = true;
71-
}
72-
}
65+
auto ret = arrayToDefault<WhitespaceMap>(false);
66+
for (size_t j = 0; j < n_chars; j++) {
67+
ret[ws_chars[j] + CHAR_OFFSET] = true;
7368
}
74-
7569
return ret;
7670
}
7771

@@ -221,7 +215,7 @@ namespace csv {
221215
void end_feed();
222216

223217
CONSTEXPR_17 ParseFlags parse_flag(const char ch) const noexcept {
224-
return _parse_flags.data()[ch + 128];
218+
return _parse_flags.data()[ch + CHAR_OFFSET];
225219
}
226220

227221
CONSTEXPR_17 ParseFlags compound_parse_flag(const char ch) const noexcept {
@@ -285,7 +279,7 @@ namespace csv {
285279
RowCollection* _records = nullptr;
286280

287281
CONSTEXPR_17 bool ws_flag(const char ch) const noexcept {
288-
return _ws_flags.data()[ch + 128];
282+
return _ws_flags.data()[ch + CHAR_OFFSET];
289283
}
290284

291285
size_t& current_row_start() {

include/internal/common.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,4 +209,7 @@ namespace csv {
209209

210210
/** Integer indicating a requested column wasn't found. */
211211
constexpr int CSV_NOT_FOUND = -1;
212+
213+
/** Offset to convert char into array index. */
214+
constexpr unsigned CHAR_OFFSET = std::numeric_limits<char>::is_signed ? 128 : 0;
212215
}

include/internal/csv_row.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ namespace csv {
7878
if (value.empty()) {
7979
bool prev_ch_quote = false;
8080
for (size_t i = 0; i < field.length; i++) {
81-
if (this->data->parse_flags[field_str[i] + 128] == ParseFlags::QUOTE) {
81+
if (this->data->parse_flags[field_str[i] + CHAR_OFFSET] == ParseFlags::QUOTE) {
8282
if (prev_ch_quote) {
8383
prev_ch_quote = false;
8484
continue;

single_include/csv.hpp

Lines changed: 25 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4846,6 +4846,9 @@ namespace csv {
48464846

48474847
/** Integer indicating a requested column wasn't found. */
48484848
constexpr int CSV_NOT_FOUND = -1;
4849+
4850+
/** Offset to convert char into array index. */
4851+
constexpr unsigned CHAR_OFFSET = std::numeric_limits<char>::is_signed ? 128 : 0;
48494852
}
48504853

48514854

@@ -5862,24 +5865,27 @@ inline std::ostream& operator << (std::ostream& os, csv::CSVField const& value)
58625865

58635866
namespace csv {
58645867
namespace internals {
5868+
5869+
/** Helper constexpr function to initialize an array with all the elements set to value
5870+
*/
5871+
template<typename OutArray, typename T = typename OutArray::type>
5872+
HEDLEY_CONST CONSTEXPR_17 OutArray arrayToDefault(T&& value)
5873+
{
5874+
OutArray a {};
5875+
for (auto& e : a)
5876+
e = value;
5877+
return a;
5878+
}
5879+
58655880
/** Create a vector v where each index i corresponds to the
58665881
* ASCII number for a character and, v[i + 128] labels it according to
58675882
* the CSVReader::ParseFlags enum
58685883
*/
58695884
HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter) {
5870-
std::array<ParseFlags, 256> ret = {};
5871-
for (int i = -128; i < 128; i++) {
5872-
const int arr_idx = i + 128;
5873-
char ch = char(i);
5874-
5875-
if (ch == delimiter)
5876-
ret[arr_idx] = ParseFlags::DELIMITER;
5877-
else if (ch == '\r' || ch == '\n')
5878-
ret[arr_idx] = ParseFlags::NEWLINE;
5879-
else
5880-
ret[arr_idx] = ParseFlags::NOT_SPECIAL;
5881-
}
5882-
5885+
auto ret = arrayToDefault<ParseFlagMap>(ParseFlags::NOT_SPECIAL);
5886+
ret[delimiter + CHAR_OFFSET] = ParseFlags::DELIMITER;
5887+
ret['\r' + CHAR_OFFSET] = ParseFlags::NEWLINE;
5888+
ret['\n' + CHAR_OFFSET] = ParseFlags::NEWLINE;
58835889
return ret;
58845890
}
58855891

@@ -5889,7 +5895,7 @@ namespace csv {
58895895
*/
58905896
HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter, char quote_char) {
58915897
std::array<ParseFlags, 256> ret = make_parse_flags(delimiter);
5892-
ret[(size_t)quote_char + 128] = ParseFlags::QUOTE;
5898+
ret[quote_char + CHAR_OFFSET] = ParseFlags::QUOTE;
58935899
return ret;
58945900
}
58955901

@@ -5898,19 +5904,10 @@ namespace csv {
58985904
* c is a whitespace character
58995905
*/
59005906
HEDLEY_CONST CONSTEXPR_17 WhitespaceMap make_ws_flags(const char* ws_chars, size_t n_chars) {
5901-
std::array<bool, 256> ret = {};
5902-
for (int i = -128; i < 128; i++) {
5903-
const int arr_idx = i + 128;
5904-
char ch = char(i);
5905-
ret[arr_idx] = false;
5906-
5907-
for (size_t j = 0; j < n_chars; j++) {
5908-
if (ws_chars[j] == ch) {
5909-
ret[arr_idx] = true;
5910-
}
5911-
}
5907+
auto ret = arrayToDefault<WhitespaceMap>(false);
5908+
for (size_t j = 0; j < n_chars; j++) {
5909+
ret[ws_chars[j] + CHAR_OFFSET] = true;
59125910
}
5913-
59145911
return ret;
59155912
}
59165913

@@ -6060,7 +6057,7 @@ namespace csv {
60606057
void end_feed();
60616058

60626059
CONSTEXPR_17 ParseFlags parse_flag(const char ch) const noexcept {
6063-
return _parse_flags.data()[ch + 128];
6060+
return _parse_flags.data()[ch + CHAR_OFFSET];
60646061
}
60656062

60666063
CONSTEXPR_17 ParseFlags compound_parse_flag(const char ch) const noexcept {
@@ -6124,7 +6121,7 @@ namespace csv {
61246121
RowCollection* _records = nullptr;
61256122

61266123
CONSTEXPR_17 bool ws_flag(const char ch) const noexcept {
6127-
return _ws_flags.data()[ch + 128];
6124+
return _ws_flags.data()[ch + CHAR_OFFSET];
61286125
}
61296126

61306127
size_t& current_row_start() {

single_include_test/csv.hpp

Lines changed: 25 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4846,6 +4846,9 @@ namespace csv {
48464846

48474847
/** Integer indicating a requested column wasn't found. */
48484848
constexpr int CSV_NOT_FOUND = -1;
4849+
4850+
/** Offset to convert char into array index. */
4851+
constexpr unsigned CHAR_OFFSET = std::numeric_limits<char>::is_signed ? 128 : 0;
48494852
}
48504853

48514854

@@ -5862,24 +5865,27 @@ inline std::ostream& operator << (std::ostream& os, csv::CSVField const& value)
58625865

58635866
namespace csv {
58645867
namespace internals {
5868+
5869+
/** Helper constexpr function to initialize an array with all the elements set to value
5870+
*/
5871+
template<typename OutArray, typename T = typename OutArray::type>
5872+
HEDLEY_CONST CONSTEXPR_17 OutArray arrayToDefault(T&& value)
5873+
{
5874+
OutArray a {};
5875+
for (auto& e : a)
5876+
e = value;
5877+
return a;
5878+
}
5879+
58655880
/** Create a vector v where each index i corresponds to the
58665881
* ASCII number for a character and, v[i + 128] labels it according to
58675882
* the CSVReader::ParseFlags enum
58685883
*/
58695884
HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter) {
5870-
std::array<ParseFlags, 256> ret = {};
5871-
for (int i = -128; i < 128; i++) {
5872-
const int arr_idx = i + 128;
5873-
char ch = char(i);
5874-
5875-
if (ch == delimiter)
5876-
ret[arr_idx] = ParseFlags::DELIMITER;
5877-
else if (ch == '\r' || ch == '\n')
5878-
ret[arr_idx] = ParseFlags::NEWLINE;
5879-
else
5880-
ret[arr_idx] = ParseFlags::NOT_SPECIAL;
5881-
}
5882-
5885+
auto ret = arrayToDefault<ParseFlagMap>(ParseFlags::NOT_SPECIAL);
5886+
ret[delimiter + CHAR_OFFSET] = ParseFlags::DELIMITER;
5887+
ret['\r' + CHAR_OFFSET] = ParseFlags::NEWLINE;
5888+
ret['\n' + CHAR_OFFSET] = ParseFlags::NEWLINE;
58835889
return ret;
58845890
}
58855891

@@ -5889,7 +5895,7 @@ namespace csv {
58895895
*/
58905896
HEDLEY_CONST CONSTEXPR_17 ParseFlagMap make_parse_flags(char delimiter, char quote_char) {
58915897
std::array<ParseFlags, 256> ret = make_parse_flags(delimiter);
5892-
ret[(size_t)quote_char + 128] = ParseFlags::QUOTE;
5898+
ret[quote_char + CHAR_OFFSET] = ParseFlags::QUOTE;
58935899
return ret;
58945900
}
58955901

@@ -5898,19 +5904,10 @@ namespace csv {
58985904
* c is a whitespace character
58995905
*/
59005906
HEDLEY_CONST CONSTEXPR_17 WhitespaceMap make_ws_flags(const char* ws_chars, size_t n_chars) {
5901-
std::array<bool, 256> ret = {};
5902-
for (int i = -128; i < 128; i++) {
5903-
const int arr_idx = i + 128;
5904-
char ch = char(i);
5905-
ret[arr_idx] = false;
5906-
5907-
for (size_t j = 0; j < n_chars; j++) {
5908-
if (ws_chars[j] == ch) {
5909-
ret[arr_idx] = true;
5910-
}
5911-
}
5907+
auto ret = arrayToDefault<WhitespaceMap>(false);
5908+
for (size_t j = 0; j < n_chars; j++) {
5909+
ret[ws_chars[j] + CHAR_OFFSET] = true;
59125910
}
5913-
59145911
return ret;
59155912
}
59165913

@@ -6060,7 +6057,7 @@ namespace csv {
60606057
void end_feed();
60616058

60626059
CONSTEXPR_17 ParseFlags parse_flag(const char ch) const noexcept {
6063-
return _parse_flags.data()[ch + 128];
6060+
return _parse_flags.data()[ch + CHAR_OFFSET];
60646061
}
60656062

60666063
CONSTEXPR_17 ParseFlags compound_parse_flag(const char ch) const noexcept {
@@ -6124,7 +6121,7 @@ namespace csv {
61246121
RowCollection* _records = nullptr;
61256122

61266123
CONSTEXPR_17 bool ws_flag(const char ch) const noexcept {
6127-
return _ws_flags.data()[ch + 128];
6124+
return _ws_flags.data()[ch + CHAR_OFFSET];
61286125
}
61296126

61306127
size_t& current_row_start() {

0 commit comments

Comments
 (0)