Skip to content

Commit 2d8e0c0

Browse files
committed
Merge bitcoin/bitcoin#20457: util: Make Parse{Int,UInt}{32,64} use locale independent std::from_chars(…) (C++17) instead of locale dependent strto{l,ll,ul,ull}
4747db8 util: Introduce ToIntegral<T>(const std::string&) for locale independent parsing using std::from_chars(…) (C++17) (practicalswift) Pull request description: Make `Parse{Int,UInt}{32,64}` use locale independent `std::from_chars(…)` (C++17) instead of locale dependent `strto{l,ll,ul,ull}`. [About `std::from_chars`](https://en.cppreference.com/w/cpp/utility/from_chars): _"Unlike other parsing functions in C++ and C libraries, `std::from_chars` is locale-independent, non-allocating, and non-throwing."_ ACKs for top commit: laanwj: Code review ACK 4747db8 Tree-SHA512: 40f2cd582bc19ddcf2c498eca3379167619eff6aa047bbac2f73b8fd8ecaefe5947c66700a189f83848751f9f8c05645e83afd4a44a1679062aee5440dba880a
2 parents 1cf7fb9 + 4747db8 commit 2d8e0c0

File tree

5 files changed

+270
-74
lines changed

5 files changed

+270
-74
lines changed

src/test/fuzz/string.cpp

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,99 @@
3131
#include <version.h>
3232

3333
#include <cstdint>
34+
#include <cstdlib>
3435
#include <string>
3536
#include <vector>
3637

38+
namespace {
39+
bool LegacyParsePrechecks(const std::string& str)
40+
{
41+
if (str.empty()) // No empty string allowed
42+
return false;
43+
if (str.size() >= 1 && (IsSpace(str[0]) || IsSpace(str[str.size() - 1]))) // No padding allowed
44+
return false;
45+
if (!ValidAsCString(str)) // No embedded NUL characters allowed
46+
return false;
47+
return true;
48+
}
49+
50+
bool LegacyParseInt32(const std::string& str, int32_t* out)
51+
{
52+
if (!LegacyParsePrechecks(str))
53+
return false;
54+
char* endp = nullptr;
55+
errno = 0; // strtol will not set errno if valid
56+
long int n = strtol(str.c_str(), &endp, 10);
57+
if (out) *out = (int32_t)n;
58+
// Note that strtol returns a *long int*, so even if strtol doesn't report an over/underflow
59+
// we still have to check that the returned value is within the range of an *int32_t*. On 64-bit
60+
// platforms the size of these types may be different.
61+
return endp && *endp == 0 && !errno &&
62+
n >= std::numeric_limits<int32_t>::min() &&
63+
n <= std::numeric_limits<int32_t>::max();
64+
}
65+
66+
bool LegacyParseInt64(const std::string& str, int64_t* out)
67+
{
68+
if (!LegacyParsePrechecks(str))
69+
return false;
70+
char* endp = nullptr;
71+
errno = 0; // strtoll will not set errno if valid
72+
long long int n = strtoll(str.c_str(), &endp, 10);
73+
if (out) *out = (int64_t)n;
74+
// Note that strtoll returns a *long long int*, so even if strtol doesn't report an over/underflow
75+
// we still have to check that the returned value is within the range of an *int64_t*.
76+
return endp && *endp == 0 && !errno &&
77+
n >= std::numeric_limits<int64_t>::min() &&
78+
n <= std::numeric_limits<int64_t>::max();
79+
}
80+
81+
bool LegacyParseUInt32(const std::string& str, uint32_t* out)
82+
{
83+
if (!LegacyParsePrechecks(str))
84+
return false;
85+
if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoul accepts these by default if they fit in the range
86+
return false;
87+
char* endp = nullptr;
88+
errno = 0; // strtoul will not set errno if valid
89+
unsigned long int n = strtoul(str.c_str(), &endp, 10);
90+
if (out) *out = (uint32_t)n;
91+
// Note that strtoul returns a *unsigned long int*, so even if it doesn't report an over/underflow
92+
// we still have to check that the returned value is within the range of an *uint32_t*. On 64-bit
93+
// platforms the size of these types may be different.
94+
return endp && *endp == 0 && !errno &&
95+
n <= std::numeric_limits<uint32_t>::max();
96+
}
97+
98+
bool LegacyParseUInt8(const std::string& str, uint8_t* out)
99+
{
100+
uint32_t u32;
101+
if (!LegacyParseUInt32(str, &u32) || u32 > std::numeric_limits<uint8_t>::max()) {
102+
return false;
103+
}
104+
if (out != nullptr) {
105+
*out = static_cast<uint8_t>(u32);
106+
}
107+
return true;
108+
}
109+
110+
bool LegacyParseUInt64(const std::string& str, uint64_t* out)
111+
{
112+
if (!LegacyParsePrechecks(str))
113+
return false;
114+
if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoull accepts these by default if they fit in the range
115+
return false;
116+
char* endp = nullptr;
117+
errno = 0; // strtoull will not set errno if valid
118+
unsigned long long int n = strtoull(str.c_str(), &endp, 10);
119+
if (out) *out = (uint64_t)n;
120+
// Note that strtoull returns a *unsigned long long int*, so even if it doesn't report an over/underflow
121+
// we still have to check that the returned value is within the range of an *uint64_t*.
122+
return endp && *endp == 0 && !errno &&
123+
n <= std::numeric_limits<uint64_t>::max();
124+
}
125+
}; // namespace
126+
37127
FUZZ_TARGET(string)
38128
{
39129
FuzzedDataProvider fuzzed_data_provider(buffer.data(), buffer.size());
@@ -133,4 +223,49 @@ FUZZ_TARGET(string)
133223
const bilingual_str bs2{random_string_2, random_string_1};
134224
(void)(bs1 + bs2);
135225
}
226+
{
227+
int32_t i32;
228+
int64_t i64;
229+
uint32_t u32;
230+
uint64_t u64;
231+
uint8_t u8;
232+
const bool ok_i32 = ParseInt32(random_string_1, &i32);
233+
const bool ok_i64 = ParseInt64(random_string_1, &i64);
234+
const bool ok_u32 = ParseUInt32(random_string_1, &u32);
235+
const bool ok_u64 = ParseUInt64(random_string_1, &u64);
236+
const bool ok_u8 = ParseUInt8(random_string_1, &u8);
237+
238+
int32_t i32_legacy;
239+
int64_t i64_legacy;
240+
uint32_t u32_legacy;
241+
uint64_t u64_legacy;
242+
uint8_t u8_legacy;
243+
const bool ok_i32_legacy = LegacyParseInt32(random_string_1, &i32_legacy);
244+
const bool ok_i64_legacy = LegacyParseInt64(random_string_1, &i64_legacy);
245+
const bool ok_u32_legacy = LegacyParseUInt32(random_string_1, &u32_legacy);
246+
const bool ok_u64_legacy = LegacyParseUInt64(random_string_1, &u64_legacy);
247+
const bool ok_u8_legacy = LegacyParseUInt8(random_string_1, &u8_legacy);
248+
249+
assert(ok_i32 == ok_i32_legacy);
250+
assert(ok_i64 == ok_i64_legacy);
251+
assert(ok_u32 == ok_u32_legacy);
252+
assert(ok_u64 == ok_u64_legacy);
253+
assert(ok_u8 == ok_u8_legacy);
254+
255+
if (ok_i32) {
256+
assert(i32 == i32_legacy);
257+
}
258+
if (ok_i64) {
259+
assert(i64 == i64_legacy);
260+
}
261+
if (ok_u32) {
262+
assert(u32 == u32_legacy);
263+
}
264+
if (ok_u64) {
265+
assert(u64 == u64_legacy);
266+
}
267+
if (ok_u8) {
268+
assert(u8 == u8_legacy);
269+
}
270+
}
136271
}

src/test/util_tests.cpp

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1474,6 +1474,81 @@ BOOST_AUTO_TEST_CASE(test_ParseInt32)
14741474
BOOST_CHECK(!ParseInt32("32482348723847471234", nullptr));
14751475
}
14761476

1477+
BOOST_AUTO_TEST_CASE(test_ToIntegral)
1478+
{
1479+
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("1234").value(), 1'234);
1480+
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("0").value(), 0);
1481+
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("01234").value(), 1'234);
1482+
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("00000000000000001234").value(), 1'234);
1483+
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-00000000000000001234").value(), -1'234);
1484+
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("00000000000000000000").value(), 0);
1485+
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-00000000000000000000").value(), 0);
1486+
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-1234").value(), -1'234);
1487+
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-1").value(), -1);
1488+
1489+
BOOST_CHECK(!ToIntegral<int32_t>(" 1"));
1490+
BOOST_CHECK(!ToIntegral<int32_t>("1 "));
1491+
BOOST_CHECK(!ToIntegral<int32_t>("1a"));
1492+
BOOST_CHECK(!ToIntegral<int32_t>("1.1"));
1493+
BOOST_CHECK(!ToIntegral<int32_t>("1.9"));
1494+
BOOST_CHECK(!ToIntegral<int32_t>("+01.9"));
1495+
BOOST_CHECK(!ToIntegral<int32_t>(" -1"));
1496+
BOOST_CHECK(!ToIntegral<int32_t>("-1 "));
1497+
BOOST_CHECK(!ToIntegral<int32_t>(" -1 "));
1498+
BOOST_CHECK(!ToIntegral<int32_t>("+1"));
1499+
BOOST_CHECK(!ToIntegral<int32_t>(" +1"));
1500+
BOOST_CHECK(!ToIntegral<int32_t>(" +1 "));
1501+
BOOST_CHECK(!ToIntegral<int32_t>("+-1"));
1502+
BOOST_CHECK(!ToIntegral<int32_t>("-+1"));
1503+
BOOST_CHECK(!ToIntegral<int32_t>("++1"));
1504+
BOOST_CHECK(!ToIntegral<int32_t>("--1"));
1505+
BOOST_CHECK(!ToIntegral<int32_t>(""));
1506+
BOOST_CHECK(!ToIntegral<int32_t>("aap"));
1507+
BOOST_CHECK(!ToIntegral<int32_t>("0x1"));
1508+
BOOST_CHECK(!ToIntegral<int32_t>("-32482348723847471234"));
1509+
BOOST_CHECK(!ToIntegral<int32_t>("32482348723847471234"));
1510+
1511+
BOOST_CHECK(!ToIntegral<int64_t>("-9223372036854775809"));
1512+
BOOST_CHECK_EQUAL(ToIntegral<int64_t>("-9223372036854775808").value(), -9'223'372'036'854'775'807LL - 1LL);
1513+
BOOST_CHECK_EQUAL(ToIntegral<int64_t>("9223372036854775807").value(), 9'223'372'036'854'775'807);
1514+
BOOST_CHECK(!ToIntegral<int64_t>("9223372036854775808"));
1515+
1516+
BOOST_CHECK(!ToIntegral<uint64_t>("-1"));
1517+
BOOST_CHECK_EQUAL(ToIntegral<uint64_t>("0").value(), 0U);
1518+
BOOST_CHECK_EQUAL(ToIntegral<uint64_t>("18446744073709551615").value(), 18'446'744'073'709'551'615ULL);
1519+
BOOST_CHECK(!ToIntegral<uint64_t>("18446744073709551616"));
1520+
1521+
BOOST_CHECK(!ToIntegral<int32_t>("-2147483649"));
1522+
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("-2147483648").value(), -2'147'483'648LL);
1523+
BOOST_CHECK_EQUAL(ToIntegral<int32_t>("2147483647").value(), 2'147'483'647);
1524+
BOOST_CHECK(!ToIntegral<int32_t>("2147483648"));
1525+
1526+
BOOST_CHECK(!ToIntegral<uint32_t>("-1"));
1527+
BOOST_CHECK_EQUAL(ToIntegral<uint32_t>("0").value(), 0U);
1528+
BOOST_CHECK_EQUAL(ToIntegral<uint32_t>("4294967295").value(), 4'294'967'295U);
1529+
BOOST_CHECK(!ToIntegral<uint32_t>("4294967296"));
1530+
1531+
BOOST_CHECK(!ToIntegral<int16_t>("-32769"));
1532+
BOOST_CHECK_EQUAL(ToIntegral<int16_t>("-32768").value(), -32'768);
1533+
BOOST_CHECK_EQUAL(ToIntegral<int16_t>("32767").value(), 32'767);
1534+
BOOST_CHECK(!ToIntegral<int16_t>("32768"));
1535+
1536+
BOOST_CHECK(!ToIntegral<uint16_t>("-1"));
1537+
BOOST_CHECK_EQUAL(ToIntegral<uint16_t>("0").value(), 0U);
1538+
BOOST_CHECK_EQUAL(ToIntegral<uint16_t>("65535").value(), 65'535U);
1539+
BOOST_CHECK(!ToIntegral<uint16_t>("65536"));
1540+
1541+
BOOST_CHECK(!ToIntegral<int8_t>("-129"));
1542+
BOOST_CHECK_EQUAL(ToIntegral<int8_t>("-128").value(), -128);
1543+
BOOST_CHECK_EQUAL(ToIntegral<int8_t>("127").value(), 127);
1544+
BOOST_CHECK(!ToIntegral<int8_t>("128"));
1545+
1546+
BOOST_CHECK(!ToIntegral<uint8_t>("-1"));
1547+
BOOST_CHECK_EQUAL(ToIntegral<uint8_t>("0").value(), 0U);
1548+
BOOST_CHECK_EQUAL(ToIntegral<uint8_t>("255").value(), 255U);
1549+
BOOST_CHECK(!ToIntegral<uint8_t>("256"));
1550+
}
1551+
14771552
BOOST_AUTO_TEST_CASE(test_ParseInt64)
14781553
{
14791554
int64_t n;

src/util/strencodings.cpp

Lines changed: 38 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@
1111
#include <algorithm>
1212
#include <cstdlib>
1313
#include <cstring>
14-
#include <errno.h>
15-
#include <limits>
14+
#include <optional>
1615

1716
static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
1817

@@ -282,6 +281,32 @@ std::string DecodeBase32(const std::string& str, bool* pf_invalid)
282281
return std::string((const char*)vchRet.data(), vchRet.size());
283282
}
284283

284+
[[nodiscard]] static bool ParsePrechecks(const std::string&);
285+
286+
namespace {
287+
template <typename T>
288+
bool ParseIntegral(const std::string& str, T* out)
289+
{
290+
static_assert(std::is_integral<T>::value);
291+
if (!ParsePrechecks(str)) {
292+
return false;
293+
}
294+
// Replicate the exact behavior of strtol/strtoll/strtoul/strtoull when
295+
// handling leading +/- for backwards compatibility.
296+
if (str.length() >= 2 && str[0] == '+' && str[1] == '-') {
297+
return false;
298+
}
299+
const std::optional<T> opt_int = ToIntegral<T>((!str.empty() && str[0] == '+') ? str.substr(1) : str);
300+
if (!opt_int) {
301+
return false;
302+
}
303+
if (out != nullptr) {
304+
*out = *opt_int;
305+
}
306+
return true;
307+
}
308+
}; // namespace
309+
285310
[[nodiscard]] static bool ParsePrechecks(const std::string& str)
286311
{
287312
if (str.empty()) // No empty string allowed
@@ -293,95 +318,36 @@ std::string DecodeBase32(const std::string& str, bool* pf_invalid)
293318
return true;
294319
}
295320

296-
bool ParseInt32(const std::string& str, int32_t *out)
321+
bool ParseInt32(const std::string& str, int32_t* out)
297322
{
298-
if (!ParsePrechecks(str))
299-
return false;
300-
char *endp = nullptr;
301-
errno = 0; // strtol will not set errno if valid
302-
long int n = strtol(str.c_str(), &endp, 10);
303-
if(out) *out = (int32_t)n;
304-
// Note that strtol returns a *long int*, so even if strtol doesn't report an over/underflow
305-
// we still have to check that the returned value is within the range of an *int32_t*. On 64-bit
306-
// platforms the size of these types may be different.
307-
return endp && *endp == 0 && !errno &&
308-
n >= std::numeric_limits<int32_t>::min() &&
309-
n <= std::numeric_limits<int32_t>::max();
323+
return ParseIntegral<int32_t>(str, out);
310324
}
311325

312-
bool ParseInt64(const std::string& str, int64_t *out)
326+
bool ParseInt64(const std::string& str, int64_t* out)
313327
{
314-
if (!ParsePrechecks(str))
315-
return false;
316-
char *endp = nullptr;
317-
errno = 0; // strtoll will not set errno if valid
318-
long long int n = strtoll(str.c_str(), &endp, 10);
319-
if(out) *out = (int64_t)n;
320-
// Note that strtoll returns a *long long int*, so even if strtol doesn't report an over/underflow
321-
// we still have to check that the returned value is within the range of an *int64_t*.
322-
return endp && *endp == 0 && !errno &&
323-
n >= std::numeric_limits<int64_t>::min() &&
324-
n <= std::numeric_limits<int64_t>::max();
328+
return ParseIntegral<int64_t>(str, out);
325329
}
326330

327-
bool ParseUInt8(const std::string& str, uint8_t *out)
331+
bool ParseUInt8(const std::string& str, uint8_t* out)
328332
{
329-
uint32_t u32;
330-
if (!ParseUInt32(str, &u32) || u32 > std::numeric_limits<uint8_t>::max()) {
331-
return false;
332-
}
333-
if (out != nullptr) {
334-
*out = static_cast<uint8_t>(u32);
335-
}
336-
return true;
333+
return ParseIntegral<uint8_t>(str, out);
337334
}
338335

339336
bool ParseUInt16(const std::string& str, uint16_t* out)
340337
{
341-
uint32_t u32;
342-
if (!ParseUInt32(str, &u32) || u32 > std::numeric_limits<uint16_t>::max()) {
343-
return false;
344-
}
345-
if (out != nullptr) {
346-
*out = static_cast<uint16_t>(u32);
347-
}
348-
return true;
338+
return ParseIntegral<uint16_t>(str, out);
349339
}
350340

351-
bool ParseUInt32(const std::string& str, uint32_t *out)
341+
bool ParseUInt32(const std::string& str, uint32_t* out)
352342
{
353-
if (!ParsePrechecks(str))
354-
return false;
355-
if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoul accepts these by default if they fit in the range
356-
return false;
357-
char *endp = nullptr;
358-
errno = 0; // strtoul will not set errno if valid
359-
unsigned long int n = strtoul(str.c_str(), &endp, 10);
360-
if(out) *out = (uint32_t)n;
361-
// Note that strtoul returns a *unsigned long int*, so even if it doesn't report an over/underflow
362-
// we still have to check that the returned value is within the range of an *uint32_t*. On 64-bit
363-
// platforms the size of these types may be different.
364-
return endp && *endp == 0 && !errno &&
365-
n <= std::numeric_limits<uint32_t>::max();
343+
return ParseIntegral<uint32_t>(str, out);
366344
}
367345

368-
bool ParseUInt64(const std::string& str, uint64_t *out)
346+
bool ParseUInt64(const std::string& str, uint64_t* out)
369347
{
370-
if (!ParsePrechecks(str))
371-
return false;
372-
if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoull accepts these by default if they fit in the range
373-
return false;
374-
char *endp = nullptr;
375-
errno = 0; // strtoull will not set errno if valid
376-
unsigned long long int n = strtoull(str.c_str(), &endp, 10);
377-
if(out) *out = (uint64_t)n;
378-
// Note that strtoull returns a *unsigned long long int*, so even if it doesn't report an over/underflow
379-
// we still have to check that the returned value is within the range of an *uint64_t*.
380-
return endp && *endp == 0 && !errno &&
381-
n <= std::numeric_limits<uint64_t>::max();
348+
return ParseIntegral<uint64_t>(str, out);
382349
}
383350

384-
385351
bool ParseDouble(const std::string& str, double *out)
386352
{
387353
if (!ParsePrechecks(str))

0 commit comments

Comments
 (0)