Skip to content

Commit faa62c0

Browse files
author
MarcoFalke
committed
util: Add ConstevalFormatString
The type is used to wrap a format string once it has been compile-time checked to contain the right number of format specifiers.
1 parent fae7b83 commit faa62c0

File tree

3 files changed

+149
-1
lines changed

3 files changed

+149
-1
lines changed

src/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ add_executable(test_bitcoin
132132
txvalidation_tests.cpp
133133
txvalidationcache_tests.cpp
134134
uint256_tests.cpp
135+
util_string_tests.cpp
135136
util_tests.cpp
136137
util_threadnames_tests.cpp
137138
validation_block_tests.cpp

src/test/util_string_tests.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Copyright (c) 2024-present The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#include <util/string.h>
6+
7+
#include <boost/test/unit_test.hpp>
8+
9+
using namespace util;
10+
11+
BOOST_AUTO_TEST_SUITE(util_string_tests)
12+
13+
// Helper to allow compile-time sanity checks while providing the number of
14+
// args directly. Normally PassFmt<sizeof...(Args)> would be used.
15+
template <unsigned NumArgs>
16+
inline void PassFmt(util::ConstevalFormatString<NumArgs> fmt)
17+
{
18+
// This was already executed at compile-time, but is executed again at run-time to avoid -Wunused.
19+
decltype(fmt)::Detail_CheckNumFormatSpecifiers(fmt.fmt);
20+
}
21+
template <unsigned WrongNumArgs>
22+
inline void FailFmtWithError(std::string_view wrong_fmt, std::string_view error)
23+
{
24+
using ErrType = const char*;
25+
auto check_throw{[error](const ErrType& str) { return str == error; }};
26+
BOOST_CHECK_EXCEPTION(util::ConstevalFormatString<WrongNumArgs>::Detail_CheckNumFormatSpecifiers(wrong_fmt), ErrType, check_throw);
27+
}
28+
29+
BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec)
{
    // --- Accepted: escapes, plain specifiers and text that merely looks like one ---
    PassFmt<0>("");
    PassFmt<0>("%%");
    PassFmt<1>("%s");
    PassFmt<0>("%%s");
    PassFmt<0>("s%%");
    PassFmt<1>("%%%s");
    PassFmt<1>("%s%%");
    PassFmt<0>(" 1$s");

    // --- Accepted: positional specifiers (the highest position is the count) ---
    PassFmt<1>("%1$s");
    PassFmt<1>("%1$s%1$s");
    PassFmt<2>("%2$s");
    PassFmt<2>("%2$s 4$s %2$s");
    PassFmt<129>("%129$s 999$s %2$s");

    // --- Accepted: flags, width and precision ---
    PassFmt<1>("%02d");
    PassFmt<1>("%+2s");
    PassFmt<1>("%.6i");
    PassFmt<1>("%5.2f");
    PassFmt<1>("%#x");
    PassFmt<1>("%1$5i");
    PassFmt<1>("%1$-5i");
    PassFmt<1>("%1$.5i");

    // tinyformat accepts almost any "type" spec, even '%', or '_', or '\n'.
    PassFmt<1>("%123%");
    PassFmt<1>("%123%s");
    PassFmt<1>("%_");
    PassFmt<1>("%\n");

    // The `*` specifier behavior is unsupported and can lead to runtime
    // errors when used in a ConstevalFormatString. Please refer to the
    // note in the ConstevalFormatString docs.
    PassFmt<1>("%*c");
    PassFmt<2>("%2$*3$d");
    PassFmt<1>("%.*f");

    // --- Rejected: positional and non-positional specifiers mixed ---
    const char* const msg_mixed{"Format specifiers must be all positional or all non-positional!"};
    FailFmtWithError<1>("%s%1$s", msg_mixed);

    // --- Rejected: specifier count differs from the argument count ---
    const char* const msg_count{"Format specifier count must match the argument count!"};
    FailFmtWithError<1>("", msg_count);
    FailFmtWithError<0>("%s", msg_count);
    FailFmtWithError<2>("%s", msg_count);
    FailFmtWithError<0>("%1$s", msg_count);
    FailFmtWithError<2>("%1$s", msg_count);

    // --- Rejected: positional specifier with a missing or zero position ---
    const char* const msg_zero_pos{"Positional format specifier must have position of at least 1"};
    FailFmtWithError<1>("%$s", msg_zero_pos);
    FailFmtWithError<1>("%$", msg_zero_pos);
    FailFmtWithError<0>("%0$", msg_zero_pos);
    FailFmtWithError<0>("%0$s", msg_zero_pos);

    // --- Rejected: the string ends in the middle of a specifier ---
    const char* const msg_truncated{"Format specifier incorrectly terminated by end of string"};
    FailFmtWithError<1>("%", msg_truncated);
    FailFmtWithError<1>("%1$", msg_truncated);
}
85+
86+
BOOST_AUTO_TEST_SUITE_END()

src/util/string.h

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2019-2022 The Bitcoin Core developers
1+
// Copyright (c) 2019-present The Bitcoin Core developers
22
// Distributed under the MIT software license, see the accompanying
33
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
44

@@ -17,6 +17,67 @@
1717
#include <vector>
1818

1919
namespace util {
20+
/**
21+
* @brief A wrapper for a compile-time partially validated format string
22+
*
23+
* This struct can be used to enforce partial compile-time validation of format
24+
* strings, to reduce the likelihood of tinyformat throwing exceptions at
25+
* run-time. Validation is partial to try and prevent the most common errors
26+
* while avoiding re-implementing the entire parsing logic.
27+
*
28+
* @note Counting of `*` dynamic width and precision fields (such as `%*c`,
29+
* `%2$*3$d`, `%.*f`) is not implemented to minimize code complexity as long as
30+
* they are not used in the codebase. Usage of these fields is not counted and
31+
* can lead to run-time exceptions. Code wanting to use the `*` specifier can
32+
* side-step this struct and call tinyformat directly.
33+
*/
34+
template <unsigned num_params>
35+
struct ConstevalFormatString {
36+
const char* const fmt;
37+
consteval ConstevalFormatString(const char* str) : fmt{str} { Detail_CheckNumFormatSpecifiers(fmt); }
38+
constexpr static void Detail_CheckNumFormatSpecifiers(std::string_view str)
39+
{
40+
unsigned count_normal{0}; // Number of "normal" specifiers, like %s
41+
unsigned count_pos{0}; // Max number in positional specifier, like %8$s
42+
for (auto it{str.begin()}; it < str.end();) {
43+
if (*it != '%') {
44+
++it;
45+
continue;
46+
}
47+
48+
if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string";
49+
if (*it == '%') {
50+
// Percent escape: %%
51+
++it;
52+
continue;
53+
}
54+
55+
unsigned maybe_num{0};
56+
while ('0' <= *it && *it <= '9') {
57+
maybe_num *= 10;
58+
maybe_num += *it - '0';
59+
++it;
60+
};
61+
62+
if (*it == '$') {
63+
// Positional specifier, like %8$s
64+
if (maybe_num == 0) throw "Positional format specifier must have position of at least 1";
65+
count_pos = std::max(count_pos, maybe_num);
66+
if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string";
67+
} else {
68+
// Non-positional specifier, like %s
69+
++count_normal;
70+
++it;
71+
}
72+
// The remainder "[flags][width][.precision][length]type" of the
73+
// specifier is not checked. Parsing continues with the next '%'.
74+
}
75+
if (count_normal && count_pos) throw "Format specifiers must be all positional or all non-positional!";
76+
unsigned count{count_normal | count_pos};
77+
if (num_params != count) throw "Format specifier count must match the argument count!";
78+
}
79+
};
80+
2081
void ReplaceAll(std::string& in_out, const std::string& search, const std::string& substitute);
2182

2283
/** Split a string on any char found in separators, returning a vector.

0 commit comments

Comments
 (0)