Skip to content

Commit befdef8

Browse files
author
MarcoFalke
committed
Merge #16887: Abstract out some of the descriptor Span-parsing helpers
bb36372 test: add unit tests for Span-parsing helpers (Sebastian Falbesoner) 5e69aee Add documenting comments to spanparsing.h (Pieter Wuille) 230d43f Abstract out some of the descriptor Span-parsing helpers (Pieter Wuille) Pull request description: As suggested here: bitcoin/bitcoin#16800 (comment). This moves the Span parsing functions out of the descriptor module, making them more easily usable for other parsers (in particular, in preparation for miniscript parsing). ACKs for top commit: MarcoFalke: ACK bb36372 Tree-SHA512: b5c5c11a9bc3f0a1c2c4cfa22755654ecfb8d4b69da0dc1fb9f04e1556dc0f6ffd87ad153600963279ac465d587d7971b53d240ced802d12693682411ac73deb
2 parents 59f0687 + bb36372 commit befdef8

File tree

5 files changed

+252
-57
lines changed

5 files changed

+252
-57
lines changed

src/Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ BITCOIN_CORE_H = \
208208
util/bytevectorhash.h \
209209
util/error.h \
210210
util/fees.h \
211+
util/spanparsing.h \
211212
util/system.h \
212213
util/macros.h \
213214
util/memory.h \
@@ -505,6 +506,7 @@ libbitcoin_util_a_SOURCES = \
505506
util/moneystr.cpp \
506507
util/rbf.cpp \
507508
util/threadnames.cpp \
509+
util/spanparsing.cpp \
508510
util/strencodings.cpp \
509511
util/string.cpp \
510512
util/time.cpp \

src/script/descriptor.cpp

Lines changed: 9 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include <span.h>
1313
#include <util/bip32.h>
14+
#include <util/spanparsing.h>
1415
#include <util/system.h>
1516
#include <util/strencodings.h>
1617

@@ -640,63 +641,6 @@ enum class ParseScriptContext {
640641
P2WSH,
641642
};
642643

643-
/** Parse a constant. If successful, sp is updated to skip the constant and return true. */
644-
bool Const(const std::string& str, Span<const char>& sp)
645-
{
646-
if ((size_t)sp.size() >= str.size() && std::equal(str.begin(), str.end(), sp.begin())) {
647-
sp = sp.subspan(str.size());
648-
return true;
649-
}
650-
return false;
651-
}
652-
653-
/** Parse a function call. If successful, sp is updated to be the function's argument(s). */
654-
bool Func(const std::string& str, Span<const char>& sp)
655-
{
656-
if ((size_t)sp.size() >= str.size() + 2 && sp[str.size()] == '(' && sp[sp.size() - 1] == ')' && std::equal(str.begin(), str.end(), sp.begin())) {
657-
sp = sp.subspan(str.size() + 1, sp.size() - str.size() - 2);
658-
return true;
659-
}
660-
return false;
661-
}
662-
663-
/** Return the expression that sp begins with, and update sp to skip it. */
664-
Span<const char> Expr(Span<const char>& sp)
665-
{
666-
int level = 0;
667-
auto it = sp.begin();
668-
while (it != sp.end()) {
669-
if (*it == '(') {
670-
++level;
671-
} else if (level && *it == ')') {
672-
--level;
673-
} else if (level == 0 && (*it == ')' || *it == ',')) {
674-
break;
675-
}
676-
++it;
677-
}
678-
Span<const char> ret = sp.first(it - sp.begin());
679-
sp = sp.subspan(it - sp.begin());
680-
return ret;
681-
}
682-
683-
/** Split a string on every instance of sep, returning a vector. */
684-
std::vector<Span<const char>> Split(const Span<const char>& sp, char sep)
685-
{
686-
std::vector<Span<const char>> ret;
687-
auto it = sp.begin();
688-
auto start = it;
689-
while (it != sp.end()) {
690-
if (*it == sep) {
691-
ret.emplace_back(start, it);
692-
start = it + 1;
693-
}
694-
++it;
695-
}
696-
ret.emplace_back(start, it);
697-
return ret;
698-
}
699-
700644
/** Parse a key path, being passed a split list of elements (the first element is ignored). */
701645
NODISCARD bool ParseKeyPath(const std::vector<Span<const char>>& split, KeyPath& out, std::string& error)
702646
{
@@ -723,6 +667,8 @@ NODISCARD bool ParseKeyPath(const std::vector<Span<const char>>& split, KeyPath&
723667
/** Parse a public key that excludes origin information. */
724668
std::unique_ptr<PubkeyProvider> ParsePubkeyInner(const Span<const char>& sp, bool permit_uncompressed, FlatSigningProvider& out, std::string& error)
725669
{
670+
using namespace spanparsing;
671+
726672
auto split = Split(sp, '/');
727673
std::string str(split[0].begin(), split[0].end());
728674
if (str.size() == 0) {
@@ -782,6 +728,8 @@ std::unique_ptr<PubkeyProvider> ParsePubkeyInner(const Span<const char>& sp, boo
782728
/** Parse a public key including origin information (if enabled). */
783729
std::unique_ptr<PubkeyProvider> ParsePubkey(const Span<const char>& sp, bool permit_uncompressed, FlatSigningProvider& out, std::string& error)
784730
{
731+
using namespace spanparsing;
732+
785733
auto origin_split = Split(sp, ']');
786734
if (origin_split.size() > 2) {
787735
error = "Multiple ']' characters found for a single pubkey";
@@ -816,6 +764,8 @@ std::unique_ptr<PubkeyProvider> ParsePubkey(const Span<const char>& sp, bool per
816764
/** Parse a script in a particular context. */
817765
std::unique_ptr<DescriptorImpl> ParseScript(Span<const char>& sp, ParseScriptContext ctx, FlatSigningProvider& out, std::string& error)
818766
{
767+
using namespace spanparsing;
768+
819769
auto expr = Expr(sp);
820770
bool sorted_multi = false;
821771
if (Func("pk", expr)) {
@@ -1012,6 +962,8 @@ std::unique_ptr<DescriptorImpl> InferScript(const CScript& script, ParseScriptCo
1012962
/** Check a descriptor checksum, and update desc to be the checksum-less part. */
1013963
bool CheckChecksum(Span<const char>& sp, bool require_checksum, std::string& error, std::string* out_checksum = nullptr)
1014964
{
965+
using namespace spanparsing;
966+
1015967
auto check_split = Split(sp, '#');
1016968
if (check_split.size() > 2) {
1017969
error = "Multiple '#' symbols";

src/test/util_tests.cpp

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <util/strencodings.h>
1313
#include <util/string.h>
1414
#include <util/time.h>
15+
#include <util/spanparsing.h>
1516

1617
#include <stdint.h>
1718
#include <thread>
@@ -1572,4 +1573,127 @@ BOOST_AUTO_TEST_CASE(test_Capitalize)
15721573
BOOST_CHECK_EQUAL(Capitalize("\x00\xfe\xff"), "\x00\xfe\xff");
15731574
}
15741575

1576+
static std::string SpanToStr(Span<const char>& span)
1577+
{
1578+
return std::string(span.begin(), span.end());
1579+
}
1580+
1581+
BOOST_AUTO_TEST_CASE(test_spanparsing)
1582+
{
1583+
using namespace spanparsing;
1584+
std::string input;
1585+
Span<const char> sp;
1586+
bool success;
1587+
1588+
// Const(...): parse a constant, update span to skip it if successful
1589+
input = "MilkToastHoney";
1590+
sp = MakeSpan(input);
1591+
success = Const("", sp); // empty
1592+
BOOST_CHECK(success);
1593+
BOOST_CHECK_EQUAL(SpanToStr(sp), "MilkToastHoney");
1594+
1595+
success = Const("Milk", sp);
1596+
BOOST_CHECK(success);
1597+
BOOST_CHECK_EQUAL(SpanToStr(sp), "ToastHoney");
1598+
1599+
success = Const("Bread", sp);
1600+
BOOST_CHECK(!success);
1601+
1602+
success = Const("Toast", sp);
1603+
BOOST_CHECK(success);
1604+
BOOST_CHECK_EQUAL(SpanToStr(sp), "Honey");
1605+
1606+
success = Const("Honeybadger", sp);
1607+
BOOST_CHECK(!success);
1608+
1609+
success = Const("Honey", sp);
1610+
BOOST_CHECK(success);
1611+
BOOST_CHECK_EQUAL(SpanToStr(sp), "");
1612+
1613+
// Func(...): parse a function call, update span to argument if successful
1614+
input = "Foo(Bar(xy,z()))";
1615+
sp = MakeSpan(input);
1616+
1617+
success = Func("FooBar", sp);
1618+
BOOST_CHECK(!success);
1619+
1620+
success = Func("Foo(", sp);
1621+
BOOST_CHECK(!success);
1622+
1623+
success = Func("Foo", sp);
1624+
BOOST_CHECK(success);
1625+
BOOST_CHECK_EQUAL(SpanToStr(sp), "Bar(xy,z())");
1626+
1627+
success = Func("Bar", sp);
1628+
BOOST_CHECK(success);
1629+
BOOST_CHECK_EQUAL(SpanToStr(sp), "xy,z()");
1630+
1631+
success = Func("xy", sp);
1632+
BOOST_CHECK(!success);
1633+
1634+
// Expr(...): return expression that span begins with, update span to skip it
1635+
Span<const char> result;
1636+
1637+
input = "(n*(n-1))/2";
1638+
sp = MakeSpan(input);
1639+
result = Expr(sp);
1640+
BOOST_CHECK_EQUAL(SpanToStr(result), "(n*(n-1))/2");
1641+
BOOST_CHECK_EQUAL(SpanToStr(sp), "");
1642+
1643+
input = "foo,bar";
1644+
sp = MakeSpan(input);
1645+
result = Expr(sp);
1646+
BOOST_CHECK_EQUAL(SpanToStr(result), "foo");
1647+
BOOST_CHECK_EQUAL(SpanToStr(sp), ",bar");
1648+
1649+
input = "(aaaaa,bbbbb()),c";
1650+
sp = MakeSpan(input);
1651+
result = Expr(sp);
1652+
BOOST_CHECK_EQUAL(SpanToStr(result), "(aaaaa,bbbbb())");
1653+
BOOST_CHECK_EQUAL(SpanToStr(sp), ",c");
1654+
1655+
input = "xyz)foo";
1656+
sp = MakeSpan(input);
1657+
result = Expr(sp);
1658+
BOOST_CHECK_EQUAL(SpanToStr(result), "xyz");
1659+
BOOST_CHECK_EQUAL(SpanToStr(sp), ")foo");
1660+
1661+
input = "((a),(b),(c)),xxx";
1662+
sp = MakeSpan(input);
1663+
result = Expr(sp);
1664+
BOOST_CHECK_EQUAL(SpanToStr(result), "((a),(b),(c))");
1665+
BOOST_CHECK_EQUAL(SpanToStr(sp), ",xxx");
1666+
1667+
// Split(...): split a string on every instance of sep, return vector
1668+
std::vector<Span<const char>> results;
1669+
1670+
input = "xxx";
1671+
results = Split(MakeSpan(input), 'x');
1672+
BOOST_CHECK_EQUAL(results.size(), 4);
1673+
BOOST_CHECK_EQUAL(SpanToStr(results[0]), "");
1674+
BOOST_CHECK_EQUAL(SpanToStr(results[1]), "");
1675+
BOOST_CHECK_EQUAL(SpanToStr(results[2]), "");
1676+
BOOST_CHECK_EQUAL(SpanToStr(results[3]), "");
1677+
1678+
input = "one#two#three";
1679+
results = Split(MakeSpan(input), '-');
1680+
BOOST_CHECK_EQUAL(results.size(), 1);
1681+
BOOST_CHECK_EQUAL(SpanToStr(results[0]), "one#two#three");
1682+
1683+
input = "one#two#three";
1684+
results = Split(MakeSpan(input), '#');
1685+
BOOST_CHECK_EQUAL(results.size(), 3);
1686+
BOOST_CHECK_EQUAL(SpanToStr(results[0]), "one");
1687+
BOOST_CHECK_EQUAL(SpanToStr(results[1]), "two");
1688+
BOOST_CHECK_EQUAL(SpanToStr(results[2]), "three");
1689+
1690+
input = "*foo*bar*";
1691+
results = Split(MakeSpan(input), '*');
1692+
BOOST_CHECK_EQUAL(results.size(), 4);
1693+
BOOST_CHECK_EQUAL(SpanToStr(results[0]), "");
1694+
BOOST_CHECK_EQUAL(SpanToStr(results[1]), "foo");
1695+
BOOST_CHECK_EQUAL(SpanToStr(results[2]), "bar");
1696+
BOOST_CHECK_EQUAL(SpanToStr(results[3]), "");
1697+
}
1698+
15751699
BOOST_AUTO_TEST_SUITE_END()

src/util/spanparsing.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Copyright (c) 2018 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#include <util/spanparsing.h>
6+
7+
#include <span.h>
8+
9+
#include <string>
10+
#include <vector>
11+
12+
namespace spanparsing {
13+
14+
bool Const(const std::string& str, Span<const char>& sp)
15+
{
16+
if ((size_t)sp.size() >= str.size() && std::equal(str.begin(), str.end(), sp.begin())) {
17+
sp = sp.subspan(str.size());
18+
return true;
19+
}
20+
return false;
21+
}
22+
23+
bool Func(const std::string& str, Span<const char>& sp)
24+
{
25+
if ((size_t)sp.size() >= str.size() + 2 && sp[str.size()] == '(' && sp[sp.size() - 1] == ')' && std::equal(str.begin(), str.end(), sp.begin())) {
26+
sp = sp.subspan(str.size() + 1, sp.size() - str.size() - 2);
27+
return true;
28+
}
29+
return false;
30+
}
31+
32+
Span<const char> Expr(Span<const char>& sp)
33+
{
34+
int level = 0;
35+
auto it = sp.begin();
36+
while (it != sp.end()) {
37+
if (*it == '(') {
38+
++level;
39+
} else if (level && *it == ')') {
40+
--level;
41+
} else if (level == 0 && (*it == ')' || *it == ',')) {
42+
break;
43+
}
44+
++it;
45+
}
46+
Span<const char> ret = sp.first(it - sp.begin());
47+
sp = sp.subspan(it - sp.begin());
48+
return ret;
49+
}
50+
51+
std::vector<Span<const char>> Split(const Span<const char>& sp, char sep)
52+
{
53+
std::vector<Span<const char>> ret;
54+
auto it = sp.begin();
55+
auto start = it;
56+
while (it != sp.end()) {
57+
if (*it == sep) {
58+
ret.emplace_back(start, it);
59+
start = it + 1;
60+
}
61+
++it;
62+
}
63+
ret.emplace_back(start, it);
64+
return ret;
65+
}
66+
67+
} // namespace spanparsing

src/util/spanparsing.h

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Copyright (c) 2018 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#ifndef BITCOIN_UTIL_SPANPARSING_H
6+
#define BITCOIN_UTIL_SPANPARSING_H
7+
8+
#include <span.h>
9+
10+
#include <string>
11+
#include <vector>
12+
13+
namespace spanparsing {
14+
15+
/** Parse a constant.
16+
*
17+
* If sp's initial part matches str, sp is updated to skip that part, and true is returned.
18+
* Otherwise sp is unmodified and false is returned.
19+
*/
20+
bool Const(const std::string& str, Span<const char>& sp);
21+
22+
/** Parse a function call.
23+
*
24+
* If sp's initial part matches str + "(", and sp ends with ")", sp is updated to be the
25+
* section between the parentheses, and true is returned. Otherwise sp is unmodified and false
26+
* is returned.
27+
*/
28+
bool Func(const std::string& str, Span<const char>& sp);
29+
30+
/** Extract the expression that sp begins with.
31+
*
32+
* This function will return the initial part of sp, up to (but not including) the first
33+
* comma or closing parenthesis, skipping ones that are nested inside parentheses. So for example,
34+
* for "foo(bar(1),2),3" the initial part "foo(bar(1),2)" will be returned. sp will be
35+
* updated to skip the initial part that is returned.
36+
*/
37+
Span<const char> Expr(Span<const char>& sp);
38+
39+
/** Split a string on every instance of sep, returning a vector.
40+
*
41+
* If sep does not occur in sp, a singleton with the entirety of sp is returned.
42+
*
43+
* Note that this function does not take parentheses into account, so splitting
44+
* "foo(bar(1),2),3)" on ',' will return {"foo(bar(1)", "2)", "3)"}.
45+
*/
46+
std::vector<Span<const char>> Split(const Span<const char>& sp, char sep);
47+
48+
} // namespace spanparsing
49+
50+
#endif // BITCOIN_UTIL_SPANPARSING_H

0 commit comments

Comments
 (0)