Skip to content

Commit 8d82edd

Browse files
committed
Merge #19145: Add hash_type MUHASH for gettxoutsetinfo
e987ae5 test: Add test for deterministic UTXO set hash results (Fabian Jahr)
6ccc8fc test: Add test for gettxoutsetinfo RPC with MuHash (Fabian Jahr)
0d3b2f6 rpc: Add hash_type MUHASH to gettxoutsetinfo (Fabian Jahr)
2474645 refactor: Separate hash and stats calculation in coinstats (Fabian Jahr)
a1fccea refactor: Improve encapsulation between MuHash3072 and Num3072 (Fabian Jahr)

Pull request description:

This is another PR in the series of PRs for Coinstatsindex (see overview in #18000). This PR adds the `hash_type` option `muhash` to `gettxoutsetinfo` through which the user can calculate the serialized muhash of the utxo set. This PR does not use the index yet.

ACKs for top commit:
Sjors: tACK e987ae5
achow101: ACK e987ae5
jonatack: Tested re-ACK e987ae5 per `git diff 3506d90 e987ae5`, reviewed diff, debug built, ran gettxoutsetinfo -signet and help on this branch vs master, at height 23127 both returned `hash_serialized_2` of `2b72d65f3b6efb2311f58374ea2b939abf49684d44f4bafda45faa3b5452a454` and this branch returned `muhash` of `c9f1ff12d345ccf9939c6bbf087e6f7399b6115adee1569287e9c5c43dbb475c`
ryanofsky: Code review ACK e987ae5. Looks very good. I left one suggestion to simplify code, but feel free to ignore it here and maybe consider it for later since PR has already had a lot of review.

Tree-SHA512: 9a739ce375e73749fa69a467262b60d3e5314ef384e2d7150b3bbc8e4125cd9fd1db95306623bb9a632fcbaf5d9d2bf2f5cc43bf717d4ff5e2c9c4b52dd9296c
2 parents 937dfa8 + e987ae5 commit 8d82edd

File tree

10 files changed

+195
-59
lines changed

10 files changed

+195
-59
lines changed

src/crypto/muhash.cpp

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ namespace {
1717
using limb_t = Num3072::limb_t;
1818
using double_limb_t = Num3072::double_limb_t;
1919
constexpr int LIMB_SIZE = Num3072::LIMB_SIZE;
20-
constexpr int LIMBS = Num3072::LIMBS;
2120
/** 2^3072 - 1103717, the largest 3072-bit safe prime number, is used as the modulus. */
2221
constexpr limb_t MAX_PRIME_DIFF = 1103717;
2322

@@ -123,7 +122,7 @@ inline void square_n_mul(Num3072& in_out, const int sq, const Num3072& mul)
123122

124123
} // namespace
125124

126-
/** Indicates wether d is larger than the modulus. */
125+
/** Indicates whether d is larger than the modulus. */
127126
bool Num3072::IsOverflow() const
128127
{
129128
if (this->limbs[0] <= std::numeric_limits<limb_t>::max() - MAX_PRIME_DIFF) return false;
@@ -276,18 +275,33 @@ void Num3072::Divide(const Num3072& a)
276275
if (this->IsOverflow()) this->FullReduce();
277276
}
278277

279-
Num3072 MuHash3072::ToNum3072(Span<const unsigned char> in) {
280-
Num3072 out{};
281-
uint256 hashed_in = (CHashWriter(SER_DISK, 0) << in).GetSHA256();
282-
unsigned char tmp[BYTE_SIZE];
283-
ChaCha20(hashed_in.data(), hashed_in.size()).Keystream(tmp, BYTE_SIZE);
278+
Num3072::Num3072(const unsigned char (&data)[BYTE_SIZE]) {
279+
for (int i = 0; i < LIMBS; ++i) {
280+
if (sizeof(limb_t) == 4) {
281+
this->limbs[i] = ReadLE32(data + 4 * i);
282+
} else if (sizeof(limb_t) == 8) {
283+
this->limbs[i] = ReadLE64(data + 8 * i);
284+
}
285+
}
286+
}
287+
288+
void Num3072::ToBytes(unsigned char (&out)[BYTE_SIZE]) {
284289
for (int i = 0; i < LIMBS; ++i) {
285290
if (sizeof(limb_t) == 4) {
286-
out.limbs[i] = ReadLE32(tmp + 4 * i);
291+
WriteLE32(out + i * 4, this->limbs[i]);
287292
} else if (sizeof(limb_t) == 8) {
288-
out.limbs[i] = ReadLE64(tmp + 8 * i);
293+
WriteLE64(out + i * 8, this->limbs[i]);
289294
}
290295
}
296+
}
297+
298+
Num3072 MuHash3072::ToNum3072(Span<const unsigned char> in) {
299+
unsigned char tmp[Num3072::BYTE_SIZE];
300+
301+
uint256 hashed_in = (CHashWriter(SER_DISK, 0) << in).GetSHA256();
302+
ChaCha20(hashed_in.data(), hashed_in.size()).Keystream(tmp, Num3072::BYTE_SIZE);
303+
Num3072 out{tmp};
304+
291305
return out;
292306
}
293307

@@ -301,14 +315,8 @@ void MuHash3072::Finalize(uint256& out) noexcept
301315
m_numerator.Divide(m_denominator);
302316
m_denominator.SetToOne(); // Needed to keep the MuHash object valid
303317

304-
unsigned char data[384];
305-
for (int i = 0; i < LIMBS; ++i) {
306-
if (sizeof(limb_t) == 4) {
307-
WriteLE32(data + i * 4, m_numerator.limbs[i]);
308-
} else if (sizeof(limb_t) == 8) {
309-
WriteLE64(data + i * 8, m_numerator.limbs[i]);
310-
}
311-
}
318+
unsigned char data[Num3072::BYTE_SIZE];
319+
m_numerator.ToBytes(data);
312320

313321
out = (CHashWriter(SER_DISK, 0) << data).GetSHA256();
314322
}

src/crypto/muhash.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class Num3072
2222
Num3072 GetInverse() const;
2323

2424
public:
25+
static constexpr size_t BYTE_SIZE = 384;
2526

2627
#ifdef HAVE___INT128
2728
typedef unsigned __int128 double_limb_t;
@@ -48,8 +49,10 @@ class Num3072
4849
void Divide(const Num3072& a);
4950
void SetToOne();
5051
void Square();
52+
void ToBytes(unsigned char (&out)[BYTE_SIZE]);
5153

5254
Num3072() { this->SetToOne(); };
55+
Num3072(const unsigned char (&data)[BYTE_SIZE]);
5356

5457
SERIALIZE_METHODS(Num3072, obj)
5558
{
@@ -78,7 +81,7 @@ class Num3072
7881
* arbitrary subset of the update operations, allowing them to be
7982
* efficiently combined later.
8083
*
81-
* Muhash does not support checking if an element is already part of the
84+
* MuHash does not support checking if an element is already part of the
8285
* set. That is why this class does not enforce the use of a set as the
8386
* data it represents because there is no efficient way to do so.
8487
* It is possible to add elements more than once and also to remove
@@ -91,8 +94,6 @@ class Num3072
9194
class MuHash3072
9295
{
9396
private:
94-
static constexpr size_t BYTE_SIZE = 384;
95-
9697
Num3072 m_numerator;
9798
Num3072 m_denominator;
9899

src/node/coinstats.cpp

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <node/coinstats.h>
77

88
#include <coins.h>
9+
#include <crypto/muhash.h>
910
#include <hash.h>
1011
#include <serialize.h>
1112
#include <uint256.h>
@@ -24,31 +25,47 @@ static uint64_t GetBogoSize(const CScript& scriptPubKey)
2425
scriptPubKey.size() /* scriptPubKey */;
2526
}
2627

27-
static void ApplyStats(CCoinsStats& stats, CHashWriter& ss, const uint256& hash, const std::map<uint32_t, Coin>& outputs)
28+
static void ApplyHash(CCoinsStats& stats, CHashWriter& ss, const uint256& hash, const std::map<uint32_t, Coin>& outputs, std::map<uint32_t, Coin>::const_iterator it)
2829
{
29-
assert(!outputs.empty());
30-
ss << hash;
31-
ss << VARINT(outputs.begin()->second.nHeight * 2 + outputs.begin()->second.fCoinBase ? 1u : 0u);
32-
stats.nTransactions++;
33-
for (const auto& output : outputs) {
34-
ss << VARINT(output.first + 1);
35-
ss << output.second.out.scriptPubKey;
36-
ss << VARINT_MODE(output.second.out.nValue, VarIntMode::NONNEGATIVE_SIGNED);
37-
stats.nTransactionOutputs++;
38-
stats.nTotalAmount += output.second.out.nValue;
39-
stats.nBogoSize += GetBogoSize(output.second.out.scriptPubKey);
30+
if (it == outputs.begin()) {
31+
ss << hash;
32+
ss << VARINT(it->second.nHeight * 2 + it->second.fCoinBase ? 1u : 0u);
33+
}
34+
35+
ss << VARINT(it->first + 1);
36+
ss << it->second.out.scriptPubKey;
37+
ss << VARINT_MODE(it->second.out.nValue, VarIntMode::NONNEGATIVE_SIGNED);
38+
39+
if (it == std::prev(outputs.end())) {
40+
ss << VARINT(0u);
4041
}
41-
ss << VARINT(0u);
4242
}
4343

44-
static void ApplyStats(CCoinsStats& stats, std::nullptr_t, const uint256& hash, const std::map<uint32_t, Coin>& outputs)
44+
static void ApplyHash(CCoinsStats& stats, std::nullptr_t, const uint256& hash, const std::map<uint32_t, Coin>& outputs, std::map<uint32_t, Coin>::const_iterator it) {}
45+
46+
static void ApplyHash(CCoinsStats& stats, MuHash3072& muhash, const uint256& hash, const std::map<uint32_t, Coin>& outputs, std::map<uint32_t, Coin>::const_iterator it)
47+
{
48+
COutPoint outpoint = COutPoint(hash, it->first);
49+
Coin coin = it->second;
50+
51+
CDataStream ss(SER_DISK, PROTOCOL_VERSION);
52+
ss << outpoint;
53+
ss << static_cast<uint32_t>(coin.nHeight * 2 + coin.fCoinBase);
54+
ss << coin.out;
55+
muhash.Insert(MakeUCharSpan(ss));
56+
}
57+
58+
template <typename T>
59+
static void ApplyStats(CCoinsStats& stats, T& hash_obj, const uint256& hash, const std::map<uint32_t, Coin>& outputs)
4560
{
4661
assert(!outputs.empty());
4762
stats.nTransactions++;
48-
for (const auto& output : outputs) {
63+
for (auto it = outputs.begin(); it != outputs.end(); ++it) {
64+
ApplyHash(stats, hash_obj, hash, outputs, it);
65+
4966
stats.nTransactionOutputs++;
50-
stats.nTotalAmount += output.second.out.nValue;
51-
stats.nBogoSize += GetBogoSize(output.second.out.scriptPubKey);
67+
stats.nTotalAmount += it->second.out.nValue;
68+
stats.nBogoSize += GetBogoSize(it->second.out.scriptPubKey);
5269
}
5370
}
5471

@@ -104,6 +121,10 @@ bool GetUTXOStats(CCoinsView* view, CCoinsStats& stats, CoinStatsHashType hash_t
104121
CHashWriter ss(SER_GETHASH, PROTOCOL_VERSION);
105122
return GetUTXOStats(view, stats, ss, interruption_point);
106123
}
124+
case(CoinStatsHashType::MUHASH): {
125+
MuHash3072 muhash;
126+
return GetUTXOStats(view, stats, muhash, interruption_point);
127+
}
107128
case(CoinStatsHashType::NONE): {
108129
return GetUTXOStats(view, stats, nullptr, interruption_point);
109130
}
@@ -116,10 +137,18 @@ static void PrepareHash(CHashWriter& ss, const CCoinsStats& stats)
116137
{
117138
ss << stats.hashBlock;
118139
}
140+
// MuHash does not need the prepare step
141+
static void PrepareHash(MuHash3072& muhash, CCoinsStats& stats) {}
119142
static void PrepareHash(std::nullptr_t, CCoinsStats& stats) {}
120143

121144
static void FinalizeHash(CHashWriter& ss, CCoinsStats& stats)
122145
{
123146
stats.hashSerialized = ss.GetHash();
124147
}
148+
static void FinalizeHash(MuHash3072& muhash, CCoinsStats& stats)
149+
{
150+
uint256 out;
151+
muhash.Finalize(out);
152+
stats.hashSerialized = out;
153+
}
125154
static void FinalizeHash(std::nullptr_t, CCoinsStats& stats) {}

src/node/coinstats.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ class CCoinsView;
1616

1717
enum class CoinStatsHashType {
1818
HASH_SERIALIZED,
19+
MUHASH,
1920
NONE,
2021
};
2122

src/rpc/blockchain.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,13 +1026,26 @@ static RPCHelpMan pruneblockchain()
10261026
};
10271027
}
10281028

1029+
CoinStatsHashType ParseHashType(const std::string& hash_type_input)
1030+
{
1031+
if (hash_type_input == "hash_serialized_2") {
1032+
return CoinStatsHashType::HASH_SERIALIZED;
1033+
} else if (hash_type_input == "muhash") {
1034+
return CoinStatsHashType::MUHASH;
1035+
} else if (hash_type_input == "none") {
1036+
return CoinStatsHashType::NONE;
1037+
} else {
1038+
throw JSONRPCError(RPC_INVALID_PARAMETER, strprintf("%s is not a valid hash_type", hash_type_input));
1039+
}
1040+
}
1041+
10291042
static RPCHelpMan gettxoutsetinfo()
10301043
{
10311044
return RPCHelpMan{"gettxoutsetinfo",
10321045
"\nReturns statistics about the unspent transaction output set.\n"
10331046
"Note this call may take some time.\n",
10341047
{
1035-
{"hash_type", RPCArg::Type::STR, /* default */ "hash_serialized_2", "Which UTXO set hash should be calculated. Options: 'hash_serialized_2' (the legacy algorithm), 'none'."},
1048+
{"hash_type", RPCArg::Type::STR, /* default */ "hash_serialized_2", "Which UTXO set hash should be calculated. Options: 'hash_serialized_2' (the legacy algorithm), 'muhash', 'none'."},
10361049
},
10371050
RPCResult{
10381051
RPCResult::Type::OBJ, "", "",
@@ -1042,7 +1055,8 @@ static RPCHelpMan gettxoutsetinfo()
10421055
{RPCResult::Type::NUM, "transactions", "The number of transactions with unspent outputs"},
10431056
{RPCResult::Type::NUM, "txouts", "The number of unspent transaction outputs"},
10441057
{RPCResult::Type::NUM, "bogosize", "A meaningless metric for UTXO set size"},
1045-
{RPCResult::Type::STR_HEX, "hash_serialized_2", "The serialized hash (only present if 'hash_serialized_2' hash_type is chosen)"},
1058+
{RPCResult::Type::STR_HEX, "hash_serialized_2", /* optional */ true, "The serialized hash (only present if 'hash_serialized_2' hash_type is chosen)"},
1059+
{RPCResult::Type::STR_HEX, "muhash", /* optional */ true, "The serialized hash (only present if 'muhash' hash_type is chosen)"},
10461060
{RPCResult::Type::NUM, "disk_size", "The estimated size of the chainstate on disk"},
10471061
{RPCResult::Type::STR_AMOUNT, "total_amount", "The total amount"},
10481062
}},
@@ -1057,7 +1071,7 @@ static RPCHelpMan gettxoutsetinfo()
10571071
CCoinsStats stats;
10581072
::ChainstateActive().ForceFlushStateToDisk();
10591073

1060-
const CoinStatsHashType hash_type = ParseHashType(request.params[0], CoinStatsHashType::HASH_SERIALIZED);
1074+
const CoinStatsHashType hash_type{request.params[0].isNull() ? CoinStatsHashType::HASH_SERIALIZED : ParseHashType(request.params[0].get_str())};
10611075

10621076
CCoinsView* coins_view = WITH_LOCK(cs_main, return &ChainstateActive().CoinsDB());
10631077
NodeContext& node = EnsureNodeContext(request.context);
@@ -1070,6 +1084,9 @@ static RPCHelpMan gettxoutsetinfo()
10701084
if (hash_type == CoinStatsHashType::HASH_SERIALIZED) {
10711085
ret.pushKV("hash_serialized_2", stats.hashSerialized.GetHex());
10721086
}
1087+
if (hash_type == CoinStatsHashType::MUHASH) {
1088+
ret.pushKV("muhash", stats.hashSerialized.GetHex());
1089+
}
10731090
ret.pushKV("disk_size", stats.nDiskSize);
10741091
ret.pushKV("total_amount", ValueFromAmount(stats.nTotalAmount));
10751092
} else {

src/rpc/util.cpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -113,23 +113,6 @@ std::vector<unsigned char> ParseHexO(const UniValue& o, std::string strKey)
113113
return ParseHexV(find_value(o, strKey), strKey);
114114
}
115115

116-
CoinStatsHashType ParseHashType(const UniValue& param, const CoinStatsHashType default_type)
117-
{
118-
if (param.isNull()) {
119-
return default_type;
120-
} else {
121-
std::string hash_type_input = param.get_str();
122-
123-
if (hash_type_input == "hash_serialized_2") {
124-
return CoinStatsHashType::HASH_SERIALIZED;
125-
} else if (hash_type_input == "none") {
126-
return CoinStatsHashType::NONE;
127-
} else {
128-
throw JSONRPCError(RPC_INVALID_PARAMETER, strprintf("%d is not a valid hash_type", hash_type_input));
129-
}
130-
}
131-
}
132-
133116
std::string HelpExampleCli(const std::string& methodname, const std::string& args)
134117
{
135118
return "> bitcoin-cli " + methodname + " " + args + "\n";

src/rpc/util.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,6 @@ extern uint256 ParseHashO(const UniValue& o, std::string strKey);
7777
extern std::vector<unsigned char> ParseHexV(const UniValue& v, std::string strName);
7878
extern std::vector<unsigned char> ParseHexO(const UniValue& o, std::string strKey);
7979

80-
CoinStatsHashType ParseHashType(const UniValue& param, const CoinStatsHashType default_type);
81-
8280
extern CAmount AmountFromValue(const UniValue& value);
8381
extern std::string HelpExampleCli(const std::string& methodname, const std::string& args);
8482
extern std::string HelpExampleRpc(const std::string& methodname, const std::string& args);
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) 2020-2021 The Bitcoin Core developers
3+
# Distributed under the MIT software license, see the accompanying
4+
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
5+
"""Test UTXO set hash value calculation in gettxoutsetinfo."""
6+
7+
import struct
8+
9+
from test_framework.blocktools import create_transaction
10+
from test_framework.messages import (
11+
CBlock,
12+
COutPoint,
13+
FromHex,
14+
)
15+
from test_framework.muhash import MuHash3072
16+
from test_framework.test_framework import BitcoinTestFramework
17+
from test_framework.util import assert_equal
18+
19+
class UTXOSetHashTest(BitcoinTestFramework):
20+
def set_test_params(self):
21+
self.num_nodes = 1
22+
self.setup_clean_chain = True
23+
24+
def skip_test_if_missing_module(self):
25+
self.skip_if_no_wallet()
26+
27+
def test_deterministic_hash_results(self):
28+
self.log.info("Test deterministic UTXO set hash results")
29+
30+
# These depend on the setup_clean_chain option, the chain loaded from the cache
31+
assert_equal(self.nodes[0].gettxoutsetinfo()['hash_serialized_2'], "b32ec1dda5a53cd025b95387aad344a801825fe46a60ff952ce26528f01d3be8")
32+
assert_equal(self.nodes[0].gettxoutsetinfo("muhash")['muhash'], "dd5ad2a105c2d29495f577245c357409002329b9f4d6182c0af3dc2f462555c8")
33+
34+
def test_muhash_implementation(self):
35+
self.log.info("Test MuHash implementation consistency")
36+
37+
node = self.nodes[0]
38+
39+
# Generate 100 blocks and remove the first since we plan to spend its
40+
# coinbase
41+
block_hashes = node.generate(100)
42+
blocks = list(map(lambda block: FromHex(CBlock(), node.getblock(block, False)), block_hashes))
43+
spending = blocks.pop(0)
44+
45+
# Create a spending transaction and mine a block which includes it
46+
tx = create_transaction(node, spending.vtx[0].rehash(), node.getnewaddress(), amount=49)
47+
txid = node.sendrawtransaction(hexstring=tx.serialize_with_witness().hex(), maxfeerate=0)
48+
49+
tx_block = node.generateblock(output=node.getnewaddress(), transactions=[txid])
50+
blocks.append(FromHex(CBlock(), node.getblock(tx_block['hash'], False)))
51+
52+
# Serialize the outputs that should be in the UTXO set and add them to
53+
# a MuHash object
54+
muhash = MuHash3072()
55+
56+
for height, block in enumerate(blocks):
57+
# The Genesis block coinbase is not part of the UTXO set and we
58+
# spent the first mined block
59+
height += 2
60+
61+
for tx in block.vtx:
62+
for n, tx_out in enumerate(tx.vout):
63+
coinbase = 1 if not tx.vin[0].prevout.hash else 0
64+
65+
# Skip witness commitment
66+
if (coinbase and n > 0):
67+
continue
68+
69+
data = COutPoint(int(tx.rehash(), 16), n).serialize()
70+
data += struct.pack("<i", height * 2 + coinbase)
71+
data += tx_out.serialize()
72+
73+
muhash.insert(data)
74+
75+
finalized = muhash.digest()
76+
node_muhash = node.gettxoutsetinfo("muhash")['muhash']
77+
78+
assert_equal(finalized[::-1].hex(), node_muhash)
79+
80+
def run_test(self):
81+
self.test_deterministic_hash_results()
82+
self.test_muhash_implementation()
83+
84+
85+
if __name__ == '__main__':
86+
UTXOSetHashTest().main()

0 commit comments

Comments
 (0)