Skip to content

Commit 5abbc9a

Browse files
committed
Merge bitcoin/bitcoin#24832: index: Verify the block filter hash when reading the filter from disk.
e734228 Update GCSFilter benchmarks (Calvin Kim) aee9a81 Add GCSFilterDecodeSkipCheck benchmark (Patrick Strateman) 299023c Add GCSFilterDecode and GCSBlockFilterGetHash benchmarks. (Patrick Strateman) b0a53d5 Make sanity check in GCSFilter constructor optional (Patrick Strateman) Pull request description: This PR picks up the abandoned #19280 BlockFilterIndex was depending on `GolombRiceDecode()` during the filter decode to sanity check that the filter wasn't corrupt. However, we can check for corruption by ensuring that the encoded blockfilter's hash matches up with the one stored in the index database. Benchmarks that were added in #19280 showed that checking the hash is much faster. The benchmarks were changed to nanobench and the relevant benchmarks were like below, showing a clear win for the hash check method. ``` | ns/elem | elem/s | err% | ins/elem | bra/elem | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|---------------:|--------:|----------:|:---------- | 531.40 | 1,881,819.43 | 0.3% | 3,527.01 | 411.00 | 0.2% | 0.01 | `DecodeCheckedGCSFilter` | 258,220.50 | 3,872.66 | 0.1% | 2,990,092.00 | 586,706.00 | 1.7% | 0.01 | `DecodeGCSFilter` | 13,036.77 | 76,706.09 | 0.3% | 64,238.24 | 513.04 | 0.2% | 0.01 | `BlockFilterGetHash` ``` ACKs for top commit: mzumsande: Code Review ACK e734228 theStack: Code-review ACK e734228 stickies-v: ACK e734228 ryanofsky: Code review ACK e734228, with caveat that I mostly paid attention to the main code, not the changes to the benchmark. Only changes since last review were changes to the benchmark code. Tree-SHA512: 02b86eab7b554e1a57a15b17a4d6d71faa91b556c637b0da29f0c9ee76597a110be8e3b4d0c158d4cab04af0623de18b764837be0ec2a72afcfe1ad9c78a83c6
2 parents d571cf2 + e734228 commit 5abbc9a

File tree

5 files changed

+80
-28
lines changed

5 files changed

+80
-28
lines changed

src/bench/gcs_filter.cpp

Lines changed: 62 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,39 +5,84 @@
55
#include <bench/bench.h>
66
#include <blockfilter.h>
77

8-
static void ConstructGCSFilter(benchmark::Bench& bench)
8+
static const GCSFilter::ElementSet GenerateGCSTestElements()
99
{
1010
GCSFilter::ElementSet elements;
11-
for (int i = 0; i < 10000; ++i) {
11+
12+
// Testing the benchmarks with different number of elements show that a filter
13+
// with at least 100,000 elements results in benchmarks that have the same
14+
// ns/op. This makes it easy to reason about how long (in nanoseconds) a single
15+
// filter element takes to process.
16+
for (int i = 0; i < 100000; ++i) {
1217
GCSFilter::Element element(32);
1318
element[0] = static_cast<unsigned char>(i);
1419
element[1] = static_cast<unsigned char>(i >> 8);
1520
elements.insert(std::move(element));
1621
}
1722

23+
return elements;
24+
}
25+
26+
static void GCSBlockFilterGetHash(benchmark::Bench& bench)
27+
{
28+
auto elements = GenerateGCSTestElements();
29+
30+
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
31+
BlockFilter block_filter(BlockFilterType::BASIC, {}, filter.GetEncoded(), /*skip_decode_check=*/false);
32+
33+
bench.run([&] {
34+
block_filter.GetHash();
35+
});
36+
}
37+
38+
static void GCSFilterConstruct(benchmark::Bench& bench)
39+
{
40+
auto elements = GenerateGCSTestElements();
41+
1842
uint64_t siphash_k0 = 0;
19-
bench.batch(elements.size()).unit("elem").run([&] {
20-
GCSFilter filter({siphash_k0, 0, 20, 1 << 20}, elements);
43+
bench.run([&]{
44+
GCSFilter filter({siphash_k0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
2145

2246
siphash_k0++;
2347
});
2448
}
2549

26-
static void MatchGCSFilter(benchmark::Bench& bench)
50+
static void GCSFilterDecode(benchmark::Bench& bench)
2751
{
28-
GCSFilter::ElementSet elements;
29-
for (int i = 0; i < 10000; ++i) {
30-
GCSFilter::Element element(32);
31-
element[0] = static_cast<unsigned char>(i);
32-
element[1] = static_cast<unsigned char>(i >> 8);
33-
elements.insert(std::move(element));
34-
}
35-
GCSFilter filter({0, 0, 20, 1 << 20}, elements);
52+
auto elements = GenerateGCSTestElements();
3653

37-
bench.unit("elem").run([&] {
38-
filter.Match(GCSFilter::Element());
54+
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
55+
auto encoded = filter.GetEncoded();
56+
57+
bench.run([&] {
58+
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/false);
3959
});
4060
}
4161

42-
BENCHMARK(ConstructGCSFilter);
43-
BENCHMARK(MatchGCSFilter);
62+
static void GCSFilterDecodeSkipCheck(benchmark::Bench& bench)
63+
{
64+
auto elements = GenerateGCSTestElements();
65+
66+
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
67+
auto encoded = filter.GetEncoded();
68+
69+
bench.run([&] {
70+
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, encoded, /*skip_decode_check=*/true);
71+
});
72+
}
73+
74+
static void GCSFilterMatch(benchmark::Bench& bench)
75+
{
76+
auto elements = GenerateGCSTestElements();
77+
78+
GCSFilter filter({0, 0, BASIC_FILTER_P, BASIC_FILTER_M}, elements);
79+
80+
bench.run([&] {
81+
filter.Match(GCSFilter::Element());
82+
});
83+
}
84+
BENCHMARK(GCSBlockFilterGetHash);
85+
BENCHMARK(GCSFilterConstruct);
86+
BENCHMARK(GCSFilterDecode);
87+
BENCHMARK(GCSFilterDecodeSkipCheck);
88+
BENCHMARK(GCSFilterMatch);

src/blockfilter.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ GCSFilter::GCSFilter(const Params& params)
4747
: m_params(params), m_N(0), m_F(0), m_encoded{0}
4848
{}
4949

50-
GCSFilter::GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter)
50+
GCSFilter::GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter, bool skip_decode_check)
5151
: m_params(params), m_encoded(std::move(encoded_filter))
5252
{
5353
SpanReader stream{GCS_SER_TYPE, GCS_SER_VERSION, m_encoded};
@@ -59,6 +59,8 @@ GCSFilter::GCSFilter(const Params& params, std::vector<unsigned char> encoded_fi
5959
}
6060
m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_params.m_M);
6161

62+
if (skip_decode_check) return;
63+
6264
// Verify that the encoded filter contains exactly N elements. If it has too much or too little
6365
// data, a std::ios_base::failure exception will be raised.
6466
BitStreamReader<SpanReader> bitreader{stream};
@@ -219,14 +221,14 @@ static GCSFilter::ElementSet BasicFilterElements(const CBlock& block,
219221
}
220222

221223
BlockFilter::BlockFilter(BlockFilterType filter_type, const uint256& block_hash,
222-
std::vector<unsigned char> filter)
224+
std::vector<unsigned char> filter, bool skip_decode_check)
223225
: m_filter_type(filter_type), m_block_hash(block_hash)
224226
{
225227
GCSFilter::Params params;
226228
if (!BuildParams(params)) {
227229
throw std::invalid_argument("unknown filter_type");
228230
}
229-
m_filter = GCSFilter(params, std::move(filter));
231+
m_filter = GCSFilter(params, std::move(filter), skip_decode_check);
230232
}
231233

232234
BlockFilter::BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo)

src/blockfilter.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class GCSFilter
5959
explicit GCSFilter(const Params& params = Params());
6060

6161
/** Reconstructs an already-created filter from an encoding. */
62-
GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter);
62+
GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter, bool skip_decode_check);
6363

6464
/** Builds a new filter from the params and set of elements. */
6565
GCSFilter(const Params& params, const ElementSet& elements);
@@ -122,7 +122,7 @@ class BlockFilter
122122

123123
//! Reconstruct a BlockFilter from parts.
124124
BlockFilter(BlockFilterType filter_type, const uint256& block_hash,
125-
std::vector<unsigned char> filter);
125+
std::vector<unsigned char> filter, bool skip_decode_check);
126126

127127
//! Construct a new BlockFilter of the specified type from a block.
128128
BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo);
@@ -164,7 +164,7 @@ class BlockFilter
164164
if (!BuildParams(params)) {
165165
throw std::ios_base::failure("unknown filter_type");
166166
}
167-
m_filter = GCSFilter(params, std::move(encoded_filter));
167+
m_filter = GCSFilter(params, std::move(encoded_filter), /*skip_decode_check=*/false);
168168
}
169169
};
170170

src/index/blockfilterindex.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <map>
66

77
#include <dbwrapper.h>
8+
#include <hash.h>
89
#include <index/blockfilterindex.h>
910
#include <node/blockstorage.h>
1011
#include <util/system.h>
@@ -143,18 +144,22 @@ bool BlockFilterIndex::CommitInternal(CDBBatch& batch)
143144
return BaseIndex::CommitInternal(batch);
144145
}
145146

146-
bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, BlockFilter& filter) const
147+
bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const
147148
{
148149
CAutoFile filein(m_filter_fileseq->Open(pos, true), SER_DISK, CLIENT_VERSION);
149150
if (filein.IsNull()) {
150151
return false;
151152
}
152153

154+
// Check that the hash of the encoded_filter matches the one stored in the db.
153155
uint256 block_hash;
154156
std::vector<uint8_t> encoded_filter;
155157
try {
156158
filein >> block_hash >> encoded_filter;
157-
filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter));
159+
uint256 result;
160+
CHash256().Write(encoded_filter).Finalize(result);
161+
if (result != hash) return error("Checksum mismatch in filter decode.");
162+
filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true);
158163
}
159164
catch (const std::exception& e) {
160165
return error("%s: Failed to deserialize block filter from disk: %s", __func__, e.what());
@@ -381,7 +386,7 @@ bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter&
381386
return false;
382387
}
383388

384-
return ReadFilterFromDisk(entry.pos, filter_out);
389+
return ReadFilterFromDisk(entry.pos, entry.hash, filter_out);
385390
}
386391

387392
bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out)
@@ -425,7 +430,7 @@ bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* st
425430
filters_out.resize(entries.size());
426431
auto filter_pos_it = filters_out.begin();
427432
for (const auto& entry : entries) {
428-
if (!ReadFilterFromDisk(entry.pos, *filter_pos_it)) {
433+
if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) {
429434
return false;
430435
}
431436
++filter_pos_it;

src/index/blockfilterindex.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class BlockFilterIndex final : public BaseIndex
3131
FlatFilePos m_next_filter_pos;
3232
std::unique_ptr<FlatFileSeq> m_filter_fileseq;
3333

34-
bool ReadFilterFromDisk(const FlatFilePos& pos, BlockFilter& filter) const;
34+
bool ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const;
3535
size_t WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter);
3636

3737
Mutex m_cs_headers_cache;

0 commit comments

Comments
 (0)