Skip to content

Commit cf70b55

Browse files
Jim Posen (jimpo)
authored and committed
blockfilter: Implement GCSFilter constructors.
1 parent c454f0a commit cf70b55

File tree

3 files changed

+157
-0
lines changed

3 files changed

+157
-0
lines changed

src/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ libbitcoin_server_a_SOURCES = \
220220
addrman.cpp \
221221
bloom.cpp \
222222
blockencodings.cpp \
223+
blockfilter.cpp \
223224
chain.cpp \
224225
checkpoints.cpp \
225226
consensus/tx_verify.cpp \

src/blockfilter.cpp

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
// Copyright (c) 2018 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#include <blockfilter.h>
6+
#include <hash.h>
7+
#include <streams.h>
8+
9+
/// SerType used to serialize parameters in GCS filter encoding.
10+
static constexpr int GCS_SER_TYPE = SER_NETWORK;
11+
12+
/// Protocol version used to serialize parameters in GCS filter encoding.
13+
static constexpr int GCS_SER_VERSION = 0;
14+
15+
template <typename OStream>
16+
static void GolombRiceEncode(BitStreamWriter<OStream>& bitwriter, uint8_t P, uint64_t x)
17+
{
18+
// Write quotient as unary-encoded: q 1's followed by one 0.
19+
uint64_t q = x >> P;
20+
while (q > 0) {
21+
int nbits = q <= 64 ? static_cast<int>(q) : 64;
22+
bitwriter.Write(~0ULL, nbits);
23+
q -= nbits;
24+
}
25+
bitwriter.Write(0, 1);
26+
27+
// Write the remainder in P bits. Since the remainder is just the bottom
28+
// P bits of x, there is no need to mask first.
29+
bitwriter.Write(x, P);
30+
}
31+
32+
template <typename IStream>
33+
static uint64_t GolombRiceDecode(BitStreamReader<IStream>& bitreader, uint8_t P)
34+
{
35+
// Read unary-encoded quotient: q 1's followed by one 0.
36+
uint64_t q = 0;
37+
while (bitreader.Read(1) == 1) {
38+
++q;
39+
}
40+
41+
uint64_t r = bitreader.Read(P);
42+
43+
return (q << P) + r;
44+
}
45+
46+
// Map a value x that is uniformly distributed in the range [0, 2^64) to a
// value uniformly distributed in [0, n) by returning the upper 64 bits of
// x * n.
//
// For n == 0 the result is always 0.
//
// See: https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
static uint64_t MapIntoRange(uint64_t x, uint64_t n)
{
#ifdef __SIZEOF_INT128__
    // The compiler provides a native 128-bit integer type: compute the full
    // 128-bit product directly and keep its upper half.
    return static_cast<uint64_t>(
        (static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(n)) >> 64);
#else
    // To perform the calculation on 64-bit numbers without losing the
    // result to overflow, split the numbers into the most significant and
    // least significant 32 bits and perform multiplication piece-wise.
    //
    // See: https://stackoverflow.com/a/26855440
    const uint64_t x_hi = x >> 32;
    const uint64_t x_lo = x & 0xFFFFFFFF;
    const uint64_t n_hi = n >> 32;
    const uint64_t n_lo = n & 0xFFFFFFFF;

    const uint64_t ac = x_hi * n_hi;
    const uint64_t ad = x_hi * n_lo;
    const uint64_t bc = x_lo * n_hi;
    const uint64_t bd = x_lo * n_lo;

    // Sum of the three terms that land in bits [32, 64) of the product; its
    // own upper bits are the carry into the high 64-bit word.
    const uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF);
    const uint64_t upper64 = ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32);
    return upper64;
#endif
}
72+
73+
uint64_t GCSFilter::HashToRange(const Element& element) const
74+
{
75+
uint64_t hash = CSipHasher(m_siphash_k0, m_siphash_k1)
76+
.Write(element.data(), element.size())
77+
.Finalize();
78+
return MapIntoRange(hash, m_F);
79+
}
80+
81+
std::vector<uint64_t> GCSFilter::BuildHashedSet(const ElementSet& elements) const
82+
{
83+
std::vector<uint64_t> hashed_elements;
84+
hashed_elements.reserve(elements.size());
85+
for (const Element& element : elements) {
86+
hashed_elements.push_back(HashToRange(element));
87+
}
88+
std::sort(hashed_elements.begin(), hashed_elements.end());
89+
return hashed_elements;
90+
}
91+
92+
GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M)
93+
: m_siphash_k0(siphash_k0), m_siphash_k1(siphash_k1), m_P(P), m_M(M), m_N(0), m_F(0)
94+
{}
95+
96+
// Construct a filter from its serialized form.
//
// The encoding starts with the element count N as a CompactSize, followed by
// N Golomb-Rice coded values. The values are decoded once here purely to
// validate the encoding; the decoded numbers are discarded.
//
// Throws std::ios_base::failure if N does not fit in 32 bits or if data remains
// after the N-th value. Per the original comment, reading past the end of a
// too-short encoding also raises std::ios_base::failure.
GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M,
                     std::vector<unsigned char> encoded_filter)
    : GCSFilter(siphash_k0, siphash_k1, P, M)
{
    m_encoded = std::move(encoded_filter);

    VectorReader stream(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0);

    // Element count; rejected if narrowing to 32 bits loses information.
    const uint64_t element_count = ReadCompactSize(stream);
    m_N = static_cast<uint32_t>(element_count);
    if (m_N != element_count) {
        throw std::ios_base::failure("N must be <2^32");
    }
    m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_M);

    // Walk over exactly N coded values to check the payload is well formed.
    BitStreamReader<VectorReader> bitreader(stream);
    for (uint32_t remaining = m_N; remaining > 0; --remaining) {
        GolombRiceDecode(bitreader, m_P);
    }

    // Nothing may follow the last coded value.
    if (stream.empty()) {
        return;
    }
    throw std::ios_base::failure("encoded_filter contains excess data");
}
121+
122+
// Construct a filter from a set of elements and build its encoding.
//
// The encoding is N as a CompactSize, followed (when the set is non-empty) by
// the Golomb-Rice codes of the differences between consecutive sorted element
// hashes, starting from 0.
//
// Throws std::invalid_argument if the number of elements does not fit in
// 32 bits.
GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M,
                     const ElementSet& elements)
    : GCSFilter(siphash_k0, siphash_k1, P, M)
{
    // Element count; rejected if narrowing to 32 bits loses information.
    const size_t element_count = elements.size();
    m_N = static_cast<uint32_t>(element_count);
    if (m_N != element_count) {
        throw std::invalid_argument("N must be <2^32");
    }
    m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_M);

    CVectorWriter stream(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0);

    WriteCompactSize(stream, m_N);

    // An empty set is encoded as just the count; no bit stream is opened.
    if (!elements.empty()) {
        BitStreamWriter<CVectorWriter> bitwriter(stream);

        // Delta-encode the sorted hashes: each value is stored as its distance
        // from the previous one.
        const std::vector<uint64_t> sorted_hashes = BuildHashedSet(elements);
        uint64_t previous = 0;
        for (const uint64_t current : sorted_hashes) {
            GolombRiceEncode(bitwriter, m_P, current - previous);
            previous = current;
        }

        bitwriter.Flush();
    }
}

src/blockfilter.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ class GCSFilter
3131
uint64_t m_F; //!< Range of element hashes, F = N * M
3232
std::vector<unsigned char> m_encoded;
3333

34+
/** Hash a data element to an integer in the range [0, N * M). */
35+
uint64_t HashToRange(const Element& element) const;
36+
37+
std::vector<uint64_t> BuildHashedSet(const ElementSet& elements) const;
38+
3439
public:
3540

3641
/** Constructs an empty filter. */

0 commit comments

Comments
 (0)