|
| 1 | +// Copyright (c) 2018 The Bitcoin Core developers |
| 2 | +// Distributed under the MIT software license, see the accompanying |
| 3 | +// file COPYING or http://www.opensource.org/licenses/mit-license.php. |
| 4 | + |
| 5 | +#include <blockfilter.h> |
| 6 | +#include <hash.h> |
| 7 | +#include <streams.h> |
| 8 | + |
| 9 | +/// SerType used to serialize parameters in GCS filter encoding. |
| 10 | +static constexpr int GCS_SER_TYPE = SER_NETWORK; |
| 11 | + |
| 12 | +/// Protocol version used to serialize parameters in GCS filter encoding. |
| 13 | +static constexpr int GCS_SER_VERSION = 0; |
| 14 | + |
| 15 | +template <typename OStream> |
| 16 | +static void GolombRiceEncode(BitStreamWriter<OStream>& bitwriter, uint8_t P, uint64_t x) |
| 17 | +{ |
| 18 | + // Write quotient as unary-encoded: q 1's followed by one 0. |
| 19 | + uint64_t q = x >> P; |
| 20 | + while (q > 0) { |
| 21 | + int nbits = q <= 64 ? static_cast<int>(q) : 64; |
| 22 | + bitwriter.Write(~0ULL, nbits); |
| 23 | + q -= nbits; |
| 24 | + } |
| 25 | + bitwriter.Write(0, 1); |
| 26 | + |
| 27 | + // Write the remainder in P bits. Since the remainder is just the bottom |
| 28 | + // P bits of x, there is no need to mask first. |
| 29 | + bitwriter.Write(x, P); |
| 30 | +} |
| 31 | + |
| 32 | +template <typename IStream> |
| 33 | +static uint64_t GolombRiceDecode(BitStreamReader<IStream>& bitreader, uint8_t P) |
| 34 | +{ |
| 35 | + // Read unary-encoded quotient: q 1's followed by one 0. |
| 36 | + uint64_t q = 0; |
| 37 | + while (bitreader.Read(1) == 1) { |
| 38 | + ++q; |
| 39 | + } |
| 40 | + |
| 41 | + uint64_t r = bitreader.Read(P); |
| 42 | + |
| 43 | + return (q << P) + r; |
| 44 | +} |
| 45 | + |
// Map a value x that is uniformly distributed in the range [0, 2^64) to a
// value uniformly distributed in [0, n) by returning the upper 64 bits of
// the 128-bit product x * n.
//
// See: https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
//
// @param x  Input value, expected to be uniform over all 64-bit values.
// @param n  Exclusive upper bound of the output range; n == 0 yields 0.
// @return   floor(x * n / 2^64).
static uint64_t MapIntoRange(uint64_t x, uint64_t n)
{
#ifdef __SIZEOF_INT128__
    // The compiler offers a native 128-bit integer: let it compute the full
    // product and keep the high half.
    return static_cast<uint64_t>(
        (static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(n)) >> 64);
#else
    // Portable fallback. To perform the calculation on 64-bit numbers without
    // losing the result to overflow, split both operands into their most and
    // least significant 32 bits and multiply piece-wise.
    //
    // See: https://stackoverflow.com/a/26855440
    const uint64_t x_hi = x >> 32;
    const uint64_t x_lo = x & 0xFFFFFFFF;
    const uint64_t n_hi = n >> 32;
    const uint64_t n_lo = n & 0xFFFFFFFF;

    const uint64_t ac = x_hi * n_hi;
    const uint64_t ad = x_hi * n_lo;
    const uint64_t bc = x_lo * n_hi;
    const uint64_t bd = x_lo * n_lo;

    // Sum of the three terms that land in bits 32..63 of the product; its own
    // upper bits are the carry into the high 64-bit word.
    const uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF);
    const uint64_t upper64 = ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32);
    return upper64;
#endif
}
| 72 | + |
| 73 | +uint64_t GCSFilter::HashToRange(const Element& element) const |
| 74 | +{ |
| 75 | + uint64_t hash = CSipHasher(m_siphash_k0, m_siphash_k1) |
| 76 | + .Write(element.data(), element.size()) |
| 77 | + .Finalize(); |
| 78 | + return MapIntoRange(hash, m_F); |
| 79 | +} |
| 80 | + |
| 81 | +std::vector<uint64_t> GCSFilter::BuildHashedSet(const ElementSet& elements) const |
| 82 | +{ |
| 83 | + std::vector<uint64_t> hashed_elements; |
| 84 | + hashed_elements.reserve(elements.size()); |
| 85 | + for (const Element& element : elements) { |
| 86 | + hashed_elements.push_back(HashToRange(element)); |
| 87 | + } |
| 88 | + std::sort(hashed_elements.begin(), hashed_elements.end()); |
| 89 | + return hashed_elements; |
| 90 | +} |
| 91 | + |
| 92 | +GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M) |
| 93 | + : m_siphash_k0(siphash_k0), m_siphash_k1(siphash_k1), m_P(P), m_M(M), m_N(0), m_F(0) |
| 94 | +{} |
| 95 | + |
| 96 | +GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M, |
| 97 | + std::vector<unsigned char> encoded_filter) |
| 98 | + : GCSFilter(siphash_k0, siphash_k1, P, M) |
| 99 | +{ |
| 100 | + m_encoded = std::move(encoded_filter); |
| 101 | + |
| 102 | + VectorReader stream(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0); |
| 103 | + |
| 104 | + uint64_t N = ReadCompactSize(stream); |
| 105 | + m_N = static_cast<uint32_t>(N); |
| 106 | + if (m_N != N) { |
| 107 | + throw std::ios_base::failure("N must be <2^32"); |
| 108 | + } |
| 109 | + m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_M); |
| 110 | + |
| 111 | + // Verify that the encoded filter contains exactly N elements. If it has too much or too little |
| 112 | + // data, a std::ios_base::failure exception will be raised. |
| 113 | + BitStreamReader<VectorReader> bitreader(stream); |
| 114 | + for (uint64_t i = 0; i < m_N; ++i) { |
| 115 | + GolombRiceDecode(bitreader, m_P); |
| 116 | + } |
| 117 | + if (!stream.empty()) { |
| 118 | + throw std::ios_base::failure("encoded_filter contains excess data"); |
| 119 | + } |
| 120 | +} |
| 121 | + |
| 122 | +GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M, |
| 123 | + const ElementSet& elements) |
| 124 | + : GCSFilter(siphash_k0, siphash_k1, P, M) |
| 125 | +{ |
| 126 | + size_t N = elements.size(); |
| 127 | + m_N = static_cast<uint32_t>(N); |
| 128 | + if (m_N != N) { |
| 129 | + throw std::invalid_argument("N must be <2^32"); |
| 130 | + } |
| 131 | + m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_M); |
| 132 | + |
| 133 | + CVectorWriter stream(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0); |
| 134 | + |
| 135 | + WriteCompactSize(stream, m_N); |
| 136 | + |
| 137 | + if (elements.empty()) { |
| 138 | + return; |
| 139 | + } |
| 140 | + |
| 141 | + BitStreamWriter<CVectorWriter> bitwriter(stream); |
| 142 | + |
| 143 | + uint64_t last_value = 0; |
| 144 | + for (uint64_t value : BuildHashedSet(elements)) { |
| 145 | + uint64_t delta = value - last_value; |
| 146 | + GolombRiceEncode(bitwriter, m_P, delta); |
| 147 | + last_value = value; |
| 148 | + } |
| 149 | + |
| 150 | + bitwriter.Flush(); |
| 151 | +} |
0 commit comments