Merge #18190: tests: Add fuzzing harness for Golomb-Rice coding (GolombRiceEncode/GolombRiceDecode)

MarcoFalke · MarcoFalke · commit c4c3f110eb93 · 2020-04-20T15:32:41.000-04:00
69749fb tests: Add fuzzing harness for Golomb-Rice coding (GolombRiceEncode/GolombRiceDecode) (practicalswift) Pull request description: Add fuzzing harness for Golomb-Rice coding (`GolombRiceEncode`/`GolombRiceDecode`). Test this PR using: ``` $ make distclean $ ./autogen.sh $ CC=clang CXX=clang++ ./configure --enable-fuzz \ --with-sanitizers=address,fuzzer,undefined $ make $ src/test/fuzz/golomb_rice … ``` Top commit has no ACKs. Tree-SHA512: 1b26512301b8c22ab3b804d9b9e4baf933f26f8c05e462d583863badcec7e694548a34849a0d7c4ff7d58b19f6338b51819976ecf642bc4659b04ef71182d748
diff --git a/src/Makefile.am b/src/Makefile.am
@@ -215,6 +215,7 @@ BITCOIN_CORE_H = \
   util/check.h \
   util/error.h \
   util/fees.h \
+  util/golombrice.h \
   util/spanparsing.h \
   util/system.h \
   util/macros.h \
diff --git a/src/Makefile.test.include b/src/Makefile.test.include
@@ -41,6 +41,7 @@ FUZZ_TARGETS = \
   test/fuzz/flat_file_pos_deserialize \
   test/fuzz/flatfile \
   test/fuzz/float \
+  test/fuzz/golomb_rice \
   test/fuzz/hex \
   test/fuzz/http_request \
   test/fuzz/integer \
@@ -517,6 +518,12 @@ test_fuzz_float_LDADD = $(FUZZ_SUITE_LD_COMMON)
 test_fuzz_float_LDFLAGS = $(RELDFLAGS) $(AM_LDFLAGS) $(LIBTOOL_APP_LDFLAGS)
 test_fuzz_float_SOURCES = test/fuzz/float.cpp
 
+test_fuzz_golomb_rice_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
+test_fuzz_golomb_rice_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
+test_fuzz_golomb_rice_LDADD = $(FUZZ_SUITE_LD_COMMON)
+test_fuzz_golomb_rice_LDFLAGS = $(RELDFLAGS) $(AM_LDFLAGS) $(LIBTOOL_APP_LDFLAGS)
+test_fuzz_golomb_rice_SOURCES = test/fuzz/golomb_rice.cpp
+
 test_fuzz_hex_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES)
 test_fuzz_hex_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
 test_fuzz_hex_LDADD = $(FUZZ_SUITE_LD_COMMON)
diff --git a/src/blockfilter.cpp b/src/blockfilter.cpp
@@ -12,6 +12,7 @@
 #include <primitives/transaction.h>
 #include <script/script.h>
 #include <streams.h>
+#include <util/golombrice.h>
 
 /// SerType used to serialize parameters in GCS filter encoding.
 static constexpr int GCS_SER_TYPE = SER_NETWORK;
@@ -23,37 +24,6 @@ static const std::map<BlockFilterType, std::string> g_filter_types = {
     {BlockFilterType::BASIC, "basic"},
 };
 
-template <typename OStream>
-static void GolombRiceEncode(BitStreamWriter<OStream>& bitwriter, uint8_t P, uint64_t x)
-{
-    // Write quotient as unary-encoded: q 1's followed by one 0.
-    uint64_t q = x >> P;
-    while (q > 0) {
-        int nbits = q <= 64 ? static_cast<int>(q) : 64;
-        bitwriter.Write(~0ULL, nbits);
-        q -= nbits;
-    }
-    bitwriter.Write(0, 1);
-
-    // Write the remainder in P bits. Since the remainder is just the bottom
-    // P bits of x, there is no need to mask first.
-    bitwriter.Write(x, P);
-}
-
-template <typename IStream>
-static uint64_t GolombRiceDecode(BitStreamReader<IStream>& bitreader, uint8_t P)
-{
-    // Read unary-encoded quotient: q 1's followed by one 0.
-    uint64_t q = 0;
-    while (bitreader.Read(1) == 1) {
-        ++q;
-    }
-
-    uint64_t r = bitreader.Read(P);
-
-    return (q << P) + r;
-}
-
 // Map a value x that is uniformly distributed in the range [0, 2^64) to a
 // value uniformly distributed in [0, n) by returning the upper 64 bits of
 // x * n.
diff --git a/src/test/fuzz/golomb_rice.cpp b/src/test/fuzz/golomb_rice.cpp
@@ -0,0 +1,112 @@
+// Copyright (c) 2020 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include <blockfilter.h>
+#include <serialize.h>
+#include <streams.h>
+#include <test/fuzz/fuzz.h>
+#include <test/fuzz/FuzzedDataProvider.h>
+#include <test/fuzz/util.h>
+#include <util/bytevectorhash.h>
+#include <util/golombrice.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iosfwd>
+#include <unordered_set>
+#include <vector>
+
+namespace {
+uint64_t MapIntoRange(const uint64_t x, const uint64_t n)
+{
+    const uint64_t x_hi = x >> 32;
+    const uint64_t x_lo = x & 0xFFFFFFFF;
+    const uint64_t n_hi = n >> 32;
+    const uint64_t n_lo = n & 0xFFFFFFFF;
+    const uint64_t ac = x_hi * n_hi;
+    const uint64_t ad = x_hi * n_lo;
+    const uint64_t bc = x_lo * n_hi;
+    const uint64_t bd = x_lo * n_lo;
+    const uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF);
+    const uint64_t upper64 = ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32);
+    return upper64;
+}
+
+uint64_t HashToRange(const std::vector<uint8_t>& element, const uint64_t f)
+{
+    const uint64_t hash = CSipHasher(0x0706050403020100ULL, 0x0F0E0D0C0B0A0908ULL)
+                              .Write(element.data(), element.size())
+                              .Finalize();
+    return MapIntoRange(hash, f);
+}
+
+std::vector<uint64_t> BuildHashedSet(const std::unordered_set<std::vector<uint8_t>, ByteVectorHash>& elements, const uint64_t f)
+{
+    std::vector<uint64_t> hashed_elements;
+    hashed_elements.reserve(elements.size());
+    for (const std::vector<uint8_t>& element : elements) {
+        hashed_elements.push_back(HashToRange(element, f));
+    }
+    std::sort(hashed_elements.begin(), hashed_elements.end());
+    return hashed_elements;
+}
+} // namespace
+
+void test_one_input(const std::vector<uint8_t>& buffer)
+{
+    FuzzedDataProvider fuzzed_data_provider(buffer.data(), buffer.size());
+    std::vector<uint8_t> golomb_rice_data;
+    std::vector<uint64_t> encoded_deltas;
+    {
+        std::unordered_set<std::vector<uint8_t>, ByteVectorHash> elements;
+        const int n = fuzzed_data_provider.ConsumeIntegralInRange<int>(0, 512);
+        for (int i = 0; i < n; ++i) {
+            elements.insert(ConsumeRandomLengthByteVector(fuzzed_data_provider, 16));
+        }
+        CVectorWriter stream(SER_NETWORK, 0, golomb_rice_data, 0);
+        WriteCompactSize(stream, static_cast<uint32_t>(elements.size()));
+        BitStreamWriter<CVectorWriter> bitwriter(stream);
+        if (!elements.empty()) {
+            uint64_t last_value = 0;
+            for (const uint64_t value : BuildHashedSet(elements, static_cast<uint64_t>(elements.size()) * static_cast<uint64_t>(BASIC_FILTER_M))) {
+                const uint64_t delta = value - last_value;
+                encoded_deltas.push_back(delta);
+                GolombRiceEncode(bitwriter, BASIC_FILTER_P, delta);
+                last_value = value;
+            }
+        }
+        bitwriter.Flush();
+    }
+
+    std::vector<uint64_t> decoded_deltas;
+    {
+        VectorReader stream{SER_NETWORK, 0, golomb_rice_data, 0};
+        BitStreamReader<VectorReader> bitreader(stream);
+        const uint32_t n = static_cast<uint32_t>(ReadCompactSize(stream));
+        for (uint32_t i = 0; i < n; ++i) {
+            decoded_deltas.push_back(GolombRiceDecode(bitreader, BASIC_FILTER_P));
+        }
+    }
+
+    assert(encoded_deltas == decoded_deltas);
+
+    {
+        const std::vector<uint8_t> random_bytes = ConsumeRandomLengthByteVector(fuzzed_data_provider, 1024);
+        VectorReader stream{SER_NETWORK, 0, random_bytes, 0};
+        uint32_t n;
+        try {
+            n = static_cast<uint32_t>(ReadCompactSize(stream));
+        } catch (const std::ios_base::failure&) {
+            return;
+        }
+        BitStreamReader<VectorReader> bitreader(stream);
+        for (uint32_t i = 0; i < std::min<uint32_t>(n, 1024); ++i) {
+            try {
+                (void)GolombRiceDecode(bitreader, BASIC_FILTER_P);
+            } catch (const std::ios_base::failure&) {
+            }
+        }
+    }
+}
diff --git a/src/util/golombrice.h b/src/util/golombrice.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2018-2019 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#ifndef BITCOIN_UTIL_GOLOMBRICE_H
+#define BITCOIN_UTIL_GOLOMBRICE_H
+
+#include <streams.h>
+
+#include <cstdint>
+
+template <typename OStream>
+void GolombRiceEncode(BitStreamWriter<OStream>& bitwriter, uint8_t P, uint64_t x)
+{
+    // Write quotient as unary-encoded: q 1's followed by one 0.
+    uint64_t q = x >> P;
+    while (q > 0) {
+        int nbits = q <= 64 ? static_cast<int>(q) : 64;
+        bitwriter.Write(~0ULL, nbits);
+        q -= nbits;
+    }
+    bitwriter.Write(0, 1);
+
+    // Write the remainder in P bits. Since the remainder is just the bottom
+    // P bits of x, there is no need to mask first.
+    bitwriter.Write(x, P);
+}
+
+template <typename IStream>
+uint64_t GolombRiceDecode(BitStreamReader<IStream>& bitreader, uint8_t P)
+{
+    // Read unary-encoded quotient: q 1's followed by one 0.
+    uint64_t q = 0;
+    while (bitreader.Read(1) == 1) {
+        ++q;
+    }
+
+    uint64_t r = bitreader.Read(P);
+
+    return (q << P) + r;
+}
+
+#endif // BITCOIN_UTIL_GOLOMBRICE_H