Merge bitcoin/bitcoin#21966: Remove double serialization; use software encoder for fee estimation

laanwj · laanwj · commit 707ba8692b00 · 2021-05-26T10:16:41.000+02:00
66545da Remove support for double serialization (Pieter Wuille)
fff1cae Convert uses of double-serialization to {En,De}codeDouble (Pieter Wuille)
afd964d Convert existing float encoding tests (Pieter Wuille)
bda33f9 Add unit tests for serfloat module (Pieter Wuille)
2be4cd9 Add platform-independent float encoder/decoder (Pieter Wuille)
e40224d Remove unused float serialization (MarcoFalke)

Pull request description:

  Based on #21981.

  This adds a software-based platform-independent float/double encoder/decoder (platform independent in the sense that it only uses arithmetic and library calls, but never inspects the binary representation). This should strengthen our guarantee that encoded float/double values are portable across platforms. It then removes the functionality to serialize doubles from serialize.h, and replaces its only (non-test) use for fee estimation data serialization with the software encoder.

  At least on x86/ARM, the only difference should be how certain NaN values are encoded/decoded (but not *whether* they are NaN or not).

  It comes with tests that verify on is_iec559 platforms (which are the only ones we support, at least for now) that the serialized bytes exactly match the binary representation of floats in memory (for non-NaN).

ACKs for top commit:
  laanwj:
    Code review re-ACK 66545da
  practicalswift:
    cr re-ACK 66545da

Tree-SHA512: 62ad9adc26e28707b2eb12a919feefd4fd10cf9032652dbb1ca1cc97638ac21de89e240858e80d293d5112685c623e58affa3d316a9783ff0e6d291977a141f5
diff --git a/src/Makefile.am b/src/Makefile.am
@@ -253,6 +253,7 @@ BITCOIN_CORE_H = \
   util/moneystr.h \
   util/rbf.h \
   util/readwritefile.h \
+  util/serfloat.h \
   util/settings.h \
   util/sock.h \
   util/spanparsing.h \
@@ -594,6 +595,7 @@ libbitcoin_util_a_SOURCES = \
   util/settings.cpp \
   util/thread.cpp \
   util/threadnames.cpp \
+  util/serfloat.cpp \
   util/spanparsing.cpp \
   util/strencodings.cpp \
   util/string.cpp \
diff --git a/src/Makefile.test.include b/src/Makefile.test.include
@@ -121,6 +121,7 @@ BITCOIN_TESTS =\
   test/script_tests.cpp \
   test/script_standard_tests.cpp \
   test/scriptnum_tests.cpp \
+  test/serfloat_tests.cpp \
   test/serialize_tests.cpp \
   test/settings_tests.cpp \
   test/sighash_tests.cpp \
diff --git a/src/compat/assumptions.h b/src/compat/assumptions.h
@@ -36,11 +36,6 @@ static_assert(std::numeric_limits<double>::is_iec559, "IEEE 754 double assumed")
 // Example(s): Everywhere :-)
 static_assert(std::numeric_limits<unsigned char>::digits == 8, "8-bit byte assumed");
 
-// Assumption: We assume floating-point widths.
-// Example(s): Type punning in serialization code (ser_{float,double}_to_uint{32,64}).
-static_assert(sizeof(float) == 4, "32-bit float assumed");
-static_assert(sizeof(double) == 8, "64-bit double assumed");
-
 // Assumption: We assume integer widths.
 // Example(s): GetSizeOfCompactSize and WriteCompactSize in the serialization
 //             code.
diff --git a/src/policy/fees.cpp b/src/policy/fees.cpp
@@ -10,6 +10,7 @@
 #include <logging.h>
 #include <streams.h>
 #include <txmempool.h>
+#include <util/serfloat.h>
 #include <util/system.h>
 
 static const char* FEE_ESTIMATES_FILENAME = "fee_estimates.dat";
@@ -26,6 +27,25 @@ std::string StringForFeeEstimateHorizon(FeeEstimateHorizon horizon)
     assert(false);
 }
 
+namespace {
+
+struct EncodedDoubleFormatter
+{
+    template<typename Stream> void Ser(Stream &s, double v)
+    {
+        s << EncodeDouble(v);
+    }
+
+    template<typename Stream> void Unser(Stream& s, double& v)
+    {
+        uint64_t encoded;
+        s >> encoded;
+        v = DecodeDouble(encoded);
+    }
+};
+
+} // namespace
+
 /**
  * We will instantiate an instance of this class to track transactions that were
  * included in a block. We will lump transactions into a bucket according to their
@@ -356,12 +376,12 @@ double TxConfirmStats::EstimateMedianVal(int confTarget, double sufficientTxVal,
 
 void TxConfirmStats::Write(CAutoFile& fileout) const
 {
-    fileout << decay;
+    fileout << Using<EncodedDoubleFormatter>(decay);
     fileout << scale;
-    fileout << m_feerate_avg;
-    fileout << txCtAvg;
-    fileout << confAvg;
-    fileout << failAvg;
+    fileout << Using<VectorFormatter<EncodedDoubleFormatter>>(m_feerate_avg);
+    fileout << Using<VectorFormatter<EncodedDoubleFormatter>>(txCtAvg);
+    fileout << Using<VectorFormatter<VectorFormatter<EncodedDoubleFormatter>>>(confAvg);
+    fileout << Using<VectorFormatter<VectorFormatter<EncodedDoubleFormatter>>>(failAvg);
 }
 
 void TxConfirmStats::Read(CAutoFile& filein, int nFileVersion, size_t numBuckets)
@@ -372,7 +392,7 @@ void TxConfirmStats::Read(CAutoFile& filein, int nFileVersion, size_t numBuckets
     size_t maxConfirms, maxPeriods;
 
     // The current version will store the decay with each individual TxConfirmStats and also keep a scale factor
-    filein >> decay;
+    filein >> Using<EncodedDoubleFormatter>(decay);
     if (decay <= 0 || decay >= 1) {
         throw std::runtime_error("Corrupt estimates file. Decay must be between 0 and 1 (non-inclusive)");
     }
@@ -381,15 +401,15 @@ void TxConfirmStats::Read(CAutoFile& filein, int nFileVersion, size_t numBuckets
         throw std::runtime_error("Corrupt estimates file. Scale must be non-zero");
     }
 
-    filein >> m_feerate_avg;
+    filein >> Using<VectorFormatter<EncodedDoubleFormatter>>(m_feerate_avg);
     if (m_feerate_avg.size() != numBuckets) {
         throw std::runtime_error("Corrupt estimates file. Mismatch in feerate average bucket count");
     }
-    filein >> txCtAvg;
+    filein >> Using<VectorFormatter<EncodedDoubleFormatter>>(txCtAvg);
     if (txCtAvg.size() != numBuckets) {
         throw std::runtime_error("Corrupt estimates file. Mismatch in tx count bucket count");
     }
-    filein >> confAvg;
+    filein >> Using<VectorFormatter<VectorFormatter<EncodedDoubleFormatter>>>(confAvg);
     maxPeriods = confAvg.size();
     maxConfirms = scale * maxPeriods;
 
@@ -402,7 +422,7 @@ void TxConfirmStats::Read(CAutoFile& filein, int nFileVersion, size_t numBuckets
         }
     }
 
-    filein >> failAvg;
+    filein >> Using<VectorFormatter<VectorFormatter<EncodedDoubleFormatter>>>(failAvg);
     if (maxPeriods != failAvg.size()) {
         throw std::runtime_error("Corrupt estimates file. Mismatch in confirms tracked for failures");
     }
@@ -884,7 +904,7 @@ bool CBlockPolicyEstimator::Write(CAutoFile& fileout) const
         else {
             fileout << historicalFirst << historicalBest;
         }
-        fileout << buckets;
+        fileout << Using<VectorFormatter<EncodedDoubleFormatter>>(buckets);
         feeStats->Write(fileout);
         shortStats->Write(fileout);
         longStats->Write(fileout);
@@ -920,7 +940,7 @@ bool CBlockPolicyEstimator::Read(CAutoFile& filein)
                 throw std::runtime_error("Corrupt estimates file. Historical block range for estimates is invalid");
             }
             std::vector<double> fileBuckets;
-            filein >> fileBuckets;
+            filein >> Using<VectorFormatter<EncodedDoubleFormatter>>(fileBuckets);
             size_t numBuckets = fileBuckets.size();
             if (numBuckets <= 1 || numBuckets > 1000) {
                 throw std::runtime_error("Corrupt estimates file. Must have between 2 and 1000 feerate buckets");
diff --git a/src/serialize.h b/src/serialize.h
@@ -122,34 +122,6 @@ template<typename Stream> inline uint64_t ser_readdata64(Stream &s)
     s.read((char*)&obj, 8);
     return le64toh(obj);
 }
-inline uint64_t ser_double_to_uint64(double x)
-{
-    uint64_t tmp;
-    std::memcpy(&tmp, &x, sizeof(x));
-    static_assert(sizeof(tmp) == sizeof(x), "double and uint64_t assumed to have the same size");
-    return tmp;
-}
-inline uint32_t ser_float_to_uint32(float x)
-{
-    uint32_t tmp;
-    std::memcpy(&tmp, &x, sizeof(x));
-    static_assert(sizeof(tmp) == sizeof(x), "float and uint32_t assumed to have the same size");
-    return tmp;
-}
-inline double ser_uint64_to_double(uint64_t y)
-{
-    double tmp;
-    std::memcpy(&tmp, &y, sizeof(y));
-    static_assert(sizeof(tmp) == sizeof(y), "double and uint64_t assumed to have the same size");
-    return tmp;
-}
-inline float ser_uint32_to_float(uint32_t y)
-{
-    float tmp;
-    std::memcpy(&tmp, &y, sizeof(y));
-    static_assert(sizeof(tmp) == sizeof(y), "float and uint32_t assumed to have the same size");
-    return tmp;
-}
 
 
 /////////////////////////////////////////////////////////////////
@@ -234,8 +206,6 @@ template<typename Stream> inline void Serialize(Stream& s, int32_t a ) { ser_wri
 template<typename Stream> inline void Serialize(Stream& s, uint32_t a) { ser_writedata32(s, a); }
 template<typename Stream> inline void Serialize(Stream& s, int64_t a ) { ser_writedata64(s, a); }
 template<typename Stream> inline void Serialize(Stream& s, uint64_t a) { ser_writedata64(s, a); }
-template<typename Stream> inline void Serialize(Stream& s, float a   ) { ser_writedata32(s, ser_float_to_uint32(a)); }
-template<typename Stream> inline void Serialize(Stream& s, double a  ) { ser_writedata64(s, ser_double_to_uint64(a)); }
 template<typename Stream, int N> inline void Serialize(Stream& s, const char (&a)[N]) { s.write(a, N); }
 template<typename Stream, int N> inline void Serialize(Stream& s, const unsigned char (&a)[N]) { s.write(CharCast(a), N); }
 template<typename Stream> inline void Serialize(Stream& s, const Span<const unsigned char>& span) { s.write(CharCast(span.data()), span.size()); }
@@ -252,8 +222,6 @@ template<typename Stream> inline void Unserialize(Stream& s, int32_t& a ) { a =
 template<typename Stream> inline void Unserialize(Stream& s, uint32_t& a) { a = ser_readdata32(s); }
 template<typename Stream> inline void Unserialize(Stream& s, int64_t& a ) { a = ser_readdata64(s); }
 template<typename Stream> inline void Unserialize(Stream& s, uint64_t& a) { a = ser_readdata64(s); }
-template<typename Stream> inline void Unserialize(Stream& s, float& a   ) { a = ser_uint32_to_float(ser_readdata32(s)); }
-template<typename Stream> inline void Unserialize(Stream& s, double& a  ) { a = ser_uint64_to_double(ser_readdata64(s)); }
 template<typename Stream, int N> inline void Unserialize(Stream& s, char (&a)[N]) { s.read(a, N); }
 template<typename Stream, int N> inline void Unserialize(Stream& s, unsigned char (&a)[N]) { s.read(CharCast(a), N); }
 template<typename Stream> inline void Unserialize(Stream& s, Span<unsigned char>& span) { s.read(CharCast(span.data()), span.size()); }
diff --git a/src/test/fuzz/float.cpp b/src/test/fuzz/float.cpp
@@ -3,14 +3,14 @@
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
 
 #include <memusage.h>
-#include <serialize.h>
-#include <streams.h>
 #include <test/fuzz/FuzzedDataProvider.h>
 #include <test/fuzz/fuzz.h>
+#include <util/serfloat.h>
 #include <version.h>
 
 #include <cassert>
-#include <cstdint>
+#include <cmath>
+#include <limits>
 
 FUZZ_TARGET(float)
 {
@@ -19,24 +19,17 @@ FUZZ_TARGET(float)
     {
         const double d = fuzzed_data_provider.ConsumeFloatingPoint<double>();
         (void)memusage::DynamicUsage(d);
-        assert(ser_uint64_to_double(ser_double_to_uint64(d)) == d);
 
-        CDataStream stream(SER_NETWORK, INIT_PROTO_VERSION);
-        stream << d;
-        double d_deserialized;
-        stream >> d_deserialized;
-        assert(d == d_deserialized);
-    }
-
-    {
-        const float f = fuzzed_data_provider.ConsumeFloatingPoint<float>();
-        (void)memusage::DynamicUsage(f);
-        assert(ser_uint32_to_float(ser_float_to_uint32(f)) == f);
-
-        CDataStream stream(SER_NETWORK, INIT_PROTO_VERSION);
-        stream << f;
-        float f_deserialized;
-        stream >> f_deserialized;
-        assert(f == f_deserialized);
+        uint64_t encoded = EncodeDouble(d);
+        if constexpr (std::numeric_limits<double>::is_iec559) {
+            if (!std::isnan(d)) {
+                uint64_t encoded_in_memory;
+                std::copy((const unsigned char*)&d, (const unsigned char*)(&d + 1), (unsigned char*)&encoded_in_memory);
+                assert(encoded_in_memory == encoded);
+            }
+        }
+        double d_deserialized = DecodeDouble(encoded);
+        assert(std::isnan(d) == std::isnan(d_deserialized));
+        assert(std::isnan(d) || d == d_deserialized);
     }
 }
diff --git a/src/test/fuzz/util.h b/src/test/fuzz/util.h
@@ -513,8 +513,6 @@ void WriteToStream(FuzzedDataProvider& fuzzed_data_provider, Stream& stream) noe
                 WRITE_TO_STREAM_CASE(uint32_t, fuzzed_data_provider.ConsumeIntegral<uint32_t>()),
                 WRITE_TO_STREAM_CASE(int64_t, fuzzed_data_provider.ConsumeIntegral<int64_t>()),
                 WRITE_TO_STREAM_CASE(uint64_t, fuzzed_data_provider.ConsumeIntegral<uint64_t>()),
-                WRITE_TO_STREAM_CASE(float, fuzzed_data_provider.ConsumeFloatingPoint<float>()),
-                WRITE_TO_STREAM_CASE(double, fuzzed_data_provider.ConsumeFloatingPoint<double>()),
                 WRITE_TO_STREAM_CASE(std::string, fuzzed_data_provider.ConsumeRandomLengthString(32)),
                 WRITE_TO_STREAM_CASE(std::vector<char>, ConsumeRandomLengthIntegralVector<char>(fuzzed_data_provider)));
         } catch (const std::ios_base::failure&) {
@@ -545,8 +543,6 @@ void ReadFromStream(FuzzedDataProvider& fuzzed_data_provider, Stream& stream) no
                 READ_FROM_STREAM_CASE(uint32_t),
                 READ_FROM_STREAM_CASE(int64_t),
                 READ_FROM_STREAM_CASE(uint64_t),
-                READ_FROM_STREAM_CASE(float),
-                READ_FROM_STREAM_CASE(double),
                 READ_FROM_STREAM_CASE(std::string),
                 READ_FROM_STREAM_CASE(std::vector<char>));
         } catch (const std::ios_base::failure&) {
diff --git a/src/test/serfloat_tests.cpp b/src/test/serfloat_tests.cpp
@@ -0,0 +1,129 @@
+// Copyright (c) 2014-2020 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include <hash.h>
+#include <test/util/setup_common.h>
+#include <util/serfloat.h>
+#include <serialize.h>
+#include <streams.h>
+
+#include <boost/test/unit_test.hpp>
+
+#include <cmath>
+#include <limits>
+
+BOOST_FIXTURE_TEST_SUITE(serfloat_tests, BasicTestingSetup)
+
+namespace {
+
+uint64_t TestDouble(double f) {
+    uint64_t i = EncodeDouble(f);
+    double f2 = DecodeDouble(i);
+    if (std::isnan(f)) {
+        // NaN is not guaranteed to round-trip exactly.
+        BOOST_CHECK(std::isnan(f2));
+    } else {
+        // Everything else is.
+        BOOST_CHECK(!std::isnan(f2));
+        uint64_t i2 = EncodeDouble(f2);
+        BOOST_CHECK_EQUAL(f, f2);
+        BOOST_CHECK_EQUAL(i, i2);
+    }
+    return i;
+}
+
+} // namespace
+
+BOOST_AUTO_TEST_CASE(double_serfloat_tests) {
+    BOOST_CHECK_EQUAL(TestDouble(0.0), 0);
+    BOOST_CHECK_EQUAL(TestDouble(-0.0), 0x8000000000000000);
+    BOOST_CHECK_EQUAL(TestDouble(std::numeric_limits<double>::infinity()), 0x7ff0000000000000);
+    BOOST_CHECK_EQUAL(TestDouble(-std::numeric_limits<double>::infinity()), 0xfff0000000000000);
+    BOOST_CHECK_EQUAL(TestDouble(0.5), 0x3fe0000000000000ULL);
+    BOOST_CHECK_EQUAL(TestDouble(1.0), 0x3ff0000000000000ULL);
+    BOOST_CHECK_EQUAL(TestDouble(2.0), 0x4000000000000000ULL);
+    BOOST_CHECK_EQUAL(TestDouble(4.0), 0x4010000000000000ULL);
+    BOOST_CHECK_EQUAL(TestDouble(785.066650390625), 0x4088888880000000ULL);
+
+    // Roundtrip test on IEC559-compatible systems
+    if (std::numeric_limits<double>::is_iec559) {
+        BOOST_CHECK_EQUAL(sizeof(double), 8);
+        BOOST_CHECK_EQUAL(sizeof(uint64_t), 8);
+        // Test extreme values
+        TestDouble(std::numeric_limits<double>::min());
+        TestDouble(-std::numeric_limits<double>::min());
+        TestDouble(std::numeric_limits<double>::max());
+        TestDouble(-std::numeric_limits<double>::max());
+        TestDouble(std::numeric_limits<double>::lowest());
+        TestDouble(-std::numeric_limits<double>::lowest());
+        TestDouble(std::numeric_limits<double>::quiet_NaN());
+        TestDouble(-std::numeric_limits<double>::quiet_NaN());
+        TestDouble(std::numeric_limits<double>::signaling_NaN());
+        TestDouble(-std::numeric_limits<double>::signaling_NaN());
+        TestDouble(std::numeric_limits<double>::denorm_min());
+        TestDouble(-std::numeric_limits<double>::denorm_min());
+        // Test exact encoding: on currently supported platforms, EncodeDouble
+        // should produce exactly the same as the in-memory representation for non-NaN.
+        for (int j = 0; j < 1000; ++j) {
+            // Iterate over 9 specific bits exhaustively; the others are chosen randomly.
+            // These specific bits are the sign bit, and the 2 top and bottom bits of
+            // exponent and mantissa in the IEEE754 binary64 format.
+            for (int x = 0; x < 512; ++x) {
+                uint64_t v = InsecureRandBits(64);
+                v &= ~(uint64_t{1} << 0);
+                if (x & 1) v |= (uint64_t{1} << 0);
+                v &= ~(uint64_t{1} << 1);
+                if (x & 2) v |= (uint64_t{1} << 1);
+                v &= ~(uint64_t{1} << 50);
+                if (x & 4) v |= (uint64_t{1} << 50);
+                v &= ~(uint64_t{1} << 51);
+                if (x & 8) v |= (uint64_t{1} << 51);
+                v &= ~(uint64_t{1} << 52);
+                if (x & 16) v |= (uint64_t{1} << 52);
+                v &= ~(uint64_t{1} << 53);
+                if (x & 32) v |= (uint64_t{1} << 53);
+                v &= ~(uint64_t{1} << 61);
+                if (x & 64) v |= (uint64_t{1} << 61);
+                v &= ~(uint64_t{1} << 62);
+                if (x & 128) v |= (uint64_t{1} << 62);
+                v &= ~(uint64_t{1} << 63);
+                if (x & 256) v |= (uint64_t{1} << 63);
+                double f;
+                memcpy(&f, &v, 8);
+                uint64_t v2 = TestDouble(f);
+                if (!std::isnan(f)) BOOST_CHECK_EQUAL(v, v2);
+            }
+        }
+    }
+}
+
+/*
+Python code to generate the below hashes:
+
+    def reversed_hex(x):
+        return binascii.hexlify(''.join(reversed(x)))
+    def dsha256(x):
+        return hashlib.sha256(hashlib.sha256(x).digest()).digest()
+
+    reversed_hex(dsha256(''.join(struct.pack('<d', x) for x in range(0,1000)))) == '43d0c82591953c4eafe114590d392676a01585d25b25d433557f0d7878b23f96'
+*/
+BOOST_AUTO_TEST_CASE(doubles)
+{
+    CDataStream ss(SER_DISK, 0);
+    // encode
+    for (int i = 0; i < 1000; i++) {
+        ss << EncodeDouble(i);
+    }
+    BOOST_CHECK(Hash(ss) == uint256S("43d0c82591953c4eafe114590d392676a01585d25b25d433557f0d7878b23f96"));
+
+    // decode
+    for (int i = 0; i < 1000; i++) {
+        uint64_t val;
+        ss >> val;
+        double j = DecodeDouble(val);
+        BOOST_CHECK_MESSAGE(i == j, "decoded:" << j << " expected:" << i);
+    }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/test/serialize_tests.cpp b/src/test/serialize_tests.cpp
diff --git a/src/util/serfloat.cpp b/src/util/serfloat.cpp
diff --git a/src/util/serfloat.h b/src/util/serfloat.h