Skip to content

Commit 707ba86

Browse files
committed
Merge bitcoin/bitcoin#21966: Remove double serialization; use software encoder for fee estimation
66545da Remove support for double serialization (Pieter Wuille) fff1cae Convert uses of double-serialization to {En,De}codeDouble (Pieter Wuille) afd964d Convert existing float encoding tests (Pieter Wuille) bda33f9 Add unit tests for serfloat module (Pieter Wuille) 2be4cd9 Add platform-independent float encoder/decoder (Pieter Wuille) e40224d Remove unused float serialization (MarcoFalke) Pull request description: Based on #21981. This adds a software-based platform-independent float/double encoder/decoder (platform independent in the sense that it only uses arithmetic and library calls, but never inspects the binary representation). This should strengthen our guarantee that encoded float/double values are portable across platforms. It then removes the functionality to serialize doubles from serialize.h, and replaces its only (non-test) use for fee estimation data serialization with the software encoder. At least on x86/ARM, the only difference should be how certain NaN values are encoded/decoded (but not *whether* they are NaN or not). It comes with tests that verify on is_iec559 platforms (which are the only ones we support, at least for now) that the serialized bytes exactly match the binary representation of floats in memory (for non-NaN). ACKs for top commit: laanwj: Code review re-ACK 66545da practicalswift: cr re-ACK 66545da Tree-SHA512: 62ad9adc26e28707b2eb12a919feefd4fd10cf9032652dbb1ca1cc97638ac21de89e240858e80d293d5112685c623e58affa3d316a9783ff0e6d291977a141f5
2 parents 7aa41fc + 66545da commit 707ba86

File tree

11 files changed

+258
-160
lines changed

11 files changed

+258
-160
lines changed

src/Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ BITCOIN_CORE_H = \
253253
util/moneystr.h \
254254
util/rbf.h \
255255
util/readwritefile.h \
256+
util/serfloat.h \
256257
util/settings.h \
257258
util/sock.h \
258259
util/spanparsing.h \
@@ -594,6 +595,7 @@ libbitcoin_util_a_SOURCES = \
594595
util/settings.cpp \
595596
util/thread.cpp \
596597
util/threadnames.cpp \
598+
util/serfloat.cpp \
597599
util/spanparsing.cpp \
598600
util/strencodings.cpp \
599601
util/string.cpp \

src/Makefile.test.include

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ BITCOIN_TESTS =\
121121
test/script_tests.cpp \
122122
test/script_standard_tests.cpp \
123123
test/scriptnum_tests.cpp \
124+
test/serfloat_tests.cpp \
124125
test/serialize_tests.cpp \
125126
test/settings_tests.cpp \
126127
test/sighash_tests.cpp \

src/compat/assumptions.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,6 @@ static_assert(std::numeric_limits<double>::is_iec559, "IEEE 754 double assumed")
3636
// Example(s): Everywhere :-)
3737
static_assert(std::numeric_limits<unsigned char>::digits == 8, "8-bit byte assumed");
3838

39-
// Assumption: We assume floating-point widths.
40-
// Example(s): Type punning in serialization code (ser_{float,double}_to_uint{32,64}).
41-
static_assert(sizeof(float) == 4, "32-bit float assumed");
42-
static_assert(sizeof(double) == 8, "64-bit double assumed");
43-
4439
// Assumption: We assume integer widths.
4540
// Example(s): GetSizeOfCompactSize and WriteCompactSize in the serialization
4641
// code.

src/policy/fees.cpp

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <logging.h>
1111
#include <streams.h>
1212
#include <txmempool.h>
13+
#include <util/serfloat.h>
1314
#include <util/system.h>
1415

1516
static const char* FEE_ESTIMATES_FILENAME = "fee_estimates.dat";
@@ -26,6 +27,25 @@ std::string StringForFeeEstimateHorizon(FeeEstimateHorizon horizon)
2627
assert(false);
2728
}
2829

30+
namespace {
31+
32+
struct EncodedDoubleFormatter
33+
{
34+
template<typename Stream> void Ser(Stream &s, double v)
35+
{
36+
s << EncodeDouble(v);
37+
}
38+
39+
template<typename Stream> void Unser(Stream& s, double& v)
40+
{
41+
uint64_t encoded;
42+
s >> encoded;
43+
v = DecodeDouble(encoded);
44+
}
45+
};
46+
47+
} // namespace
48+
2949
/**
3050
* We will instantiate an instance of this class to track transactions that were
3151
* included in a block. We will lump transactions into a bucket according to their
@@ -356,12 +376,12 @@ double TxConfirmStats::EstimateMedianVal(int confTarget, double sufficientTxVal,
356376

357377
void TxConfirmStats::Write(CAutoFile& fileout) const
358378
{
359-
fileout << decay;
379+
fileout << Using<EncodedDoubleFormatter>(decay);
360380
fileout << scale;
361-
fileout << m_feerate_avg;
362-
fileout << txCtAvg;
363-
fileout << confAvg;
364-
fileout << failAvg;
381+
fileout << Using<VectorFormatter<EncodedDoubleFormatter>>(m_feerate_avg);
382+
fileout << Using<VectorFormatter<EncodedDoubleFormatter>>(txCtAvg);
383+
fileout << Using<VectorFormatter<VectorFormatter<EncodedDoubleFormatter>>>(confAvg);
384+
fileout << Using<VectorFormatter<VectorFormatter<EncodedDoubleFormatter>>>(failAvg);
365385
}
366386

367387
void TxConfirmStats::Read(CAutoFile& filein, int nFileVersion, size_t numBuckets)
@@ -372,7 +392,7 @@ void TxConfirmStats::Read(CAutoFile& filein, int nFileVersion, size_t numBuckets
372392
size_t maxConfirms, maxPeriods;
373393

374394
// The current version will store the decay with each individual TxConfirmStats and also keep a scale factor
375-
filein >> decay;
395+
filein >> Using<EncodedDoubleFormatter>(decay);
376396
if (decay <= 0 || decay >= 1) {
377397
throw std::runtime_error("Corrupt estimates file. Decay must be between 0 and 1 (non-inclusive)");
378398
}
@@ -381,15 +401,15 @@ void TxConfirmStats::Read(CAutoFile& filein, int nFileVersion, size_t numBuckets
381401
throw std::runtime_error("Corrupt estimates file. Scale must be non-zero");
382402
}
383403

384-
filein >> m_feerate_avg;
404+
filein >> Using<VectorFormatter<EncodedDoubleFormatter>>(m_feerate_avg);
385405
if (m_feerate_avg.size() != numBuckets) {
386406
throw std::runtime_error("Corrupt estimates file. Mismatch in feerate average bucket count");
387407
}
388-
filein >> txCtAvg;
408+
filein >> Using<VectorFormatter<EncodedDoubleFormatter>>(txCtAvg);
389409
if (txCtAvg.size() != numBuckets) {
390410
throw std::runtime_error("Corrupt estimates file. Mismatch in tx count bucket count");
391411
}
392-
filein >> confAvg;
412+
filein >> Using<VectorFormatter<VectorFormatter<EncodedDoubleFormatter>>>(confAvg);
393413
maxPeriods = confAvg.size();
394414
maxConfirms = scale * maxPeriods;
395415

@@ -402,7 +422,7 @@ void TxConfirmStats::Read(CAutoFile& filein, int nFileVersion, size_t numBuckets
402422
}
403423
}
404424

405-
filein >> failAvg;
425+
filein >> Using<VectorFormatter<VectorFormatter<EncodedDoubleFormatter>>>(failAvg);
406426
if (maxPeriods != failAvg.size()) {
407427
throw std::runtime_error("Corrupt estimates file. Mismatch in confirms tracked for failures");
408428
}
@@ -884,7 +904,7 @@ bool CBlockPolicyEstimator::Write(CAutoFile& fileout) const
884904
else {
885905
fileout << historicalFirst << historicalBest;
886906
}
887-
fileout << buckets;
907+
fileout << Using<VectorFormatter<EncodedDoubleFormatter>>(buckets);
888908
feeStats->Write(fileout);
889909
shortStats->Write(fileout);
890910
longStats->Write(fileout);
@@ -920,7 +940,7 @@ bool CBlockPolicyEstimator::Read(CAutoFile& filein)
920940
throw std::runtime_error("Corrupt estimates file. Historical block range for estimates is invalid");
921941
}
922942
std::vector<double> fileBuckets;
923-
filein >> fileBuckets;
943+
filein >> Using<VectorFormatter<EncodedDoubleFormatter>>(fileBuckets);
924944
size_t numBuckets = fileBuckets.size();
925945
if (numBuckets <= 1 || numBuckets > 1000) {
926946
throw std::runtime_error("Corrupt estimates file. Must have between 2 and 1000 feerate buckets");

src/serialize.h

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -122,34 +122,6 @@ template<typename Stream> inline uint64_t ser_readdata64(Stream &s)
122122
s.read((char*)&obj, 8);
123123
return le64toh(obj);
124124
}
125-
inline uint64_t ser_double_to_uint64(double x)
126-
{
127-
uint64_t tmp;
128-
std::memcpy(&tmp, &x, sizeof(x));
129-
static_assert(sizeof(tmp) == sizeof(x), "double and uint64_t assumed to have the same size");
130-
return tmp;
131-
}
132-
inline uint32_t ser_float_to_uint32(float x)
133-
{
134-
uint32_t tmp;
135-
std::memcpy(&tmp, &x, sizeof(x));
136-
static_assert(sizeof(tmp) == sizeof(x), "float and uint32_t assumed to have the same size");
137-
return tmp;
138-
}
139-
inline double ser_uint64_to_double(uint64_t y)
140-
{
141-
double tmp;
142-
std::memcpy(&tmp, &y, sizeof(y));
143-
static_assert(sizeof(tmp) == sizeof(y), "double and uint64_t assumed to have the same size");
144-
return tmp;
145-
}
146-
inline float ser_uint32_to_float(uint32_t y)
147-
{
148-
float tmp;
149-
std::memcpy(&tmp, &y, sizeof(y));
150-
static_assert(sizeof(tmp) == sizeof(y), "float and uint32_t assumed to have the same size");
151-
return tmp;
152-
}
153125

154126

155127
/////////////////////////////////////////////////////////////////
@@ -234,8 +206,6 @@ template<typename Stream> inline void Serialize(Stream& s, int32_t a ) { ser_wri
234206
template<typename Stream> inline void Serialize(Stream& s, uint32_t a) { ser_writedata32(s, a); }
235207
template<typename Stream> inline void Serialize(Stream& s, int64_t a ) { ser_writedata64(s, a); }
236208
template<typename Stream> inline void Serialize(Stream& s, uint64_t a) { ser_writedata64(s, a); }
237-
template<typename Stream> inline void Serialize(Stream& s, float a ) { ser_writedata32(s, ser_float_to_uint32(a)); }
238-
template<typename Stream> inline void Serialize(Stream& s, double a ) { ser_writedata64(s, ser_double_to_uint64(a)); }
239209
template<typename Stream, int N> inline void Serialize(Stream& s, const char (&a)[N]) { s.write(a, N); }
240210
template<typename Stream, int N> inline void Serialize(Stream& s, const unsigned char (&a)[N]) { s.write(CharCast(a), N); }
241211
template<typename Stream> inline void Serialize(Stream& s, const Span<const unsigned char>& span) { s.write(CharCast(span.data()), span.size()); }
@@ -252,8 +222,6 @@ template<typename Stream> inline void Unserialize(Stream& s, int32_t& a ) { a =
252222
template<typename Stream> inline void Unserialize(Stream& s, uint32_t& a) { a = ser_readdata32(s); }
253223
template<typename Stream> inline void Unserialize(Stream& s, int64_t& a ) { a = ser_readdata64(s); }
254224
template<typename Stream> inline void Unserialize(Stream& s, uint64_t& a) { a = ser_readdata64(s); }
255-
template<typename Stream> inline void Unserialize(Stream& s, float& a ) { a = ser_uint32_to_float(ser_readdata32(s)); }
256-
template<typename Stream> inline void Unserialize(Stream& s, double& a ) { a = ser_uint64_to_double(ser_readdata64(s)); }
257225
template<typename Stream, int N> inline void Unserialize(Stream& s, char (&a)[N]) { s.read(a, N); }
258226
template<typename Stream, int N> inline void Unserialize(Stream& s, unsigned char (&a)[N]) { s.read(CharCast(a), N); }
259227
template<typename Stream> inline void Unserialize(Stream& s, Span<unsigned char>& span) { s.read(CharCast(span.data()), span.size()); }

src/test/fuzz/float.cpp

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
44

55
#include <memusage.h>
6-
#include <serialize.h>
7-
#include <streams.h>
86
#include <test/fuzz/FuzzedDataProvider.h>
97
#include <test/fuzz/fuzz.h>
8+
#include <util/serfloat.h>
109
#include <version.h>
1110

1211
#include <cassert>
13-
#include <cstdint>
12+
#include <cmath>
13+
#include <limits>
1414

1515
FUZZ_TARGET(float)
1616
{
@@ -19,24 +19,17 @@ FUZZ_TARGET(float)
1919
{
2020
const double d = fuzzed_data_provider.ConsumeFloatingPoint<double>();
2121
(void)memusage::DynamicUsage(d);
22-
assert(ser_uint64_to_double(ser_double_to_uint64(d)) == d);
2322

24-
CDataStream stream(SER_NETWORK, INIT_PROTO_VERSION);
25-
stream << d;
26-
double d_deserialized;
27-
stream >> d_deserialized;
28-
assert(d == d_deserialized);
29-
}
30-
31-
{
32-
const float f = fuzzed_data_provider.ConsumeFloatingPoint<float>();
33-
(void)memusage::DynamicUsage(f);
34-
assert(ser_uint32_to_float(ser_float_to_uint32(f)) == f);
35-
36-
CDataStream stream(SER_NETWORK, INIT_PROTO_VERSION);
37-
stream << f;
38-
float f_deserialized;
39-
stream >> f_deserialized;
40-
assert(f == f_deserialized);
23+
uint64_t encoded = EncodeDouble(d);
24+
if constexpr (std::numeric_limits<double>::is_iec559) {
25+
if (!std::isnan(d)) {
26+
uint64_t encoded_in_memory;
27+
std::copy((const unsigned char*)&d, (const unsigned char*)(&d + 1), (unsigned char*)&encoded_in_memory);
28+
assert(encoded_in_memory == encoded);
29+
}
30+
}
31+
double d_deserialized = DecodeDouble(encoded);
32+
assert(std::isnan(d) == std::isnan(d_deserialized));
33+
assert(std::isnan(d) || d == d_deserialized);
4134
}
4235
}

src/test/fuzz/util.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -513,8 +513,6 @@ void WriteToStream(FuzzedDataProvider& fuzzed_data_provider, Stream& stream) noe
513513
WRITE_TO_STREAM_CASE(uint32_t, fuzzed_data_provider.ConsumeIntegral<uint32_t>()),
514514
WRITE_TO_STREAM_CASE(int64_t, fuzzed_data_provider.ConsumeIntegral<int64_t>()),
515515
WRITE_TO_STREAM_CASE(uint64_t, fuzzed_data_provider.ConsumeIntegral<uint64_t>()),
516-
WRITE_TO_STREAM_CASE(float, fuzzed_data_provider.ConsumeFloatingPoint<float>()),
517-
WRITE_TO_STREAM_CASE(double, fuzzed_data_provider.ConsumeFloatingPoint<double>()),
518516
WRITE_TO_STREAM_CASE(std::string, fuzzed_data_provider.ConsumeRandomLengthString(32)),
519517
WRITE_TO_STREAM_CASE(std::vector<char>, ConsumeRandomLengthIntegralVector<char>(fuzzed_data_provider)));
520518
} catch (const std::ios_base::failure&) {
@@ -545,8 +543,6 @@ void ReadFromStream(FuzzedDataProvider& fuzzed_data_provider, Stream& stream) no
545543
READ_FROM_STREAM_CASE(uint32_t),
546544
READ_FROM_STREAM_CASE(int64_t),
547545
READ_FROM_STREAM_CASE(uint64_t),
548-
READ_FROM_STREAM_CASE(float),
549-
READ_FROM_STREAM_CASE(double),
550546
READ_FROM_STREAM_CASE(std::string),
551547
READ_FROM_STREAM_CASE(std::vector<char>));
552548
} catch (const std::ios_base::failure&) {

src/test/serfloat_tests.cpp

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
// Copyright (c) 2014-2020 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#include <hash.h>
6+
#include <test/util/setup_common.h>
7+
#include <util/serfloat.h>
8+
#include <serialize.h>
9+
#include <streams.h>
10+
11+
#include <boost/test/unit_test.hpp>
12+
13+
#include <cmath>
14+
#include <limits>
15+
16+
BOOST_FIXTURE_TEST_SUITE(serfloat_tests, BasicTestingSetup)
17+
18+
namespace {
19+
20+
uint64_t TestDouble(double f) {
21+
uint64_t i = EncodeDouble(f);
22+
double f2 = DecodeDouble(i);
23+
if (std::isnan(f)) {
24+
// NaN is not guaranteed to round-trip exactly.
25+
BOOST_CHECK(std::isnan(f2));
26+
} else {
27+
// Everything else is.
28+
BOOST_CHECK(!std::isnan(f2));
29+
uint64_t i2 = EncodeDouble(f2);
30+
BOOST_CHECK_EQUAL(f, f2);
31+
BOOST_CHECK_EQUAL(i, i2);
32+
}
33+
return i;
34+
}
35+
36+
} // namespace
37+
38+
BOOST_AUTO_TEST_CASE(double_serfloat_tests) {
39+
BOOST_CHECK_EQUAL(TestDouble(0.0), 0);
40+
BOOST_CHECK_EQUAL(TestDouble(-0.0), 0x8000000000000000);
41+
BOOST_CHECK_EQUAL(TestDouble(std::numeric_limits<double>::infinity()), 0x7ff0000000000000);
42+
BOOST_CHECK_EQUAL(TestDouble(-std::numeric_limits<double>::infinity()), 0xfff0000000000000);
43+
BOOST_CHECK_EQUAL(TestDouble(0.5), 0x3fe0000000000000ULL);
44+
BOOST_CHECK_EQUAL(TestDouble(1.0), 0x3ff0000000000000ULL);
45+
BOOST_CHECK_EQUAL(TestDouble(2.0), 0x4000000000000000ULL);
46+
BOOST_CHECK_EQUAL(TestDouble(4.0), 0x4010000000000000ULL);
47+
BOOST_CHECK_EQUAL(TestDouble(785.066650390625), 0x4088888880000000ULL);
48+
49+
// Roundtrip test on IEC559-compatible systems
50+
if (std::numeric_limits<double>::is_iec559) {
51+
BOOST_CHECK_EQUAL(sizeof(double), 8);
52+
BOOST_CHECK_EQUAL(sizeof(uint64_t), 8);
53+
// Test extreme values
54+
TestDouble(std::numeric_limits<double>::min());
55+
TestDouble(-std::numeric_limits<double>::min());
56+
TestDouble(std::numeric_limits<double>::max());
57+
TestDouble(-std::numeric_limits<double>::max());
58+
TestDouble(std::numeric_limits<double>::lowest());
59+
TestDouble(-std::numeric_limits<double>::lowest());
60+
TestDouble(std::numeric_limits<double>::quiet_NaN());
61+
TestDouble(-std::numeric_limits<double>::quiet_NaN());
62+
TestDouble(std::numeric_limits<double>::signaling_NaN());
63+
TestDouble(-std::numeric_limits<double>::signaling_NaN());
64+
TestDouble(std::numeric_limits<double>::denorm_min());
65+
TestDouble(-std::numeric_limits<double>::denorm_min());
66+
// Test exact encoding: on currently supported platforms, EncodeDouble
67+
// should produce exactly the same as the in-memory representation for non-NaN.
68+
for (int j = 0; j < 1000; ++j) {
69+
// Iterate over 9 specific bits exhaustively; the others are chosen randomly.
70+
// These specific bits are the sign bit, and the 2 top and bottom bits of
71+
// exponent and mantissa in the IEEE754 binary64 format.
72+
for (int x = 0; x < 512; ++x) {
73+
uint64_t v = InsecureRandBits(64);
74+
v &= ~(uint64_t{1} << 0);
75+
if (x & 1) v |= (uint64_t{1} << 0);
76+
v &= ~(uint64_t{1} << 1);
77+
if (x & 2) v |= (uint64_t{1} << 1);
78+
v &= ~(uint64_t{1} << 50);
79+
if (x & 4) v |= (uint64_t{1} << 50);
80+
v &= ~(uint64_t{1} << 51);
81+
if (x & 8) v |= (uint64_t{1} << 51);
82+
v &= ~(uint64_t{1} << 52);
83+
if (x & 16) v |= (uint64_t{1} << 52);
84+
v &= ~(uint64_t{1} << 53);
85+
if (x & 32) v |= (uint64_t{1} << 53);
86+
v &= ~(uint64_t{1} << 61);
87+
if (x & 64) v |= (uint64_t{1} << 61);
88+
v &= ~(uint64_t{1} << 62);
89+
if (x & 128) v |= (uint64_t{1} << 62);
90+
v &= ~(uint64_t{1} << 63);
91+
if (x & 256) v |= (uint64_t{1} << 63);
92+
double f;
93+
memcpy(&f, &v, 8);
94+
uint64_t v2 = TestDouble(f);
95+
if (!std::isnan(f)) BOOST_CHECK_EQUAL(v, v2);
96+
}
97+
}
98+
}
99+
}
100+
101+
/*
102+
Python code to generate the below hashes:
103+
104+
def reversed_hex(x):
105+
return binascii.hexlify(''.join(reversed(x)))
106+
def dsha256(x):
107+
return hashlib.sha256(hashlib.sha256(x).digest()).digest()
108+
109+
reversed_hex(dsha256(''.join(struct.pack('<d', x) for x in range(0,1000)))) == '43d0c82591953c4eafe114590d392676a01585d25b25d433557f0d7878b23f96'
110+
*/
111+
BOOST_AUTO_TEST_CASE(doubles)
112+
{
113+
CDataStream ss(SER_DISK, 0);
114+
// encode
115+
for (int i = 0; i < 1000; i++) {
116+
ss << EncodeDouble(i);
117+
}
118+
BOOST_CHECK(Hash(ss) == uint256S("43d0c82591953c4eafe114590d392676a01585d25b25d433557f0d7878b23f96"));
119+
120+
// decode
121+
for (int i = 0; i < 1000; i++) {
122+
uint64_t val;
123+
ss >> val;
124+
double j = DecodeDouble(val);
125+
BOOST_CHECK_MESSAGE(i == j, "decoded:" << j << " expected:" << i);
126+
}
127+
}
128+
129+
BOOST_AUTO_TEST_SUITE_END()

0 commit comments

Comments
 (0)