Skip to content

Commit 7b45c5e

Browse files
committed
Merge bitcoin/bitcoin#20516: Well-defined CAddress disk serialization, and addrv2 anchors.dat
f8866e8 Add roundtrip fuzz tests for CAddress serialization (Pieter Wuille) e2f0548 Use addrv2 serialization in anchors.dat (Pieter Wuille) 8cd8f37 Introduce well-defined CAddress disk serialization (Pieter Wuille) Pull request description: Alternative to #20509. This makes the `CAddress` disk serialization format well defined, and uses it to enable addrv2 support in anchors.dat (in a way that's compatible with older software). The new format is: - The first 4 bytes store a format version number. Its low 19 bits are ignored (as those historically stored the `CLIENT_VERSION`), but its high 13 bits specify the actual serialization: - 0x00000000: LE64 encoding for `nServices`, V1 encoding for `CService` (like pre-BIP155 network serialization). - 0x20000000: CompactSize encoding for `nServices`, V2 encoding for `CService` (like BIP155 network serialization). - Any other value triggers an unsupported format error on deserialization, and can be used for future format changes. - The `ADDRV2_FORMAT` flag in the stream's version does not determine the actual serialization format; it only sets whether or not V2 encoding is permitted. ACKs for top commit: achow101: ACK f8866e8 laanwj: Code review ACK f8866e8 vasild: ACK f8866e8 jonatack: ACK f8866e8 tested rebased to master and built/run/restarted with DEBUG_ADDRMAN, peers.dat and anchors ser/deser seems fine hebasto: ACK f8866e8, tested on Linux Mint 20.1 (x86_64). Tree-SHA512: 3898f8a8c51783a46dd0aae03fa10060521f5dd6e79315fe95ba807689e78f202388ffa28c40bf156c6f7b1fc2ce806b155dcbe56027df73d039a55331723796
2 parents 922abe8 + f8866e8 commit 7b45c5e

File tree

3 files changed

+119
-30
lines changed

3 files changed

+119
-30
lines changed

src/addrdb.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ bool SerializeDB(Stream& stream, const Data& data)
2323
{
2424
// Write and commit header, data
2525
try {
26-
CHashWriter hasher(SER_DISK, CLIENT_VERSION);
26+
CHashWriter hasher(stream.GetType(), stream.GetVersion());
2727
stream << Params().MessageStart() << data;
2828
hasher << Params().MessageStart() << data;
2929
stream << hasher.GetHash();
@@ -35,7 +35,7 @@ bool SerializeDB(Stream& stream, const Data& data)
3535
}
3636

3737
template <typename Data>
38-
bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data)
38+
bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data, int version)
3939
{
4040
// Generate random temporary filename
4141
uint16_t randv = 0;
@@ -45,7 +45,7 @@ bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data
4545
// open temp output file, and associate with CAutoFile
4646
fs::path pathTmp = gArgs.GetDataDirNet() / tmpfn;
4747
FILE *file = fsbridge::fopen(pathTmp, "wb");
48-
CAutoFile fileout(file, SER_DISK, CLIENT_VERSION);
48+
CAutoFile fileout(file, SER_DISK, version);
4949
if (fileout.IsNull()) {
5050
fileout.fclose();
5151
remove(pathTmp);
@@ -106,11 +106,11 @@ bool DeserializeDB(Stream& stream, Data& data, bool fCheckSum = true)
106106
}
107107

108108
template <typename Data>
109-
bool DeserializeFileDB(const fs::path& path, Data& data)
109+
bool DeserializeFileDB(const fs::path& path, Data& data, int version)
110110
{
111111
// open input file, and associate with CAutoFile
112112
FILE* file = fsbridge::fopen(path, "rb");
113-
CAutoFile filein(file, SER_DISK, CLIENT_VERSION);
113+
CAutoFile filein(file, SER_DISK, version);
114114
if (filein.IsNull()) {
115115
LogPrintf("Missing or invalid file %s\n", path.string());
116116
return false;
@@ -125,12 +125,12 @@ CBanDB::CBanDB(fs::path ban_list_path) : m_ban_list_path(std::move(ban_list_path
125125

126126
bool CBanDB::Write(const banmap_t& banSet)
127127
{
128-
return SerializeFileDB("banlist", m_ban_list_path, banSet);
128+
return SerializeFileDB("banlist", m_ban_list_path, banSet, CLIENT_VERSION);
129129
}
130130

131131
bool CBanDB::Read(banmap_t& banSet)
132132
{
133-
return DeserializeFileDB(m_ban_list_path, banSet);
133+
return DeserializeFileDB(m_ban_list_path, banSet, CLIENT_VERSION);
134134
}
135135

136136
CAddrDB::CAddrDB()
@@ -140,12 +140,12 @@ CAddrDB::CAddrDB()
140140

141141
bool CAddrDB::Write(const CAddrMan& addr)
142142
{
143-
return SerializeFileDB("peers", pathAddr, addr);
143+
return SerializeFileDB("peers", pathAddr, addr, CLIENT_VERSION);
144144
}
145145

146146
bool CAddrDB::Read(CAddrMan& addr)
147147
{
148-
return DeserializeFileDB(pathAddr, addr);
148+
return DeserializeFileDB(pathAddr, addr, CLIENT_VERSION);
149149
}
150150

151151
bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers)
@@ -161,13 +161,13 @@ bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers)
161161
void DumpAnchors(const fs::path& anchors_db_path, const std::vector<CAddress>& anchors)
162162
{
163163
LOG_TIME_SECONDS(strprintf("Flush %d outbound block-relay-only peer addresses to anchors.dat", anchors.size()));
164-
SerializeFileDB("anchors", anchors_db_path, anchors);
164+
SerializeFileDB("anchors", anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT);
165165
}
166166

167167
std::vector<CAddress> ReadAnchors(const fs::path& anchors_db_path)
168168
{
169169
std::vector<CAddress> anchors;
170-
if (DeserializeFileDB(anchors_db_path, anchors)) {
170+
if (DeserializeFileDB(anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT)) {
171171
LogPrintf("Loaded %i addresses from %s\n", anchors.size(), anchors_db_path.filename());
172172
} else {
173173
anchors.clear();

src/protocol.h

Lines changed: 72 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <netaddress.h>
1414
#include <primitives/transaction.h>
1515
#include <serialize.h>
16+
#include <streams.h>
1617
#include <uint256.h>
1718
#include <version.h>
1819

@@ -358,43 +359,103 @@ class CAddress : public CService
358359
{
359360
static constexpr uint32_t TIME_INIT{100000000};
360361

362+
/** Historically, CAddress disk serialization stored the CLIENT_VERSION, optionally OR'ed with
363+
* the ADDRV2_FORMAT flag to indicate V2 serialization. The first field has since been
364+
* disentangled from client versioning, and now instead:
365+
* - The low bits (masked by DISK_VERSION_IGNORE_MASK) store the fixed value DISK_VERSION_INIT,
366+
* (in case any code exists that treats it as a client version) but are ignored on
367+
* deserialization.
368+
* - The high bits (masked by ~DISK_VERSION_IGNORE_MASK) store actual serialization information.
369+
* Only 0 or DISK_VERSION_ADDRV2 (equal to the historical ADDRV2_FORMAT) are valid now, and
370+
* any other value triggers a deserialization failure. Other values can be added later if
371+
* needed.
372+
*
373+
* For disk deserialization, ADDRV2_FORMAT in the stream version signals that ADDRV2
374+
* deserialization is permitted, but the actual format is determined by the high bits in the
375+
* stored version field. For network serialization, the stream version having ADDRV2_FORMAT or
376+
* not determines the actual format used (as it has no embedded version number).
377+
*/
378+
static constexpr uint32_t DISK_VERSION_INIT{220000};
379+
static constexpr uint32_t DISK_VERSION_IGNORE_MASK{0b00000000'00000111'11111111'11111111};
380+
/** The version number written in disk serialized addresses to indicate V2 serializations.
381+
* It must be exactly 1<<29, as that is the value that historical versions used for this
382+
* (they used their internal ADDRV2_FORMAT flag here). */
383+
static constexpr uint32_t DISK_VERSION_ADDRV2{1 << 29};
384+
static_assert((DISK_VERSION_INIT & ~DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_INIT must be covered by DISK_VERSION_IGNORE_MASK");
385+
static_assert((DISK_VERSION_ADDRV2 & DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_ADDRV2 must not be covered by DISK_VERSION_IGNORE_MASK");
386+
361387
public:
362388
CAddress() : CService{} {};
363389
CAddress(CService ipIn, ServiceFlags nServicesIn) : CService{ipIn}, nServices{nServicesIn} {};
364390
CAddress(CService ipIn, ServiceFlags nServicesIn, uint32_t nTimeIn) : CService{ipIn}, nTime{nTimeIn}, nServices{nServicesIn} {};
365391

366392
SERIALIZE_METHODS(CAddress, obj)
367393
{
368-
SER_READ(obj, obj.nTime = TIME_INIT);
369-
int nVersion = s.GetVersion();
394+
// CAddress has a distinct network serialization and a disk serialization, but it should never
395+
// be hashed (except through CHashWriter in addrdb.cpp, which sets SER_DISK), and it's
396+
// ambiguous what that would mean. Make sure no code relying on that is introduced:
397+
assert(!(s.GetType() & SER_GETHASH));
398+
bool use_v2;
399+
bool store_time;
370400
if (s.GetType() & SER_DISK) {
371-
READWRITE(nVersion);
372-
}
373-
if ((s.GetType() & SER_DISK) ||
374-
(nVersion != INIT_PROTO_VERSION && !(s.GetType() & SER_GETHASH))) {
401+
// In the disk serialization format, the encoding (v1 or v2) is determined by a flag version
402+
// that's part of the serialization itself. ADDRV2_FORMAT in the stream version only determines
403+
// whether V2 is chosen/permitted at all.
404+
uint32_t stored_format_version = DISK_VERSION_INIT;
405+
if (s.GetVersion() & ADDRV2_FORMAT) stored_format_version |= DISK_VERSION_ADDRV2;
406+
READWRITE(stored_format_version);
407+
stored_format_version &= ~DISK_VERSION_IGNORE_MASK; // ignore low bits
408+
if (stored_format_version == 0) {
409+
use_v2 = false;
410+
} else if (stored_format_version == DISK_VERSION_ADDRV2 && (s.GetVersion() & ADDRV2_FORMAT)) {
411+
// Only support v2 deserialization if ADDRV2_FORMAT is set.
412+
use_v2 = true;
413+
} else {
414+
throw std::ios_base::failure("Unsupported CAddress disk format version");
415+
}
416+
store_time = true;
417+
} else {
418+
// In the network serialization format, the encoding (v1 or v2) is determined directly by
419+
// the value of ADDRV2_FORMAT in the stream version, as no explicitly encoded version
420+
// exists in the stream.
421+
assert(s.GetType() & SER_NETWORK);
422+
use_v2 = s.GetVersion() & ADDRV2_FORMAT;
375423
// The only time we serialize a CAddress object without nTime is in
376424
// the initial VERSION messages which contain two CAddress records.
377425
// At that point, the serialization version is INIT_PROTO_VERSION.
378426
// After the version handshake, serialization version is >=
379427
// MIN_PEER_PROTO_VERSION and all ADDR messages are serialized with
380428
// nTime.
381-
READWRITE(obj.nTime);
429+
store_time = s.GetVersion() != INIT_PROTO_VERSION;
382430
}
383-
if (nVersion & ADDRV2_FORMAT) {
431+
432+
SER_READ(obj, obj.nTime = TIME_INIT);
433+
if (store_time) READWRITE(obj.nTime);
434+
// nServices is serialized as CompactSize in V2; as uint64_t in V1.
435+
if (use_v2) {
384436
uint64_t services_tmp;
385437
SER_WRITE(obj, services_tmp = obj.nServices);
386438
READWRITE(Using<CompactSizeFormatter<false>>(services_tmp));
387439
SER_READ(obj, obj.nServices = static_cast<ServiceFlags>(services_tmp));
388440
} else {
389441
READWRITE(Using<CustomUintFormatter<8>>(obj.nServices));
390442
}
391-
READWRITEAS(CService, obj);
443+
// Invoke V1/V2 serializer for CService parent object.
444+
OverrideStream<Stream> os(&s, s.GetType(), use_v2 ? ADDRV2_FORMAT : 0);
445+
SerReadWriteMany(os, ser_action, ReadWriteAsHelper<CService>(obj));
392446
}
393447

394-
// disk and network only
448+
//! Always included in serialization, except in the network format on INIT_PROTO_VERSION.
395449
uint32_t nTime{TIME_INIT};
396-
450+
//! Serialized as uint64_t in V1, and as CompactSize in V2.
397451
ServiceFlags nServices{NODE_NONE};
452+
453+
friend bool operator==(const CAddress& a, const CAddress& b)
454+
{
455+
return a.nTime == b.nTime &&
456+
a.nServices == b.nServices &&
457+
static_cast<const CService&>(a) == static_cast<const CService&>(b);
458+
}
398459
};
399460

400461
/** getdata message type flags */

src/test/fuzz/deserialize.cpp

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,9 @@ struct invalid_fuzzing_input_exception : public std::exception {
5353
};
5454

5555
template <typename T>
56-
CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION)
56+
CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK)
5757
{
58-
CDataStream ds(SER_NETWORK, version);
58+
CDataStream ds(ser_type, version);
5959
ds << obj;
6060
return ds;
6161
}
@@ -69,9 +69,9 @@ T Deserialize(CDataStream ds)
6969
}
7070

7171
template <typename T>
72-
void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional<int> protocol_version = std::nullopt)
72+
void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional<int> protocol_version = std::nullopt, const int ser_type = SER_NETWORK)
7373
{
74-
CDataStream ds(buffer, SER_NETWORK, INIT_PROTO_VERSION);
74+
CDataStream ds(buffer, ser_type, INIT_PROTO_VERSION);
7575
if (protocol_version) {
7676
ds.SetVersion(*protocol_version);
7777
} else {
@@ -92,9 +92,9 @@ void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optio
9292
}
9393

9494
template <typename T>
95-
void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION)
95+
void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK)
9696
{
97-
assert(Deserialize<T>(Serialize(obj, version)) == obj);
97+
assert(Deserialize<T>(Serialize(obj, version, ser_type)) == obj);
9898
}
9999

100100
} // namespace
@@ -251,9 +251,37 @@ FUZZ_TARGET_DESERIALIZE(messageheader_deserialize, {
251251
DeserializeFromFuzzingInput(buffer, mh);
252252
(void)mh.IsCommandValid();
253253
})
254-
FUZZ_TARGET_DESERIALIZE(address_deserialize, {
254+
FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_notime, {
255255
CAddress a;
256-
DeserializeFromFuzzingInput(buffer, a);
256+
DeserializeFromFuzzingInput(buffer, a, INIT_PROTO_VERSION);
257+
// A CAddress without nTime (as is expected under INIT_PROTO_VERSION) will roundtrip
258+
// in all 5 formats (with/without nTime, v1/v2, network/disk)
259+
AssertEqualAfterSerializeDeserialize(a, INIT_PROTO_VERSION);
260+
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
261+
AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
262+
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
263+
AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
264+
})
265+
FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_withtime, {
266+
CAddress a;
267+
DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION);
268+
// A CAddress in V1 mode will roundtrip in all 4 formats that have nTime.
269+
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
270+
AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
271+
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
272+
AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
273+
})
274+
FUZZ_TARGET_DESERIALIZE(address_deserialize_v2, {
275+
CAddress a;
276+
DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION | ADDRV2_FORMAT);
277+
// A CAddress in V2 mode will roundtrip in both V2 formats, and also in the V1 formats
278+
// with time if it's V1 compatible.
279+
if (a.IsAddrV1Compatible()) {
280+
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
281+
AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
282+
}
283+
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
284+
AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
257285
})
258286
FUZZ_TARGET_DESERIALIZE(inv_deserialize, {
259287
CInv i;

0 commit comments

Comments
 (0)