Skip to content

Commit 8cd8f37

Browse files
committed
Introduce well-defined CAddress disk serialization
Before this commit, CAddress disk serialization was messy. It stored CLIENT_VERSION in the first 4 bytes, optionally OR'ed with ADDRV2_FORMAT. - All bits except ADDRV2_FORMAT were ignored, making it hard to use for actual future format changes. - ADDRV2_FORMAT determines whether nServices is serialized in LE64 format or in CompactSize format. - Whether the embedded CService is serialized in V1 or V2 format is determined by the stream's version having ADDRV2_FORMAT (as opposed to the nServices encoding, which is determined by the disk version). To improve the situation, this commit introduces the following disk serialization format, compatible with earlier versions, but better defined for future changes: - The first 4 bytes store a format version number. Its low 19 bits are ignored (as it historically stored the CLIENT_VERSION), but its high 13 bits specify the serialization exactly: - 0x00000000: LE64 encoding for nServices, V1 encoding for CService - 0x20000000: CompactSize encoding for nServices, V2 encoding for CService - Any other value triggers an unsupported format error on deserialization, and can be used for future format changes. - The ADDRV2_FORMAT flag in the stream's version does not impact the actual serialization format; it only determines whether V2 encoding is permitted; whether it's actually enabled depends solely on the disk version number. Operationally the changes to the deserializer are: - Failure when the stored format version number is unexpected. - The embedded CService's format is determined by the stored format version number rather than the stream's version number. These do not introduce incompatibilities, as no code versions exist that write any value other than 0 or 0x20000000 in the top 13 bits, and no code paths exist where the stream's version differs from the stored version.
1 parent b295395 commit 8cd8f37

File tree

1 file changed

+65
-11
lines changed

1 file changed

+65
-11
lines changed

src/protocol.h

Lines changed: 65 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <netaddress.h>
1414
#include <primitives/transaction.h>
1515
#include <serialize.h>
16+
#include <streams.h>
1617
#include <uint256.h>
1718
#include <version.h>
1819

@@ -358,42 +359,95 @@ class CAddress : public CService
358359
{
359360
static constexpr uint32_t TIME_INIT{100000000};
360361

362+
/** Historically, CAddress disk serialization stored the CLIENT_VERSION, optionally OR'ed with
363+
* the ADDRV2_FORMAT flag to indicate V2 serialization. The first field has since been
364+
* disentangled from client versioning, and now instead:
365+
* - The low bits (masked by DISK_VERSION_IGNORE_MASK) store the fixed value DISK_VERSION_INIT,
366+
* (in case any code exists that treats it as a client version) but are ignored on
367+
* deserialization.
368+
* - The high bits (masked by ~DISK_VERSION_IGNORE_MASK) store actual serialization information.
369+
* Only 0 or DISK_VERSION_ADDRV2 (equal to the historical ADDRV2_FORMAT) are valid now, and
370+
* any other value triggers a deserialization failure. Other values can be added later if
371+
* needed.
372+
*
373+
* For disk deserialization, ADDRV2_FORMAT in the stream version signals that ADDRV2
374+
* deserialization is permitted, but the actual format is determined by the high bits in the
375+
* stored version field. For network serialization, the stream version having ADDRV2_FORMAT or
376+
* not determines the actual format used (as it has no embedded version number).
377+
*/
378+
static constexpr uint32_t DISK_VERSION_INIT{220000};
379+
static constexpr uint32_t DISK_VERSION_IGNORE_MASK{0b00000000'00000111'11111111'11111111};
380+
/** The version number written in disk serialized addresses to indicate V2 serializations.
381+
* It must be exactly 1<<29, as that is the value that historical versions used for this
382+
* (they used their internal ADDRV2_FORMAT flag here). */
383+
static constexpr uint32_t DISK_VERSION_ADDRV2{1 << 29};
384+
static_assert((DISK_VERSION_INIT & ~DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_INIT must be covered by DISK_VERSION_IGNORE_MASK");
385+
static_assert((DISK_VERSION_ADDRV2 & DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_ADDRV2 must not be covered by DISK_VERSION_IGNORE_MASK");
386+
361387
public:
362388
CAddress() : CService{} {};
363389
CAddress(CService ipIn, ServiceFlags nServicesIn) : CService{ipIn}, nServices{nServicesIn} {};
364390
CAddress(CService ipIn, ServiceFlags nServicesIn, uint32_t nTimeIn) : CService{ipIn}, nTime{nTimeIn}, nServices{nServicesIn} {};
365391

366392
SERIALIZE_METHODS(CAddress, obj)
367393
{
368-
SER_READ(obj, obj.nTime = TIME_INIT);
369-
int nVersion = s.GetVersion();
394+
// CAddress has a distinct network serialization and a disk serialization, but it should never
395+
// be hashed (except through CHashWriter in addrdb.cpp, which sets SER_DISK), and it's
396+
// ambiguous what that would mean. Make sure no code relying on that is introduced:
397+
assert(!(s.GetType() & SER_GETHASH));
398+
bool use_v2;
399+
bool store_time;
370400
if (s.GetType() & SER_DISK) {
371-
READWRITE(nVersion);
372-
}
373-
if ((s.GetType() & SER_DISK) ||
374-
(nVersion != INIT_PROTO_VERSION && !(s.GetType() & SER_GETHASH))) {
401+
// In the disk serialization format, the encoding (v1 or v2) is determined by a flag version
402+
// that's part of the serialization itself. ADDRV2_FORMAT in the stream version only determines
403+
// whether V2 is chosen/permitted at all.
404+
uint32_t stored_format_version = DISK_VERSION_INIT;
405+
if (s.GetVersion() & ADDRV2_FORMAT) stored_format_version |= DISK_VERSION_ADDRV2;
406+
READWRITE(stored_format_version);
407+
stored_format_version &= ~DISK_VERSION_IGNORE_MASK; // ignore low bits
408+
if (stored_format_version == 0) {
409+
use_v2 = false;
410+
} else if (stored_format_version == DISK_VERSION_ADDRV2 && (s.GetVersion() & ADDRV2_FORMAT)) {
411+
// Only support v2 deserialization if ADDRV2_FORMAT is set.
412+
use_v2 = true;
413+
} else {
414+
throw std::ios_base::failure("Unsupported CAddress disk format version");
415+
}
416+
store_time = true;
417+
} else {
418+
// In the network serialization format, the encoding (v1 or v2) is determined directly by
419+
// the value of ADDRV2_FORMAT in the stream version, as no explicitly encoded version
420+
// exists in the stream.
421+
assert(s.GetType() & SER_NETWORK);
422+
use_v2 = s.GetVersion() & ADDRV2_FORMAT;
375423
// The only time we serialize a CAddress object without nTime is in
376424
// the initial VERSION messages which contain two CAddress records.
377425
// At that point, the serialization version is INIT_PROTO_VERSION.
378426
// After the version handshake, serialization version is >=
379427
// MIN_PEER_PROTO_VERSION and all ADDR messages are serialized with
380428
// nTime.
381-
READWRITE(obj.nTime);
429+
store_time = s.GetVersion() != INIT_PROTO_VERSION;
382430
}
383-
if (nVersion & ADDRV2_FORMAT) {
431+
432+
SER_READ(obj, obj.nTime = TIME_INIT);
433+
if (store_time) READWRITE(obj.nTime);
434+
// nServices is serialized as CompactSize in V2; as uint64_t in V1.
435+
if (use_v2) {
384436
uint64_t services_tmp;
385437
SER_WRITE(obj, services_tmp = obj.nServices);
386438
READWRITE(Using<CompactSizeFormatter<false>>(services_tmp));
387439
SER_READ(obj, obj.nServices = static_cast<ServiceFlags>(services_tmp));
388440
} else {
389441
READWRITE(Using<CustomUintFormatter<8>>(obj.nServices));
390442
}
391-
READWRITEAS(CService, obj);
443+
// Invoke V1/V2 serializer for CService parent object.
444+
OverrideStream<Stream> os(&s, s.GetType(), use_v2 ? ADDRV2_FORMAT : 0);
445+
SerReadWriteMany(os, ser_action, ReadWriteAsHelper<CService>(obj));
392446
}
393447

394-
// disk and network only
448+
//! Always included in serialization, except in the network format on INIT_PROTO_VERSION.
395449
uint32_t nTime{TIME_INIT};
396-
450+
//! Serialized as uint64_t in V1, and as CompactSize in V2.
397451
ServiceFlags nServices{NODE_NONE};
398452
};
399453

0 commit comments

Comments
 (0)