Skip to content

Commit b847f49

Browse files
committed
Merge #20557: addrman: Fix new table bucketing during unserialization
4676a4f [addrman] Don't repeat "Bucketing method was updated" log multiple times (John Newbery) 4362923 [addrman] Improve serialization comments (John Newbery) ac3547e [addrman] Improve variable naming/code style of touched code. (John Newbery) a5c9b04 [addrman] Don't rebucket new table entries unnecessarily (John Newbery) 8062d92 [addrman] Rename asmap version to asmap checksum (John Newbery) 009b8e0 [addrman] Improve variable naming/code style of touched code. (John Newbery) b4c5fda [addrman] Fix new table bucketing during unserialization (John Newbery) Pull request description: This fixes three issues in addrman unserialization. 1. An addrman entry can appear in up to 8 new table buckets. We store this entry->bucket indexing during shutdown so that on restart we can restore the entries to their correct buckets. Commit ec45646 broke the deserialization code so that each entry could only be put in up to one new bucket. 2. Unserialization may result in an entry appearing in a 9th bucket. If the entry already appears in 8 buckets don't try to place it in another bucket. 3. We unnecessarily rebucket when reading a peers.dat with file version 1. Don't do that. ACKs for top commit: vasild: ACK 4676a4f glozow: re-ACK bitcoin/bitcoin@4676a4f, changes were a rename, comments, and removing repeat-logging. naumenkogs: ACK 4676a4f laanwj: Code review ACK 4676a4f dhruv: ACK 4676a4f ryanofsky: Code review ACK 4676a4f. I'm not previously familiar with this code but all the changes here do make sense and seem like improvements. Left some notes and comments, but they aren't important so feel to ignore. Tree-SHA512: b228984f6dec5910be23c3740ae20258da33bcf66ceb7edb10e5a53163450f743bab349e47f09808b7e8d40f27143119ec3e0981d7e678aa494d8559a1c99c23
2 parents 3641ec1 + 4676a4f commit b847f49

File tree

1 file changed

+54
-40
lines changed

1 file changed

+54
-40
lines changed

src/addrman.h

Lines changed: 54 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -335,22 +335,20 @@ friend class CAddrManTest;
335335
* * nNew
336336
* * nTried
337337
* * number of "new" buckets XOR 2**30
338-
* * all nNew addrinfos in vvNew
339-
* * all nTried addrinfos in vvTried
340-
* * for each bucket:
338+
* * all new addresses (total count: nNew)
339+
* * all tried addresses (total count: nTried)
340+
* * for each new bucket:
341341
* * number of elements
342-
* * for each element: index
342+
* * for each element: index in the serialized "all new addresses"
343+
* * asmap checksum
343344
*
344345
* 2**30 is xorred with the number of buckets to make addrman deserializer v0 detect it
345346
* as incompatible. This is necessary because it did not check the version number on
346347
* deserialization.
347348
*
348-
* Notice that vvTried, mapAddr and vVector are never encoded explicitly;
349+
* vvNew, vvTried, mapInfo, mapAddr and vRandom are never encoded explicitly;
349350
* they are instead reconstructed from the other information.
350351
*
351-
* vvNew is serialized, but only used if ADDRMAN_UNKNOWN_BUCKET_COUNT didn't change,
352-
* otherwise it is reconstructed as well.
353-
*
354352
* This format is more complex, but significantly smaller (at most 1.5 MiB), and supports
355353
* changes to the ADDRMAN_ parameters without breaking the on-disk structure.
356354
*
@@ -413,13 +411,13 @@ friend class CAddrManTest;
413411
}
414412
}
415413
}
416-
// Store asmap version after bucket entries so that it
414+
// Store asmap checksum after bucket entries so that it
417415
// can be ignored by older clients for backward compatibility.
418-
uint256 asmap_version;
416+
uint256 asmap_checksum;
419417
if (m_asmap.size() != 0) {
420-
asmap_version = SerializeHash(m_asmap);
418+
asmap_checksum = SerializeHash(m_asmap);
421419
}
422-
s << asmap_version;
420+
s << asmap_checksum;
423421
}
424422

425423
template <typename Stream>
@@ -500,47 +498,63 @@ friend class CAddrManTest;
500498
nTried -= nLost;
501499

502500
// Store positions in the new table buckets to apply later (if possible).
503-
std::map<int, int> entryToBucket; // Represents which entry belonged to which bucket when serializing
504-
505-
for (int bucket = 0; bucket < nUBuckets; bucket++) {
506-
int nSize = 0;
507-
s >> nSize;
508-
for (int n = 0; n < nSize; n++) {
509-
int nIndex = 0;
510-
s >> nIndex;
511-
if (nIndex >= 0 && nIndex < nNew) {
512-
entryToBucket[nIndex] = bucket;
501+
// An entry may appear in up to ADDRMAN_NEW_BUCKETS_PER_ADDRESS buckets,
502+
// so we store all bucket-entry_index pairs to iterate through later.
503+
std::vector<std::pair<int, int>> bucket_entries;
504+
505+
for (int bucket = 0; bucket < nUBuckets; ++bucket) {
506+
int num_entries{0};
507+
s >> num_entries;
508+
for (int n = 0; n < num_entries; ++n) {
509+
int entry_index{0};
510+
s >> entry_index;
511+
if (entry_index >= 0 && entry_index < nNew) {
512+
bucket_entries.emplace_back(bucket, entry_index);
513513
}
514514
}
515515
}
516516

517-
uint256 supplied_asmap_version;
517+
// If the bucket count and asmap checksum haven't changed, then attempt
518+
// to restore the entries to the buckets/positions they were in before
519+
// serialization.
520+
uint256 supplied_asmap_checksum;
518521
if (m_asmap.size() != 0) {
519-
supplied_asmap_version = SerializeHash(m_asmap);
522+
supplied_asmap_checksum = SerializeHash(m_asmap);
520523
}
521-
uint256 serialized_asmap_version;
524+
uint256 serialized_asmap_checksum;
522525
if (format >= Format::V2_ASMAP) {
523-
s >> serialized_asmap_version;
526+
s >> serialized_asmap_checksum;
524527
}
528+
const bool restore_bucketing{nUBuckets == ADDRMAN_NEW_BUCKET_COUNT &&
529+
serialized_asmap_checksum == supplied_asmap_checksum};
525530

526-
for (int n = 0; n < nNew; n++) {
527-
CAddrInfo &info = mapInfo[n];
528-
int bucket = entryToBucket[n];
529-
int nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
530-
if (format >= Format::V2_ASMAP && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 &&
531-
info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_version == supplied_asmap_version) {
531+
if (!restore_bucketing) {
532+
LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n");
533+
}
534+
535+
for (auto bucket_entry : bucket_entries) {
536+
int bucket{bucket_entry.first};
537+
const int entry_index{bucket_entry.second};
538+
CAddrInfo& info = mapInfo[entry_index];
539+
540+
// The entry shouldn't appear in more than
541+
// ADDRMAN_NEW_BUCKETS_PER_ADDRESS. If it has already, just skip
542+
// this bucket_entry.
543+
if (info.nRefCount >= ADDRMAN_NEW_BUCKETS_PER_ADDRESS) continue;
544+
545+
int bucket_position = info.GetBucketPosition(nKey, true, bucket);
546+
if (restore_bucketing && vvNew[bucket][bucket_position] == -1) {
532547
// Bucketing has not changed, using existing bucket positions for the new table
533-
vvNew[bucket][nUBucketPos] = n;
534-
info.nRefCount++;
548+
vvNew[bucket][bucket_position] = entry_index;
549+
++info.nRefCount;
535550
} else {
536-
// In case the new table data cannot be used (format unknown, bucket count wrong or new asmap),
551+
// In case the new table data cannot be used (bucket count wrong or new asmap),
537552
// try to give them a reference based on their primary source address.
538-
LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n");
539553
bucket = info.GetNewBucket(nKey, m_asmap);
540-
nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
541-
if (vvNew[bucket][nUBucketPos] == -1) {
542-
vvNew[bucket][nUBucketPos] = n;
543-
info.nRefCount++;
554+
bucket_position = info.GetBucketPosition(nKey, true, bucket);
555+
if (vvNew[bucket][bucket_position] == -1) {
556+
vvNew[bucket][bucket_position] = entry_index;
557+
++info.nRefCount;
544558
}
545559
}
546560
}

0 commit comments

Comments
 (0)