Skip to content

Commit 0bff433

Browse files
laanwjgades
authored andcommitted
Merge bitcoin#20557: addrman: Fix new table bucketing during unserialization
4676a4f [addrman] Don't repeat "Bucketing method was updated" log multiple times (John Newbery) 4362923 [addrman] Improve serialization comments (John Newbery) ac3547e [addrman] Improve variable naming/code style of touched code. (John Newbery) a5c9b04 [addrman] Don't rebucket new table entries unnecessarily (John Newbery) 8062d92 [addrman] Rename asmap version to asmap checksum (John Newbery) 009b8e0 [addrman] Improve variable naming/code style of touched code. (John Newbery) b4c5fda [addrman] Fix new table bucketing during unserialization (John Newbery) Pull request description: This fixes three issues in addrman unserialization. 1. An addrman entry can appear in up to 8 new table buckets. We store this entry->bucket indexing during shutdown so that on restart we can restore the entries to their correct buckets. Commit ec45646 broke the deserialization code so that each entry could only be put in up to one new bucket. 2. Unserialization may result in an entry appearing in a 9th bucket. If the entry already appears in 8 buckets don't try to place it in another bucket. 3. We unnecessarily rebucket when reading a peers.dat with file version 1. Don't do that. ACKs for top commit: vasild: ACK 4676a4f glozow: re-ACK bitcoin@4676a4f, changes were a rename, comments, and removing repeat-logging. naumenkogs: ACK 4676a4f laanwj: Code review ACK 4676a4f dhruv: ACK 4676a4f ryanofsky: Code review ACK 4676a4f. I'm not previously familiar with this code but all the changes here do make sense and seem like improvements. Left some notes and comments, but they aren't important so feel to ignore. Tree-SHA512: b228984f6dec5910be23c3740ae20258da33bcf66ceb7edb10e5a53163450f743bab349e47f09808b7e8d40f27143119ec3e0981d7e678aa494d8559a1c99c23
1 parent 46540e7 commit 0bff433

File tree

1 file changed

+54
-40
lines changed

1 file changed

+54
-40
lines changed

src/addrman.h

Lines changed: 54 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -315,22 +315,20 @@ class CAddrMan
315315
* * nNew
316316
* * nTried
317317
* * number of "new" buckets XOR 2**30
318-
* * all nNew addrinfos in vvNew
319-
* * all nTried addrinfos in vvTried
320-
* * for each bucket:
318+
* * all new addresses (total count: nNew)
319+
* * all tried addresses (total count: nTried)
320+
* * for each new bucket:
321321
* * number of elements
322-
* * for each element: index
322+
* * for each element: index in the serialized "all new addresses"
323+
* * asmap checksum
323324
*
324325
* 2**30 is xorred with the number of buckets to make addrman deserializer v0 detect it
325326
* as incompatible. This is necessary because it did not check the version number on
326327
* deserialization.
327328
*
328-
* Notice that vvTried, mapAddr and vVector are never encoded explicitly;
329+
* vvNew, vvTried, mapInfo, mapAddr and vRandom are never encoded explicitly;
329330
* they are instead reconstructed from the other information.
330331
*
331-
* vvNew is serialized, but only used if ADDRMAN_UNKNOWN_BUCKET_COUNT didn't change,
332-
* otherwise it is reconstructed as well.
333-
*
334332
* This format is more complex, but significantly smaller (at most 1.5 MiB), and supports
335333
* changes to the ADDRMAN_ parameters without breaking the on-disk structure.
336334
*
@@ -388,13 +386,13 @@ class CAddrMan
388386
}
389387
}
390388
}
391-
// Store asmap version after bucket entries so that it
389+
// Store asmap checksum after bucket entries so that it
392390
// can be ignored by older clients for backward compatibility.
393-
uint256 asmap_version;
391+
uint256 asmap_checksum;
394392
if (m_asmap.size() != 0) {
395-
asmap_version = SerializeHash(m_asmap);
393+
asmap_checksum = SerializeHash(m_asmap);
396394
}
397-
s << asmap_version;
395+
s << asmap_checksum;
398396
}
399397

400398
template <typename Stream>
@@ -477,47 +475,63 @@ class CAddrMan
477475
nTried -= nLost;
478476

479477
// Store positions in the new table buckets to apply later (if possible).
480-
std::map<int, int> entryToBucket; // Represents which entry belonged to which bucket when serializing
481-
482-
for (int bucket = 0; bucket < nUBuckets; bucket++) {
483-
int nSize = 0;
484-
s >> nSize;
485-
for (int n = 0; n < nSize; n++) {
486-
int nIndex = 0;
487-
s >> nIndex;
488-
if (nIndex >= 0 && nIndex < nNew) {
489-
entryToBucket[nIndex] = bucket;
478+
// An entry may appear in up to ADDRMAN_NEW_BUCKETS_PER_ADDRESS buckets,
479+
// so we store all bucket-entry_index pairs to iterate through later.
480+
std::vector<std::pair<int, int>> bucket_entries;
481+
482+
for (int bucket = 0; bucket < nUBuckets; ++bucket) {
483+
int num_entries{0};
484+
s >> num_entries;
485+
for (int n = 0; n < num_entries; ++n) {
486+
int entry_index{0};
487+
s >> entry_index;
488+
if (entry_index >= 0 && entry_index < nNew) {
489+
bucket_entries.emplace_back(bucket, entry_index);
490490
}
491491
}
492492
}
493493

494-
uint256 supplied_asmap_version;
494+
// If the bucket count and asmap checksum haven't changed, then attempt
495+
// to restore the entries to the buckets/positions they were in before
496+
// serialization.
497+
uint256 supplied_asmap_checksum;
495498
if (m_asmap.size() != 0) {
496-
supplied_asmap_version = SerializeHash(m_asmap);
499+
supplied_asmap_checksum = SerializeHash(m_asmap);
497500
}
498-
uint256 serialized_asmap_version;
501+
uint256 serialized_asmap_checksum;
499502
if (format >= Format::V2_ASMAP) {
500-
s >> serialized_asmap_version;
503+
s >> serialized_asmap_checksum;
501504
}
505+
const bool restore_bucketing{nUBuckets == ADDRMAN_NEW_BUCKET_COUNT &&
506+
serialized_asmap_checksum == supplied_asmap_checksum};
502507

503-
for (int n = 0; n < nNew; n++) {
504-
CAddrInfo &info = mapInfo[n];
505-
int bucket = entryToBucket[n];
506-
int nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
507-
if (format >= Format::V2_ASMAP && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 &&
508-
info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_version == supplied_asmap_version) {
508+
if (!restore_bucketing) {
509+
LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n");
510+
}
511+
512+
for (auto bucket_entry : bucket_entries) {
513+
int bucket{bucket_entry.first};
514+
const int entry_index{bucket_entry.second};
515+
CAddrInfo& info = mapInfo[entry_index];
516+
517+
// The entry shouldn't appear in more than
518+
// ADDRMAN_NEW_BUCKETS_PER_ADDRESS. If it has already, just skip
519+
// this bucket_entry.
520+
if (info.nRefCount >= ADDRMAN_NEW_BUCKETS_PER_ADDRESS) continue;
521+
522+
int bucket_position = info.GetBucketPosition(nKey, true, bucket);
523+
if (restore_bucketing && vvNew[bucket][bucket_position] == -1) {
509524
// Bucketing has not changed, using existing bucket positions for the new table
510-
vvNew[bucket][nUBucketPos] = n;
511-
info.nRefCount++;
525+
vvNew[bucket][bucket_position] = entry_index;
526+
++info.nRefCount;
512527
} else {
513-
// In case the new table data cannot be used (format unknown, bucket count wrong or new asmap),
528+
// In case the new table data cannot be used (bucket count wrong or new asmap),
514529
// try to give them a reference based on their primary source address.
515-
LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n");
516530
bucket = info.GetNewBucket(nKey, m_asmap);
517-
nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
518-
if (vvNew[bucket][nUBucketPos] == -1) {
519-
vvNew[bucket][nUBucketPos] = n;
520-
info.nRefCount++;
531+
bucket_position = info.GetBucketPosition(nKey, true, bucket);
532+
if (vvNew[bucket][bucket_position] == -1) {
533+
vvNew[bucket][bucket_position] = entry_index;
534+
++info.nRefCount;
521535
}
522536
}
523537
}

0 commit comments

Comments
 (0)