Skip to content

Commit a820e79

Browse files
committed
Merge bitcoin/bitcoin#22740: [addrman] Move serialization code to cpp
85b15dd [refactor] [addrman] Update constant comments (John Newbery) af9638a [move-only] Extract constants from addrman .h to .cpp (Amiti Uttarwar) 7dc443a [addrman] Change addrman #define constants to be constexprs (Amiti Uttarwar) a65053f [addrman] Move CAddrMan::Unserialize to cpp file (John Newbery) 1622543 [addrman] Move CAddrMan::Serialize to cpp file (John Newbery) Pull request description: Moving the serialization code from the header to the cpp helps clarify interfaces vs internals, as well as speed up the compilation of the whole program with a smaller header file. ACKs for top commit: jnewbery: Code review ACK 85b15dd 0xB10C: Code review ACK 85b15dd mzumsande: Code Review ACK 85b15dd (+ performed some light testing) Tree-SHA512: a1aac25155601dd0ffd073b37388d9062c3d82c499821bd7ee883286cbc5dc0c7ae87f127c127778dae290006b98166640dc974d1953f3f34c53a67cf7b21613
2 parents 81f4a3e + 85b15dd commit a820e79

File tree

2 files changed

+311
-308
lines changed

2 files changed

+311
-308
lines changed

src/addrman.cpp

Lines changed: 300 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,27 @@
1515
#include <unordered_map>
1616
#include <unordered_set>
1717

// Tuning parameters of the address manager. They are file-local (`static`).

/** Over how many buckets entries with tried addresses from a single group (/16 for IPv4) are spread */
static constexpr uint32_t ADDRMAN_TRIED_BUCKETS_PER_GROUP{8};
/** Over how many buckets entries with new addresses originating from a single group are spread */
static constexpr uint32_t ADDRMAN_NEW_BUCKETS_PER_SOURCE_GROUP{64};
/** Maximum number of times an address can be added to the new table */
static constexpr int32_t ADDRMAN_NEW_BUCKETS_PER_ADDRESS{8};
/** How old addresses can maximally be, in days */
static constexpr int64_t ADDRMAN_HORIZON_DAYS{30};
/** After how many failed attempts we give up on a new node */
static constexpr int32_t ADDRMAN_RETRIES{3};
/** How many successive failures are allowed ... */
static constexpr int32_t ADDRMAN_MAX_FAILURES{10};
/** ... in at least this many days */
static constexpr int64_t ADDRMAN_MIN_FAIL_DAYS{7};
/** How recent (in hours) a successful connection should be before we allow an address to be evicted from tried */
static constexpr int64_t ADDRMAN_REPLACEMENT_HOURS{4};
/** The maximum number of tried addr collisions to store */
static constexpr size_t ADDRMAN_SET_TRIED_COLLISION_SIZE{10};
/** The maximum time we'll spend trying to resolve a tried table collision, in seconds */
static constexpr int64_t ADDRMAN_TEST_WINDOW{40 * 60}; // 40 minutes
38+
1839
int CAddrInfo::GetTriedBucket(const uint256& nKey, const std::vector<bool> &asmap) const
1940
{
2041
uint64_t hash1 = (CHashWriter(SER_GETHASH, 0) << nKey << GetKey()).GetCheapHash();
@@ -94,6 +115,285 @@ CAddrMan::CAddrMan(bool deterministic, int32_t consistency_check_ratio)
94115
}
95116
}
96117

118+
template <typename Stream>
119+
void CAddrMan::Serialize(Stream& s_) const
120+
{
121+
LOCK(cs);
122+
123+
/**
124+
* Serialized format.
125+
* * format version byte (@see `Format`)
126+
* * lowest compatible format version byte. This is used to help old software decide
127+
* whether to parse the file. For example:
128+
* * Bitcoin Core version N knows how to parse up to format=3. If a new format=4 is
129+
* introduced in version N+1 that is compatible with format=3 and it is known that
130+
* version N will be able to parse it, then version N+1 will write
131+
* (format=4, lowest_compatible=3) in the first two bytes of the file, and so
132+
* version N will still try to parse it.
133+
* * Bitcoin Core version N+2 introduces a new incompatible format=5. It will write
134+
* (format=5, lowest_compatible=5) and so any versions that do not know how to parse
135+
* format=5 will not try to read the file.
136+
* * nKey
137+
* * nNew
138+
* * nTried
139+
* * number of "new" buckets XOR 2**30
140+
* * all new addresses (total count: nNew)
141+
* * all tried addresses (total count: nTried)
142+
* * for each new bucket:
143+
* * number of elements
144+
* * for each element: index in the serialized "all new addresses"
145+
* * asmap checksum
146+
*
147+
* 2**30 is xorred with the number of buckets to make addrman deserializer v0 detect it
148+
* as incompatible. This is necessary because it did not check the version number on
149+
* deserialization.
150+
*
151+
* vvNew, vvTried, mapInfo, mapAddr and vRandom are never encoded explicitly;
152+
* they are instead reconstructed from the other information.
153+
*
154+
* This format is more complex, but significantly smaller (at most 1.5 MiB), and supports
155+
* changes to the ADDRMAN_ parameters without breaking the on-disk structure.
156+
*
157+
* We don't use SERIALIZE_METHODS since the serialization and deserialization code has
158+
* very little in common.
159+
*/
160+
161+
// Always serialize in the latest version (FILE_FORMAT).
162+
163+
OverrideStream<Stream> s(&s_, s_.GetType(), s_.GetVersion() | ADDRV2_FORMAT);
164+
165+
s << static_cast<uint8_t>(FILE_FORMAT);
166+
167+
// Increment `lowest_compatible` iff a newly introduced format is incompatible with
168+
// the previous one.
169+
static constexpr uint8_t lowest_compatible = Format::V3_BIP155;
170+
s << static_cast<uint8_t>(INCOMPATIBILITY_BASE + lowest_compatible);
171+
172+
s << nKey;
173+
s << nNew;
174+
s << nTried;
175+
176+
int nUBuckets = ADDRMAN_NEW_BUCKET_COUNT ^ (1 << 30);
177+
s << nUBuckets;
178+
std::unordered_map<int, int> mapUnkIds;
179+
int nIds = 0;
180+
for (const auto& entry : mapInfo) {
181+
mapUnkIds[entry.first] = nIds;
182+
const CAddrInfo &info = entry.second;
183+
if (info.nRefCount) {
184+
assert(nIds != nNew); // this means nNew was wrong, oh ow
185+
s << info;
186+
nIds++;
187+
}
188+
}
189+
nIds = 0;
190+
for (const auto& entry : mapInfo) {
191+
const CAddrInfo &info = entry.second;
192+
if (info.fInTried) {
193+
assert(nIds != nTried); // this means nTried was wrong, oh ow
194+
s << info;
195+
nIds++;
196+
}
197+
}
198+
for (int bucket = 0; bucket < ADDRMAN_NEW_BUCKET_COUNT; bucket++) {
199+
int nSize = 0;
200+
for (int i = 0; i < ADDRMAN_BUCKET_SIZE; i++) {
201+
if (vvNew[bucket][i] != -1)
202+
nSize++;
203+
}
204+
s << nSize;
205+
for (int i = 0; i < ADDRMAN_BUCKET_SIZE; i++) {
206+
if (vvNew[bucket][i] != -1) {
207+
int nIndex = mapUnkIds[vvNew[bucket][i]];
208+
s << nIndex;
209+
}
210+
}
211+
}
212+
// Store asmap checksum after bucket entries so that it
213+
// can be ignored by older clients for backward compatibility.
214+
uint256 asmap_checksum;
215+
if (m_asmap.size() != 0) {
216+
asmap_checksum = SerializeHash(m_asmap);
217+
}
218+
s << asmap_checksum;
219+
}
220+
221+
template <typename Stream>
222+
void CAddrMan::Unserialize(Stream& s_)
223+
{
224+
LOCK(cs);
225+
226+
assert(vRandom.empty());
227+
228+
Format format;
229+
s_ >> Using<CustomUintFormatter<1>>(format);
230+
231+
int stream_version = s_.GetVersion();
232+
if (format >= Format::V3_BIP155) {
233+
// Add ADDRV2_FORMAT to the version so that the CNetAddr and CAddress
234+
// unserialize methods know that an address in addrv2 format is coming.
235+
stream_version |= ADDRV2_FORMAT;
236+
}
237+
238+
OverrideStream<Stream> s(&s_, s_.GetType(), stream_version);
239+
240+
uint8_t compat;
241+
s >> compat;
242+
const uint8_t lowest_compatible = compat - INCOMPATIBILITY_BASE;
243+
if (lowest_compatible > FILE_FORMAT) {
244+
throw std::ios_base::failure(strprintf(
245+
"Unsupported format of addrman database: %u. It is compatible with formats >=%u, "
246+
"but the maximum supported by this version of %s is %u.",
247+
format, lowest_compatible, PACKAGE_NAME, static_cast<uint8_t>(FILE_FORMAT)));
248+
}
249+
250+
s >> nKey;
251+
s >> nNew;
252+
s >> nTried;
253+
int nUBuckets = 0;
254+
s >> nUBuckets;
255+
if (format >= Format::V1_DETERMINISTIC) {
256+
nUBuckets ^= (1 << 30);
257+
}
258+
259+
if (nNew > ADDRMAN_NEW_BUCKET_COUNT * ADDRMAN_BUCKET_SIZE || nNew < 0) {
260+
throw std::ios_base::failure(
261+
strprintf("Corrupt CAddrMan serialization: nNew=%d, should be in [0, %d]",
262+
nNew,
263+
ADDRMAN_NEW_BUCKET_COUNT * ADDRMAN_BUCKET_SIZE));
264+
}
265+
266+
if (nTried > ADDRMAN_TRIED_BUCKET_COUNT * ADDRMAN_BUCKET_SIZE || nTried < 0) {
267+
throw std::ios_base::failure(
268+
strprintf("Corrupt CAddrMan serialization: nTried=%d, should be in [0, %d]",
269+
nTried,
270+
ADDRMAN_TRIED_BUCKET_COUNT * ADDRMAN_BUCKET_SIZE));
271+
}
272+
273+
// Deserialize entries from the new table.
274+
for (int n = 0; n < nNew; n++) {
275+
CAddrInfo &info = mapInfo[n];
276+
s >> info;
277+
mapAddr[info] = n;
278+
info.nRandomPos = vRandom.size();
279+
vRandom.push_back(n);
280+
}
281+
nIdCount = nNew;
282+
283+
// Deserialize entries from the tried table.
284+
int nLost = 0;
285+
for (int n = 0; n < nTried; n++) {
286+
CAddrInfo info;
287+
s >> info;
288+
int nKBucket = info.GetTriedBucket(nKey, m_asmap);
289+
int nKBucketPos = info.GetBucketPosition(nKey, false, nKBucket);
290+
if (info.IsValid()
291+
&& vvTried[nKBucket][nKBucketPos] == -1) {
292+
info.nRandomPos = vRandom.size();
293+
info.fInTried = true;
294+
vRandom.push_back(nIdCount);
295+
mapInfo[nIdCount] = info;
296+
mapAddr[info] = nIdCount;
297+
vvTried[nKBucket][nKBucketPos] = nIdCount;
298+
nIdCount++;
299+
} else {
300+
nLost++;
301+
}
302+
}
303+
nTried -= nLost;
304+
305+
// Store positions in the new table buckets to apply later (if possible).
306+
// An entry may appear in up to ADDRMAN_NEW_BUCKETS_PER_ADDRESS buckets,
307+
// so we store all bucket-entry_index pairs to iterate through later.
308+
std::vector<std::pair<int, int>> bucket_entries;
309+
310+
for (int bucket = 0; bucket < nUBuckets; ++bucket) {
311+
int num_entries{0};
312+
s >> num_entries;
313+
for (int n = 0; n < num_entries; ++n) {
314+
int entry_index{0};
315+
s >> entry_index;
316+
if (entry_index >= 0 && entry_index < nNew) {
317+
bucket_entries.emplace_back(bucket, entry_index);
318+
}
319+
}
320+
}
321+
322+
// If the bucket count and asmap checksum haven't changed, then attempt
323+
// to restore the entries to the buckets/positions they were in before
324+
// serialization.
325+
uint256 supplied_asmap_checksum;
326+
if (m_asmap.size() != 0) {
327+
supplied_asmap_checksum = SerializeHash(m_asmap);
328+
}
329+
uint256 serialized_asmap_checksum;
330+
if (format >= Format::V2_ASMAP) {
331+
s >> serialized_asmap_checksum;
332+
}
333+
const bool restore_bucketing{nUBuckets == ADDRMAN_NEW_BUCKET_COUNT &&
334+
serialized_asmap_checksum == supplied_asmap_checksum};
335+
336+
if (!restore_bucketing) {
337+
LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n");
338+
}
339+
340+
for (auto bucket_entry : bucket_entries) {
341+
int bucket{bucket_entry.first};
342+
const int entry_index{bucket_entry.second};
343+
CAddrInfo& info = mapInfo[entry_index];
344+
345+
// Don't store the entry in the new bucket if it's not a valid address for our addrman
346+
if (!info.IsValid()) continue;
347+
348+
// The entry shouldn't appear in more than
349+
// ADDRMAN_NEW_BUCKETS_PER_ADDRESS. If it has already, just skip
350+
// this bucket_entry.
351+
if (info.nRefCount >= ADDRMAN_NEW_BUCKETS_PER_ADDRESS) continue;
352+
353+
int bucket_position = info.GetBucketPosition(nKey, true, bucket);
354+
if (restore_bucketing && vvNew[bucket][bucket_position] == -1) {
355+
// Bucketing has not changed, using existing bucket positions for the new table
356+
vvNew[bucket][bucket_position] = entry_index;
357+
++info.nRefCount;
358+
} else {
359+
// In case the new table data cannot be used (bucket count wrong or new asmap),
360+
// try to give them a reference based on their primary source address.
361+
bucket = info.GetNewBucket(nKey, m_asmap);
362+
bucket_position = info.GetBucketPosition(nKey, true, bucket);
363+
if (vvNew[bucket][bucket_position] == -1) {
364+
vvNew[bucket][bucket_position] = entry_index;
365+
++info.nRefCount;
366+
}
367+
}
368+
}
369+
370+
// Prune new entries with refcount 0 (as a result of collisions or invalid address).
371+
int nLostUnk = 0;
372+
for (auto it = mapInfo.cbegin(); it != mapInfo.cend(); ) {
373+
if (it->second.fInTried == false && it->second.nRefCount == 0) {
374+
const auto itCopy = it++;
375+
Delete(itCopy->first);
376+
++nLostUnk;
377+
} else {
378+
++it;
379+
}
380+
}
381+
if (nLost + nLostUnk > 0) {
382+
LogPrint(BCLog::ADDRMAN, "addrman lost %i new and %i tried addresses due to collisions or invalid addresses\n", nLostUnk, nLost);
383+
}
384+
385+
Check();
386+
}
387+
388+
// explicit instantiation
389+
template void CAddrMan::Serialize(CHashWriter& s) const;
390+
template void CAddrMan::Serialize(CAutoFile& s) const;
391+
template void CAddrMan::Serialize(CDataStream& s) const;
392+
template void CAddrMan::Unserialize(CAutoFile& s);
393+
template void CAddrMan::Unserialize(CHashVerifier<CAutoFile>& s);
394+
template void CAddrMan::Unserialize(CDataStream& s);
395+
template void CAddrMan::Unserialize(CHashVerifier<CDataStream>& s);
396+
97397
CAddrInfo* CAddrMan::Find(const CNetAddr& addr, int* pnId)
98398
{
99399
AssertLockHeld(cs);

0 commit comments

Comments
 (0)