bitcoincore-dev
diff --git a/‎src/addrman.cpp
Lines changed: 300 additions & 0 deletions b/‎src/addrman.cpp
Lines changed: 300 additions & 0 deletions
@@ -15,6 +15,27 @@
 #include <unordered_map>
 #include <unordered_set>
 
+/** Number of tried-table buckets over which addresses from a single group (/16 for IPv4) are spread */
+static constexpr uint32_t ADDRMAN_TRIED_BUCKETS_PER_GROUP{8};
+/** Number of new-table buckets over which addresses originating from a single source group are spread */
+static constexpr uint32_t ADDRMAN_NEW_BUCKETS_PER_SOURCE_GROUP{64};
+/** Maximum number of new-table buckets a single address may be referenced from */
+static constexpr int32_t ADDRMAN_NEW_BUCKETS_PER_ADDRESS{8};
+/** Maximum age an address may have, in days */
+static constexpr int64_t ADDRMAN_HORIZON_DAYS{30};
+/** Number of failed attempts after which we give up on a new node */
+static constexpr int32_t ADDRMAN_RETRIES{3};
+/** Number of successive failures that are allowed ... */
+static constexpr int32_t ADDRMAN_MAX_FAILURES{10};
+/** ... over a period of at least this many days */
+static constexpr int64_t ADDRMAN_MIN_FAIL_DAYS{7};
+/** How recent (in hours) a successful connection should be before we allow an address to be evicted from tried */
+static constexpr int64_t ADDRMAN_REPLACEMENT_HOURS{4};
+/** Maximum number of tried-table address collisions to store */
+static constexpr size_t ADDRMAN_SET_TRIED_COLLISION_SIZE{10};
+/** Maximum time, in seconds, that we will spend trying to resolve a tried-table collision */
+static constexpr int64_t ADDRMAN_TEST_WINDOW{40*60}; // 40 minutes
+
 int CAddrInfo::GetTriedBucket(const uint256& nKey, const std::vector<bool> &asmap) const
 {
     uint64_t hash1 = (CHashWriter(SER_GETHASH, 0) << nKey << GetKey()).GetCheapHash();
@@ -94,6 +115,285 @@ CAddrMan::CAddrMan(bool deterministic, int32_t consistency_check_ratio)
     }
 }
 
+template <typename Stream>
+void CAddrMan::Serialize(Stream& s_) const
+{
+    LOCK(cs);
+
+    /**
+     * On-disk layout, in the order it is written:
+     * * format version byte (@see `Format`)
+     * * lowest compatible format version byte. Older software uses it to decide
+     *   whether it may parse the file. For example:
+     *   * Bitcoin Core version N can parse formats up to 3. If version N+1 introduces
+     *     format=4, which stays compatible with format=3 and is known to be parseable
+     *     by version N, then version N+1 writes (format=4, lowest_compatible=3) as the
+     *     first two bytes of the file, and version N will still try to parse it.
+     *   * Bitcoin Core version N+2 introduces an incompatible format=5. It writes
+     *     (format=5, lowest_compatible=5), so any version that cannot parse format=5
+     *     will not try to read the file.
+     * * nKey
+     * * nNew
+     * * nTried
+     * * number of "new" buckets XOR 2**30
+     * * all new addresses (total count: nNew)
+     * * all tried addresses (total count: nTried)
+     * * for each new bucket:
+     *   * number of elements
+     *   * for each element: index in the serialized "all new addresses"
+     * * asmap checksum
+     *
+     * The bucket count is XORed with 2**30 so that the v0 addrman deserializer, which
+     * did not check the version number, detects the file as incompatible.
+     *
+     * vvNew, vvTried, mapInfo, mapAddr and vRandom are never written explicitly;
+     * they are rebuilt from the information above when the file is read back.
+     *
+     * This format is more complex, but significantly smaller (at most 1.5 MiB), and
+     * tolerates changes to the ADDRMAN_ parameters without breaking the on-disk structure.
+     *
+     * SERIALIZE_METHODS is not used because the serialization and deserialization code
+     * have very little in common.
+     */
+
+    // The latest format (FILE_FORMAT) is always written, so addresses go out in addrv2 form.
+    OverrideStream<Stream> s(&s_, s_.GetType(), s_.GetVersion() | ADDRV2_FORMAT);
+
+    s << static_cast<uint8_t>(FILE_FORMAT);
+
+    // Bump `lowest_compatible` only when a newly introduced format cannot be read
+    // by code that understands the previous one.
+    static constexpr uint8_t lowest_compatible = Format::V3_BIP155;
+    s << static_cast<uint8_t>(INCOMPATIBILITY_BASE + lowest_compatible);
+
+    s << nKey;
+    s << nNew;
+    s << nTried;
+
+    const int obfuscated_bucket_count = ADDRMAN_NEW_BUCKET_COUNT ^ (1 << 30);
+    s << obfuscated_bucket_count;
+
+    // First pass over mapInfo: write every entry referenced from the new table and
+    // remember, per id, the running count of new entries written so far. For an entry
+    // that is itself written, that count is its index in the serialized list.
+    std::unordered_map<int, int> serialized_index_by_id;
+    int new_written = 0;
+    for (const auto& id_and_info : mapInfo) {
+        serialized_index_by_id[id_and_info.first] = new_written;
+        const CAddrInfo& addr_info = id_and_info.second;
+        if (addr_info.nRefCount != 0) {
+            assert(new_written != nNew); // this means nNew was wrong, oh ow
+            s << addr_info;
+            ++new_written;
+        }
+    }
+
+    // Second pass over mapInfo: write every entry that lives in the tried table.
+    int tried_written = 0;
+    for (const auto& id_and_info : mapInfo) {
+        const CAddrInfo& addr_info = id_and_info.second;
+        if (addr_info.fInTried) {
+            assert(tried_written != nTried); // this means nTried was wrong, oh ow
+            s << addr_info;
+            ++tried_written;
+        }
+    }
+
+    // Per new bucket: the number of occupied slots, followed by the serialized
+    // index of each occupant in slot order.
+    std::vector<int> occupant_ids;
+    for (int bucket = 0; bucket < ADDRMAN_NEW_BUCKET_COUNT; ++bucket) {
+        occupant_ids.clear();
+        for (int pos = 0; pos < ADDRMAN_BUCKET_SIZE; ++pos) {
+            const int occupant = vvNew[bucket][pos];
+            if (occupant != -1) {
+                occupant_ids.push_back(occupant);
+            }
+        }
+        const int occupied_slots = static_cast<int>(occupant_ids.size());
+        s << occupied_slots;
+        for (const int occupant : occupant_ids) {
+            const int serialized_index = serialized_index_by_id[occupant];
+            s << serialized_index;
+        }
+    }
+
+    // The asmap checksum goes after the bucket entries so that older clients
+    // can ignore it and remain backward compatible. It stays default-constructed
+    // when no asmap is loaded.
+    uint256 asmap_checksum;
+    if (m_asmap.size() > 0) {
+        asmap_checksum = SerializeHash(m_asmap);
+    }
+    s << asmap_checksum;
+}
+
+/**
+ * Rebuild the address manager from a stream in the layout documented in Serialize().
+ *
+ * Must be called on an instance whose vRandom is still empty (asserted below).
+ * Addresses that are invalid, or that collide with an already restored entry,
+ * are dropped; the number dropped is logged.
+ *
+ * @param[in] s_  Stream to read from. Its version is OR-ed with ADDRV2_FORMAT
+ *                when the stored format is V3_BIP155 or later.
+ * @throws std::ios_base::failure if the compatibility byte is below
+ *         INCOMPATIBILITY_BASE, if the file requires a format newer than
+ *         FILE_FORMAT, or if nNew / nTried are outside their valid ranges.
+ */
+template <typename Stream>
+void CAddrMan::Unserialize(Stream& s_)
+{
+    LOCK(cs);
+
+    assert(vRandom.empty());
+
+    // The format is stored as a single byte at the very start of the stream.
+    Format format;
+    s_ >> Using<CustomUintFormatter<1>>(format);
+
+    int stream_version = s_.GetVersion();
+    if (format >= Format::V3_BIP155) {
+        // Add ADDRV2_FORMAT to the version so that the CNetAddr and CAddress
+        // unserialize methods know that an address in addrv2 format is coming.
+        stream_version |= ADDRV2_FORMAT;
+    }
+
+    OverrideStream<Stream> s(&s_, s_.GetType(), stream_version);
+
+    uint8_t compat;
+    s >> compat;
+    // Serialize() always writes INCOMPATIBILITY_BASE + lowest_compatible, so a
+    // smaller value can only come from a corrupt file. Reject it explicitly:
+    // otherwise the subtraction below wraps around in uint8_t and the file is
+    // reported as requiring a bogus, very high format number.
+    if (compat < INCOMPATIBILITY_BASE) {
+        throw std::ios_base::failure(strprintf(
+                    "Corrupted addrman database: The compat value (%u) "
+                    "is lower than the expected minimum value %u.",
+                    compat, INCOMPATIBILITY_BASE));
+    }
+    const uint8_t lowest_compatible = compat - INCOMPATIBILITY_BASE;
+    if (lowest_compatible > FILE_FORMAT) {
+        throw std::ios_base::failure(strprintf(
+                    "Unsupported format of addrman database: %u. It is compatible with formats >=%u, "
+                    "but the maximum supported by this version of %s is %u.",
+                    format, lowest_compatible, PACKAGE_NAME, static_cast<uint8_t>(FILE_FORMAT)));
+    }
+
+    s >> nKey;
+    s >> nNew;
+    s >> nTried;
+    int nUBuckets = 0;
+    s >> nUBuckets;
+    if (format >= Format::V1_DETERMINISTIC) {
+        // Undo the XOR with 2**30 that Serialize() applies to the bucket count.
+        nUBuckets ^= (1 << 30);
+    }
+
+    if (nNew > ADDRMAN_NEW_BUCKET_COUNT * ADDRMAN_BUCKET_SIZE || nNew < 0) {
+        throw std::ios_base::failure(
+                strprintf("Corrupt CAddrMan serialization: nNew=%d, should be in [0, %d]",
+                    nNew,
+                    ADDRMAN_NEW_BUCKET_COUNT * ADDRMAN_BUCKET_SIZE));
+    }
+
+    if (nTried > ADDRMAN_TRIED_BUCKET_COUNT * ADDRMAN_BUCKET_SIZE || nTried < 0) {
+        throw std::ios_base::failure(
+                strprintf("Corrupt CAddrMan serialization: nTried=%d, should be in [0, %d]",
+                    nTried,
+                    ADDRMAN_TRIED_BUCKET_COUNT * ADDRMAN_BUCKET_SIZE));
+    }
+
+    // Deserialize entries from the new table. Entry n receives id n, so the
+    // indices stored in the bucket lists further down can be used as ids directly.
+    for (int n = 0; n < nNew; n++) {
+        CAddrInfo &info = mapInfo[n];
+        s >> info;
+        mapAddr[info] = n;
+        info.nRandomPos = vRandom.size();
+        vRandom.push_back(n);
+    }
+    nIdCount = nNew;
+
+    // Deserialize entries from the tried table. Each one is placed at the bucket
+    // and position derived from nKey; an entry is dropped when it is not valid
+    // or when that slot is already occupied.
+    int nLost = 0;
+    for (int n = 0; n < nTried; n++) {
+        CAddrInfo info;
+        s >> info;
+        int nKBucket = info.GetTriedBucket(nKey, m_asmap);
+        int nKBucketPos = info.GetBucketPosition(nKey, false, nKBucket);
+        if (info.IsValid()
+                && vvTried[nKBucket][nKBucketPos] == -1) {
+            info.nRandomPos = vRandom.size();
+            info.fInTried = true;
+            vRandom.push_back(nIdCount);
+            mapInfo[nIdCount] = info;
+            mapAddr[info] = nIdCount;
+            vvTried[nKBucket][nKBucketPos] = nIdCount;
+            nIdCount++;
+        } else {
+            nLost++;
+        }
+    }
+    nTried -= nLost;
+
+    // Store positions in the new table buckets to apply later (if possible).
+    // An entry may appear in up to ADDRMAN_NEW_BUCKETS_PER_ADDRESS buckets,
+    // so we store all bucket-entry_index pairs to iterate through later.
+    std::vector<std::pair<int, int>> bucket_entries;
+
+    for (int bucket = 0; bucket < nUBuckets; ++bucket) {
+        int num_entries{0};
+        s >> num_entries;
+        for (int n = 0; n < num_entries; ++n) {
+            int entry_index{0};
+            s >> entry_index;
+            // Indices that do not refer to a deserialized new entry are ignored.
+            if (entry_index >= 0 && entry_index < nNew) {
+                bucket_entries.emplace_back(bucket, entry_index);
+            }
+        }
+    }
+
+    // If the bucket count and asmap checksum haven't changed, then attempt
+    // to restore the entries to the buckets/positions they were in before
+    // serialization.
+    uint256 supplied_asmap_checksum;
+    if (m_asmap.size() != 0) {
+        supplied_asmap_checksum = SerializeHash(m_asmap);
+    }
+    // Formats older than V2_ASMAP carry no checksum; the default-constructed
+    // value is compared in that case.
+    uint256 serialized_asmap_checksum;
+    if (format >= Format::V2_ASMAP) {
+        s >> serialized_asmap_checksum;
+    }
+    const bool restore_bucketing{nUBuckets == ADDRMAN_NEW_BUCKET_COUNT &&
+        serialized_asmap_checksum == supplied_asmap_checksum};
+
+    if (!restore_bucketing) {
+        LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n");
+    }
+
+    for (const auto& bucket_entry : bucket_entries) {
+        int bucket{bucket_entry.first};
+        const int entry_index{bucket_entry.second};
+        CAddrInfo& info = mapInfo[entry_index];
+
+        // Don't store the entry in the new bucket if it's not a valid address for our addrman
+        if (!info.IsValid()) continue;
+
+        // The entry shouldn't appear in more than
+        // ADDRMAN_NEW_BUCKETS_PER_ADDRESS. If it has already, just skip
+        // this bucket_entry.
+        if (info.nRefCount >= ADDRMAN_NEW_BUCKETS_PER_ADDRESS) continue;
+
+        // `bucket` is only used as an index into vvNew when restore_bucketing is
+        // true, i.e. when nUBuckets equals ADDRMAN_NEW_BUCKET_COUNT.
+        int bucket_position = info.GetBucketPosition(nKey, true, bucket);
+        if (restore_bucketing && vvNew[bucket][bucket_position] == -1) {
+            // Bucketing has not changed, using existing bucket positions for the new table
+            vvNew[bucket][bucket_position] = entry_index;
+            ++info.nRefCount;
+        } else {
+            // In case the new table data cannot be used (bucket count wrong or new asmap),
+            // try to give them a reference based on their primary source address.
+            bucket = info.GetNewBucket(nKey, m_asmap);
+            bucket_position = info.GetBucketPosition(nKey, true, bucket);
+            if (vvNew[bucket][bucket_position] == -1) {
+                vvNew[bucket][bucket_position] = entry_index;
+                ++info.nRefCount;
+            }
+        }
+    }
+
+    // Prune new entries with refcount 0 (as a result of collisions or invalid address).
+    int nLostUnk = 0;
+    for (auto it = mapInfo.cbegin(); it != mapInfo.cend(); ) {
+        if (it->second.fInTried == false && it->second.nRefCount == 0) {
+            // Step past the entry before handing its id to Delete(), so that `it`
+            // never refers to the entry being removed.
+            const auto itCopy = it++;
+            Delete(itCopy->first);
+            ++nLostUnk;
+        } else {
+            ++it;
+        }
+    }
+    if (nLost + nLostUnk > 0) {
+        LogPrint(BCLog::ADDRMAN, "addrman lost %i new and %i tried addresses due to collisions or invalid addresses\n", nLostUnk, nLost);
+    }
+
+    Check();
+}
+
+// Explicit instantiations: the Serialize/Unserialize templates are defined in this file, so each stream type they are used with is listed here.
+template void CAddrMan::Serialize(CHashWriter& s) const;
+template void CAddrMan::Serialize(CAutoFile& s) const;
+template void CAddrMan::Serialize(CDataStream& s) const;
+template void CAddrMan::Unserialize(CAutoFile& s);
+template void CAddrMan::Unserialize(CHashVerifier<CAutoFile>& s);
+template void CAddrMan::Unserialize(CDataStream& s);
+template void CAddrMan::Unserialize(CHashVerifier<CDataStream>& s);
+
 CAddrInfo* CAddrMan::Find(const CNetAddr& addr, int* pnId)
 {
     AssertLockHeld(cs);