Skip to content

Commit ec45646

Browse files
committed
Integrate ASN bucketing in Addrman and add tests
Instead of bucketing nodes in Addrman by /16 netgroup for connection diversification, bucket them by ASN, which better represents an actor in terms of network-layer infrastructure. The tests use asmap.raw, a minimal asmap that is sufficient for testing purposes.
1 parent 8feb4e4 commit ec45646

File tree

11 files changed

+473
-78
lines changed

11 files changed

+473
-78
lines changed

src/Makefile.test.include

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ JSON_TEST_FILES = \
5353
test/data/tx_invalid.json \
5454
test/data/tx_valid.json
5555

56-
RAW_TEST_FILES =
56+
RAW_TEST_FILES = \
57+
test/data/asmap.raw
5758

5859
GENERATED_TEST_FILES = $(JSON_TEST_FILES:.json=.json.h) $(RAW_TEST_FILES:.raw=.raw.h)
5960

@@ -429,3 +430,12 @@ endif
429430
echo "};};"; \
430431
} > "$@.new" && mv -f "$@.new" "$@"
431432
@echo "Generated $@"
433+
434+
%.raw.h: %.raw
435+
@$(MKDIR_P) $(@D)
436+
@{ \
437+
echo "static unsigned const char $(*F)_raw[] = {" && \
438+
$(HEXDUMP) -v -e '8/1 "0x%02x, "' -e '"\n"' $< | $(SED) -e 's/0x ,//g' && \
439+
echo "};"; \
440+
} > "$@.new" && mv -f "$@.new" "$@"
441+
@echo "Generated $@"

src/addrman.cpp

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,17 @@
88
#include <hash.h>
99
#include <serialize.h>
1010

11-
int CAddrInfo::GetTriedBucket(const uint256& nKey) const
11+
int CAddrInfo::GetTriedBucket(const uint256& nKey, const std::vector<bool> &asmap) const
1212
{
1313
uint64_t hash1 = (CHashWriter(SER_GETHASH, 0) << nKey << GetKey()).GetCheapHash();
14-
uint64_t hash2 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup() << (hash1 % ADDRMAN_TRIED_BUCKETS_PER_GROUP)).GetCheapHash();
14+
uint64_t hash2 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup(asmap) << (hash1 % ADDRMAN_TRIED_BUCKETS_PER_GROUP)).GetCheapHash();
1515
return hash2 % ADDRMAN_TRIED_BUCKET_COUNT;
1616
}
1717

18-
int CAddrInfo::GetNewBucket(const uint256& nKey, const CNetAddr& src) const
18+
int CAddrInfo::GetNewBucket(const uint256& nKey, const CNetAddr& src, const std::vector<bool> &asmap) const
1919
{
20-
std::vector<unsigned char> vchSourceGroupKey = src.GetGroup();
21-
uint64_t hash1 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup() << vchSourceGroupKey).GetCheapHash();
20+
std::vector<unsigned char> vchSourceGroupKey = src.GetGroup(asmap);
21+
uint64_t hash1 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup(asmap) << vchSourceGroupKey).GetCheapHash();
2222
uint64_t hash2 = (CHashWriter(SER_GETHASH, 0) << nKey << vchSourceGroupKey << (hash1 % ADDRMAN_NEW_BUCKETS_PER_SOURCE_GROUP)).GetCheapHash();
2323
return hash2 % ADDRMAN_NEW_BUCKET_COUNT;
2424
}
@@ -153,7 +153,7 @@ void CAddrMan::MakeTried(CAddrInfo& info, int nId)
153153
assert(info.nRefCount == 0);
154154

155155
// which tried bucket to move the entry to
156-
int nKBucket = info.GetTriedBucket(nKey);
156+
int nKBucket = info.GetTriedBucket(nKey, m_asmap);
157157
int nKBucketPos = info.GetBucketPosition(nKey, false, nKBucket);
158158

159159
// first make space to add it (the existing tried entry there is moved to new, deleting whatever is there).
@@ -169,7 +169,7 @@ void CAddrMan::MakeTried(CAddrInfo& info, int nId)
169169
nTried--;
170170

171171
// find which new bucket it belongs to
172-
int nUBucket = infoOld.GetNewBucket(nKey);
172+
int nUBucket = infoOld.GetNewBucket(nKey, m_asmap);
173173
int nUBucketPos = infoOld.GetBucketPosition(nKey, true, nUBucket);
174174
ClearNew(nUBucket, nUBucketPos);
175175
assert(vvNew[nUBucket][nUBucketPos] == -1);
@@ -233,7 +233,7 @@ void CAddrMan::Good_(const CService& addr, bool test_before_evict, int64_t nTime
233233
return;
234234

235235
// which tried bucket to move the entry to
236-
int tried_bucket = info.GetTriedBucket(nKey);
236+
int tried_bucket = info.GetTriedBucket(nKey, m_asmap);
237237
int tried_bucket_pos = info.GetBucketPosition(nKey, false, tried_bucket);
238238

239239
// Will moving this address into tried evict another entry?
@@ -301,7 +301,7 @@ bool CAddrMan::Add_(const CAddress& addr, const CNetAddr& source, int64_t nTimeP
301301
fNew = true;
302302
}
303303

304-
int nUBucket = pinfo->GetNewBucket(nKey, source);
304+
int nUBucket = pinfo->GetNewBucket(nKey, source, m_asmap);
305305
int nUBucketPos = pinfo->GetBucketPosition(nKey, true, nUBucket);
306306
if (vvNew[nUBucket][nUBucketPos] != nId) {
307307
bool fInsert = vvNew[nUBucket][nUBucketPos] == -1;
@@ -439,7 +439,7 @@ int CAddrMan::Check_()
439439
if (vvTried[n][i] != -1) {
440440
if (!setTried.count(vvTried[n][i]))
441441
return -11;
442-
if (mapInfo[vvTried[n][i]].GetTriedBucket(nKey) != n)
442+
if (mapInfo[vvTried[n][i]].GetTriedBucket(nKey, m_asmap) != n)
443443
return -17;
444444
if (mapInfo[vvTried[n][i]].GetBucketPosition(nKey, false, n) != i)
445445
return -18;
@@ -545,7 +545,7 @@ void CAddrMan::ResolveCollisions_()
545545
CAddrInfo& info_new = mapInfo[id_new];
546546

547547
// Which tried bucket to move the entry to.
548-
int tried_bucket = info_new.GetTriedBucket(nKey);
548+
int tried_bucket = info_new.GetTriedBucket(nKey, m_asmap);
549549
int tried_bucket_pos = info_new.GetBucketPosition(nKey, false, tried_bucket);
550550
if (!info_new.IsValid()) { // id_new may no longer map to a valid address
551551
erase_collision = true;
@@ -609,10 +609,33 @@ CAddrInfo CAddrMan::SelectTriedCollision_()
609609
CAddrInfo& newInfo = mapInfo[id_new];
610610

611611
// which tried bucket to move the entry to
612-
int tried_bucket = newInfo.GetTriedBucket(nKey);
612+
int tried_bucket = newInfo.GetTriedBucket(nKey, m_asmap);
613613
int tried_bucket_pos = newInfo.GetBucketPosition(nKey, false, tried_bucket);
614614

615615
int id_old = vvTried[tried_bucket][tried_bucket_pos];
616616

617617
return mapInfo[id_old];
618618
}
619+
620+
std::vector<bool> CAddrMan::DecodeAsmap(fs::path path)
621+
{
622+
std::vector<bool> bits;
623+
FILE *filestr = fsbridge::fopen(path, "rb");
624+
CAutoFile file(filestr, SER_DISK, CLIENT_VERSION);
625+
if (file.IsNull()) {
626+
LogPrintf("Failed to open asmap file from disk.\n");
627+
return bits;
628+
}
629+
fseek(filestr, 0, SEEK_END);
630+
int length = ftell(filestr);
631+
LogPrintf("Opened asmap file %s (%d bytes) from disk.\n", path, length);
632+
fseek(filestr, 0, SEEK_SET);
633+
char cur_byte;
634+
for (int i = 0; i < length; ++i) {
635+
file >> cur_byte;
636+
for (int bit = 0; bit < 8; ++bit) {
637+
bits.push_back((cur_byte >> bit) & 1);
638+
}
639+
}
640+
return bits;
641+
}

src/addrman.h

Lines changed: 76 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,17 @@
1212
#include <sync.h>
1313
#include <timedata.h>
1414
#include <util/system.h>
15+
#include <clientversion.h>
1516

1617
#include <map>
1718
#include <set>
1819
#include <stdint.h>
1920
#include <vector>
21+
#include <iostream>
22+
#include <streams.h>
23+
#include <fs.h>
24+
#include <hash.h>
25+
2026

2127
/**
2228
* Extended statistics about a CAddress
@@ -72,15 +78,15 @@ class CAddrInfo : public CAddress
7278
}
7379

7480
//! Calculate in which "tried" bucket this entry belongs
75-
int GetTriedBucket(const uint256 &nKey) const;
81+
int GetTriedBucket(const uint256 &nKey, const std::vector<bool> &asmap) const;
7682

7783
//! Calculate in which "new" bucket this entry belongs, given a certain source
78-
int GetNewBucket(const uint256 &nKey, const CNetAddr& src) const;
84+
int GetNewBucket(const uint256 &nKey, const CNetAddr& src, const std::vector<bool> &asmap) const;
7985

8086
//! Calculate in which "new" bucket this entry belongs, using its default source
81-
int GetNewBucket(const uint256 &nKey) const
87+
int GetNewBucket(const uint256 &nKey, const std::vector<bool> &asmap) const
8288
{
83-
return GetNewBucket(nKey, source);
89+
return GetNewBucket(nKey, source, asmap);
8490
}
8591

8692
//! Calculate in which position of a bucket to store this entry.
@@ -174,6 +180,7 @@ static const int64_t ADDRMAN_TEST_WINDOW = 40*60; // 40 minutes
174180
*/
175181
class CAddrMan
176182
{
183+
friend class CAddrManTest;
177184
protected:
178185
//! critical section to protect the inner data structures
179186
mutable CCriticalSection cs;
@@ -268,9 +275,29 @@ class CAddrMan
268275
void SetServices_(const CService &addr, ServiceFlags nServices) EXCLUSIVE_LOCKS_REQUIRED(cs);
269276

270277
public:
278+
// Compressed IP->ASN mapping, loaded from a file when a node starts.
279+
// Should always be empty if no file was provided.
280+
// This mapping is then used for bucketing nodes in Addrman.
281+
//
282+
// If asmap is provided, nodes will be bucketed by
283+
// AS they belong to, in order to make it impossible for a node
284+
// to connect to several nodes hosted in a single AS.
285+
// This is done in response to the Erebus attack, but also to generally
286+
// diversify the connections every node creates,
287+
// especially useful when a large fraction of nodes
288+
// operate under a couple of cloud providers.
289+
//
290+
// If a new asmap was provided, the existing records
291+
// would be re-bucketed accordingly.
292+
std::vector<bool> m_asmap;
293+
294+
// Read asmap from provided binary file
295+
static std::vector<bool> DecodeAsmap(fs::path path);
296+
297+
271298
/**
272299
* serialized format:
273-
* * version byte (currently 1)
300+
* * version byte (1 for pre-asmap files, 2 for files including asmap version)
274301
* * 0x20 + nKey (serialized as if it were a vector, for backward compatibility)
275302
* * nNew
276303
* * nTried
@@ -302,7 +329,7 @@ class CAddrMan
302329
{
303330
LOCK(cs);
304331

305-
unsigned char nVersion = 1;
332+
unsigned char nVersion = 2;
306333
s << nVersion;
307334
s << ((unsigned char)32);
308335
s << nKey;
@@ -345,6 +372,13 @@ class CAddrMan
345372
}
346373
}
347374
}
375+
// Store asmap version after bucket entries so that it
376+
// can be ignored by older clients for backward compatibility.
377+
uint256 asmap_version;
378+
if (m_asmap.size() != 0) {
379+
asmap_version = SerializeHash(m_asmap);
380+
}
381+
s << asmap_version;
348382
}
349383

350384
template<typename Stream>
@@ -353,7 +387,6 @@ class CAddrMan
353387
LOCK(cs);
354388

355389
Clear();
356-
357390
unsigned char nVersion;
358391
s >> nVersion;
359392
unsigned char nKeySize;
@@ -383,16 +416,6 @@ class CAddrMan
383416
mapAddr[info] = n;
384417
info.nRandomPos = vRandom.size();
385418
vRandom.push_back(n);
386-
if (nVersion != 1 || nUBuckets != ADDRMAN_NEW_BUCKET_COUNT) {
387-
// In case the new table data cannot be used (nVersion unknown, or bucket count wrong),
388-
// immediately try to give them a reference based on their primary source address.
389-
int nUBucket = info.GetNewBucket(nKey);
390-
int nUBucketPos = info.GetBucketPosition(nKey, true, nUBucket);
391-
if (vvNew[nUBucket][nUBucketPos] == -1) {
392-
vvNew[nUBucket][nUBucketPos] = n;
393-
info.nRefCount++;
394-
}
395-
}
396419
}
397420
nIdCount = nNew;
398421

@@ -401,7 +424,7 @@ class CAddrMan
401424
for (int n = 0; n < nTried; n++) {
402425
CAddrInfo info;
403426
s >> info;
404-
int nKBucket = info.GetTriedBucket(nKey);
427+
int nKBucket = info.GetTriedBucket(nKey, m_asmap);
405428
int nKBucketPos = info.GetBucketPosition(nKey, false, nKBucket);
406429
if (vvTried[nKBucket][nKBucketPos] == -1) {
407430
info.nRandomPos = vRandom.size();
@@ -417,20 +440,48 @@ class CAddrMan
417440
}
418441
nTried -= nLost;
419442

420-
// Deserialize positions in the new table (if possible).
443+
// Store positions in the new table buckets to apply later (if possible).
444+
std::map<int, int> entryToBucket; // Represents which entry belonged to which bucket when serializing
445+
421446
for (int bucket = 0; bucket < nUBuckets; bucket++) {
422447
int nSize = 0;
423448
s >> nSize;
424449
for (int n = 0; n < nSize; n++) {
425450
int nIndex = 0;
426451
s >> nIndex;
427452
if (nIndex >= 0 && nIndex < nNew) {
428-
CAddrInfo &info = mapInfo[nIndex];
429-
int nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
430-
if (nVersion == 1 && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS) {
431-
info.nRefCount++;
432-
vvNew[bucket][nUBucketPos] = nIndex;
433-
}
453+
entryToBucket[nIndex] = bucket;
454+
}
455+
}
456+
}
457+
458+
uint256 supplied_asmap_version;
459+
if (m_asmap.size() != 0) {
460+
supplied_asmap_version = SerializeHash(m_asmap);
461+
}
462+
uint256 serialized_asmap_version;
463+
if (nVersion > 1) {
464+
s >> serialized_asmap_version;
465+
}
466+
467+
for (int n = 0; n < nNew; n++) {
468+
CAddrInfo &info = mapInfo[n];
469+
int bucket = entryToBucket[n];
470+
int nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
471+
if (nVersion == 2 && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 &&
472+
info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_version == supplied_asmap_version) {
473+
// Bucketing has not changed, using existing bucket positions for the new table
474+
vvNew[bucket][nUBucketPos] = n;
475+
info.nRefCount++;
476+
} else {
477+
// In case the new table data cannot be used (nVersion unknown, bucket count wrong or new asmap),
478+
// try to give them a reference based on their primary source address.
479+
LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n");
480+
bucket = info.GetNewBucket(nKey, m_asmap);
481+
nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
482+
if (vvNew[bucket][nUBucketPos] == -1) {
483+
vvNew[bucket][nUBucketPos] = n;
484+
info.nRefCount++;
434485
}
435486
}
436487
}

src/init.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,11 @@
5252
#include <util/threadnames.h>
5353
#include <util/translation.h>
5454
#include <util/validation.h>
55+
#include <util/asmap.h>
5556
#include <validation.h>
57+
#include <hash.h>
58+
59+
5660
#include <validationinterface.h>
5761
#include <walletinitinterface.h>
5862

@@ -97,6 +101,8 @@ static constexpr int DUMP_BANS_INTERVAL = 60 * 15;
97101

98102
static const char* FEE_ESTIMATES_FILENAME="fee_estimates.dat";
99103

104+
static const char* DEFAULT_ASMAP_FILENAME="ip_asn.map";
105+
100106
/**
101107
* The PID file facilities.
102108
*/
@@ -426,6 +432,7 @@ void SetupServerArgs()
426432
gArgs.AddArg("-peertimeout=<n>", strprintf("Specify p2p connection timeout in seconds. This option determines the amount of time a peer may be inactive before the connection to it is dropped. (minimum: 1, default: %d)", DEFAULT_PEER_CONNECT_TIMEOUT), ArgsManager::ALLOW_ANY | ArgsManager::DEBUG_ONLY, OptionsCategory::CONNECTION);
427433
gArgs.AddArg("-torcontrol=<ip>:<port>", strprintf("Tor control port to use if onion listening enabled (default: %s)", DEFAULT_TOR_CONTROL), ArgsManager::ALLOW_ANY, OptionsCategory::CONNECTION);
428434
gArgs.AddArg("-torpassword=<pass>", "Tor control port password (default: empty)", ArgsManager::ALLOW_ANY, OptionsCategory::CONNECTION);
435+
gArgs.AddArg("-asmap=<file>", "Specify asn mapping used for bucketing of the peers. Path should be relative to the -datadir path.", ArgsManager::ALLOW_ANY, OptionsCategory::CONNECTION);
429436
#ifdef USE_UPNP
430437
#if USE_UPNP
431438
gArgs.AddArg("-upnp", "Use UPnP to map the listening port (default: 1 when listening and no -proxy)", ArgsManager::ALLOW_ANY, OptionsCategory::CONNECTION);
@@ -1807,6 +1814,25 @@ bool AppInitMain(NodeContext& node)
18071814
return false;
18081815
}
18091816

1817+
// Read asmap file if configured
1818+
if (gArgs.IsArgSet("-asmap")) {
1819+
std::string asmap_file = gArgs.GetArg("-asmap", "");
1820+
if (asmap_file.empty()) {
1821+
asmap_file = DEFAULT_ASMAP_FILENAME;
1822+
}
1823+
const fs::path asmap_path = GetDataDir() / asmap_file;
1824+
std::vector<bool> asmap = CAddrMan::DecodeAsmap(asmap_path);
1825+
if (asmap.size() == 0) {
1826+
InitError(strprintf(_("Could not find or parse specified asmap: '%s'").translated, asmap_path));
1827+
return false;
1828+
}
1829+
node.connman->SetAsmap(asmap);
1830+
const uint256 asmap_version = SerializeHash(asmap);
1831+
LogPrintf("Using asmap version %s for IP bucketing.\n", asmap_version.ToString());
1832+
} else {
1833+
LogPrintf("Using /16 prefix for IP bucketing.\n");
1834+
}
1835+
18101836
// ********************************************************* Step 13: finished
18111837

18121838
SetRPCWarmupFinished();

0 commit comments

Comments
 (0)