Skip to content

Commit c0c409d

Browse files
committed
Merge #19697: Improvements on ADDR caching
0d04784 Refactor the functional test (Gleb Naumenko) 83ad65f Address nits in ADDR caching (Gleb Naumenko) 81b00f8 Add indexing ADDR cache by local socket addr (Gleb Naumenko) 42ec558 Justify the choice of ADDR cache lifetime (Gleb Naumenko) Pull request description: This is a follow-up on #18991 which does 3 things: - improves privacy of a node listening to multiple addresses via adding cache index by local socket addr (suggested [here](bitcoin/bitcoin#18991 (comment))) - documents on the choice of 24h cache lifetime - addresses nits from #18991 ACKs for top commit: jnewbery: utACK 0d04784 vasild: ACK 0d04784 jonatack: Code review ACK 0d04784 Tree-SHA512: bb65a34dd1ce2811186d3e4469bc33e8399cebaaa494ce13041c7cff23275870e4176a719f7a72f8d779c49f8b2344bf4fa1aeb3ea4e2626d5ae76514f00a750
2 parents b99a163 + 0d04784 commit c0c409d

File tree

4 files changed

+64
-46
lines changed

4 files changed

+64
-46
lines changed

src/net.cpp

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ const std::string NET_MESSAGE_COMMAND_OTHER = "*other*";
9494

9595
static const uint64_t RANDOMIZER_ID_NETGROUP = 0x6c0edd8036ef4036ULL; // SHA256("netgroup")[0:8]
9696
static const uint64_t RANDOMIZER_ID_LOCALHOSTNONCE = 0xd93e69e2bbfa5735ULL; // SHA256("localhostnonce")[0:8]
97+
static const uint64_t RANDOMIZER_ID_ADDRCACHE = 0x1cf2e4ddd306dda9ULL; // SHA256("addrcache")[0:8]
9798
//
9899
// Global state variables
99100
//
@@ -2560,15 +2561,47 @@ std::vector<CAddress> CConnman::GetAddresses(size_t max_addresses, size_t max_pc
25602561
return addresses;
25612562
}
25622563

2563-
std::vector<CAddress> CConnman::GetAddresses(Network requestor_network, size_t max_addresses, size_t max_pct)
2564+
std::vector<CAddress> CConnman::GetAddresses(CNode& requestor, size_t max_addresses, size_t max_pct)
25642565
{
2566+
SOCKET socket;
2567+
WITH_LOCK(requestor.cs_hSocket, socket = requestor.hSocket);
2568+
auto local_socket_bytes = GetBindAddress(socket).GetAddrBytes();
2569+
uint64_t cache_id = GetDeterministicRandomizer(RANDOMIZER_ID_ADDRCACHE)
2570+
.Write(requestor.addr.GetNetwork())
2571+
.Write(local_socket_bytes.data(), local_socket_bytes.size())
2572+
.Finalize();
25652573
const auto current_time = GetTime<std::chrono::microseconds>();
2566-
if (m_addr_response_caches.find(requestor_network) == m_addr_response_caches.end() ||
2567-
m_addr_response_caches[requestor_network].m_update_addr_response < current_time) {
2568-
m_addr_response_caches[requestor_network].m_addrs_response_cache = GetAddresses(max_addresses, max_pct);
2569-
m_addr_response_caches[requestor_network].m_update_addr_response = current_time + std::chrono::hours(21) + GetRandMillis(std::chrono::hours(6));
2574+
auto r = m_addr_response_caches.emplace(cache_id, CachedAddrResponse{});
2575+
CachedAddrResponse& cache_entry = r.first->second;
2576+
if (cache_entry.m_cache_entry_expiration < current_time) { // If emplace() added new one it has expiration 0.
2577+
cache_entry.m_addrs_response_cache = GetAddresses(max_addresses, max_pct);
2578+
// Choosing a proper cache lifetime is a trade-off between the privacy leak minimization
2579+
// and the usefulness of ADDR responses to honest users.
2580+
//
2581+
// Longer cache lifetime makes it more difficult for an attacker to scrape
2582+
// enough AddrMan data to maliciously infer something useful.
2583+
// By the time an attacker scraped enough AddrMan records, most of
2584+
// the records should be old enough to not leak topology info by
2585+
// e.g. analyzing real-time changes in timestamps.
2586+
//
2587+
// It takes only several hundred requests to scrape everything from an AddrMan containing 100,000 nodes,
2588+
// so ~24 hours of cache lifetime indeed makes the data less inferable by the time
2589+
// most of it could be scraped (considering that timestamps are updated via
2590+
// ADDR self-announcements and when nodes communicate).
2591+
// We also should be robust to those attacks which may not require scraping *full* victim's AddrMan
2592+
// (because even several timestamps of the same handful of nodes may leak privacy).
2593+
//
2594+
// On the other hand, longer cache lifetime makes ADDR responses
2595+
// outdated and less useful for an honest requestor, e.g. if most nodes
2596+
// in the ADDR response are no longer active.
2597+
//
2598+
// However, the churn in the network is known to be rather low. Since we consider
2599+
// nodes to be "terrible" (see IsTerrible()) if the timestamps are older than 30 days,
2600+
// max. 24 hours of "penalty" due to cache shouldn't make any meaningful difference
2601+
// in terms of the freshness of the response.
2602+
cache_entry.m_cache_entry_expiration = current_time + std::chrono::hours(21) + GetRandMillis(std::chrono::hours(6));
25702603
}
2571-
return m_addr_response_caches[requestor_network].m_addrs_response_cache;
2604+
return cache_entry.m_addrs_response_cache;
25722605
}
25732606

25742607
bool CConnman::AddNode(const std::string& strNode)

src/net.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ class CConnman
311311
* A non-malicious call (from RPC or a peer with addr permission) should
312312
* call the function without a parameter to avoid using the cache.
313313
*/
314-
std::vector<CAddress> GetAddresses(Network requestor_network, size_t max_addresses, size_t max_pct);
314+
std::vector<CAddress> GetAddresses(CNode& requestor, size_t max_addresses, size_t max_pct);
315315

316316
// This allows temporarily exceeding m_max_outbound_full_relay, with the goal of finding
317317
// a peer that is better than all our current peers.
@@ -484,20 +484,24 @@ class CConnman
484484
*/
485485
struct CachedAddrResponse {
486486
std::vector<CAddress> m_addrs_response_cache;
487-
std::chrono::microseconds m_update_addr_response{0};
487+
std::chrono::microseconds m_cache_entry_expiration{0};
488488
};
489489

490490
/**
491491
* Addr responses stored in different caches
492-
* per network prevent cross-network node identification.
492+
* per (network, local socket) prevent cross-network node identification.
493493
* If a node for example is multi-homed under Tor and IPv6,
494494
* a single cache (or no cache at all) would let an attacker
495495
* to easily detect that it is the same node by comparing responses.
496-
* The used memory equals to 1000 CAddress records (or around 32 bytes) per
496+
* Indexing by local socket prevents leakage when a node has multiple
497+
* listening addresses on the same network.
498+
*
499+
* The used memory equals to 1000 CAddress records (or around 40 bytes) per
497500
* distinct Network (up to 5) we have/had an inbound peer from,
498-
* resulting in at most ~160 KB.
501+
* resulting in at most ~196 KB. Every separate local socket may
502+
* add up to ~196 KB extra.
499503
*/
500-
std::map<Network, CachedAddrResponse> m_addr_response_caches;
504+
std::map<uint64_t, CachedAddrResponse> m_addr_response_caches;
501505

502506
/**
503507
* Services this instance offers.

src/net_processing.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3545,7 +3545,7 @@ void PeerManager::ProcessMessage(CNode& pfrom, const std::string& msg_type, CDat
35453545
if (pfrom.HasPermission(PF_ADDR)) {
35463546
vAddr = m_connman.GetAddresses(MAX_ADDR_TO_SEND, MAX_PCT_ADDR_TO_SEND);
35473547
} else {
3548-
vAddr = m_connman.GetAddresses(pfrom.addr.GetNetwork(), MAX_ADDR_TO_SEND, MAX_PCT_ADDR_TO_SEND);
3548+
vAddr = m_connman.GetAddresses(pfrom, MAX_ADDR_TO_SEND, MAX_PCT_ADDR_TO_SEND);
35493549
}
35503550
FastRandomContext insecure_rand;
35513551
for (const CAddress &addr : vAddr) {

test/functional/p2p_getaddr_caching.py

Lines changed: 14 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,8 @@
55
"""Test addr response caching"""
66

77
import time
8-
from test_framework.messages import (
9-
CAddress,
10-
NODE_NETWORK,
11-
NODE_WITNESS,
12-
msg_addr,
13-
msg_getaddr,
14-
)
8+
9+
from test_framework.messages import msg_getaddr
1510
from test_framework.p2p import (
1611
P2PInterface,
1712
p2p_lock
@@ -21,21 +16,9 @@
2116
assert_equal,
2217
)
2318

19+
# As defined in net_processing.
2420
MAX_ADDR_TO_SEND = 1000
25-
26-
def gen_addrs(n):
27-
addrs = []
28-
for i in range(n):
29-
addr = CAddress()
30-
addr.time = int(time.time())
31-
addr.nServices = NODE_NETWORK | NODE_WITNESS
32-
# Use first octets to occupy different AddrMan buckets
33-
first_octet = i >> 8
34-
second_octet = i % 256
35-
addr.ip = "{}.{}.1.1".format(first_octet, second_octet)
36-
addr.port = 8333
37-
addrs.append(addr)
38-
return addrs
21+
MAX_PCT_ADDR_TO_SEND = 23
3922

4023
class AddrReceiver(P2PInterface):
4124

@@ -62,18 +45,16 @@ def set_test_params(self):
6245
self.num_nodes = 1
6346

6447
def run_test(self):
65-
self.log.info('Create connection that sends and requests addr messages')
66-
addr_source = self.nodes[0].add_p2p_connection(P2PInterface())
67-
68-
msg_send_addrs = msg_addr()
6948
self.log.info('Fill peer AddrMan with a lot of records')
70-
# Since these addrs are sent from the same source, not all of them will be stored,
71-
# because we allocate a limited number of AddrMan buckets per addr source.
72-
total_addrs = 10000
73-
addrs = gen_addrs(total_addrs)
74-
for i in range(int(total_addrs/MAX_ADDR_TO_SEND)):
75-
msg_send_addrs.addrs = addrs[i * MAX_ADDR_TO_SEND:(i + 1) * MAX_ADDR_TO_SEND]
76-
addr_source.send_and_ping(msg_send_addrs)
49+
for i in range(10000):
50+
first_octet = i >> 8
51+
second_octet = i % 256
52+
a = "{}.{}.1.1".format(first_octet, second_octet)
53+
self.nodes[0].addpeeraddress(a, 8333)
54+
55+
# Need to make sure we hit MAX_ADDR_TO_SEND records in the addr response later because
56+
# only a fraction of all known addresses can be cached and returned.
57+
assert(len(self.nodes[0].getnodeaddresses(0)) > int(MAX_ADDR_TO_SEND / (MAX_PCT_ADDR_TO_SEND / 100)))
7758

7859
responses = []
7960
self.log.info('Send many addr requests within short time to receive same response')
@@ -89,7 +70,7 @@ def run_test(self):
8970
responses.append(addr_receiver.get_received_addrs())
9071
for response in responses[1:]:
9172
assert_equal(response, responses[0])
92-
assert(len(response) < MAX_ADDR_TO_SEND)
73+
assert(len(response) == MAX_ADDR_TO_SEND)
9374

9475
cur_mock_time += 3 * 24 * 60 * 60
9576
self.nodes[0].setmocktime(cur_mock_time)

0 commit comments

Comments
 (0)