Skip to content

Commit 2f959a5

Browse files
committed
Merge #11560: Connect to a new outbound peer if our tip is stale
6262915 Add unit test for stale tip checking (Suhas Daftuar) 83df257 Add CConnmanTest to mutate g_connman in tests (João Barbosa) ac7b37c Connect to an extra outbound peer if our tip is stale (Suhas Daftuar) db32a65 Track tip update time and last new block announcement from each peer (Suhas Daftuar) 2d4327d net: Allow connecting to extra outbound peers (Suhas Daftuar) Pull request description: This is an alternative approach to #11534. Rather than disconnect an outbound peer when our tip looks stale, instead try to connect to an additional outbound peer. Periodically, check to see if we have more outbound peers than we target (ie if any extra peers are in use), and if so, disconnect the one that least recently announced a new block (breaking ties by choosing the newest peer that we connected to). Tree-SHA512: 8f19e910e0bb36867f81783e020af225f356451899adfc7ade1895d6d3bd5afe51c83759610dfd10c62090c4fe404efa0283b2f63fde0bd7da898a1aaa7fb281
2 parents 7008b07 + 6262915 commit 2f959a5

File tree

8 files changed

+313
-8
lines changed

8 files changed

+313
-8
lines changed

src/init.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1270,7 +1270,7 @@ bool AppInitMain(boost::thread_group& threadGroup, CScheduler& scheduler)
12701270
g_connman = std::unique_ptr<CConnman>(new CConnman(GetRand(std::numeric_limits<uint64_t>::max()), GetRand(std::numeric_limits<uint64_t>::max())));
12711271
CConnman& connman = *g_connman;
12721272

1273-
peerLogic.reset(new PeerLogicValidation(&connman));
1273+
peerLogic.reset(new PeerLogicValidation(&connman, scheduler));
12741274
RegisterValidationInterface(peerLogic.get());
12751275

12761276
// sanitize comments per BIP-0014, format user agent and check total size

src/net.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1693,6 +1693,37 @@ void CConnman::ProcessOneShot()
16931693
}
16941694
}
16951695

1696+
bool CConnman::GetTryNewOutboundPeer()
1697+
{
1698+
return m_try_another_outbound_peer;
1699+
}
1700+
1701+
void CConnman::SetTryNewOutboundPeer(bool flag)
1702+
{
1703+
m_try_another_outbound_peer = flag;
1704+
LogPrint(BCLog::NET, "net: setting try another outbound peer=%s\n", flag ? "true" : "false");
1705+
}
1706+
1707+
// Return the number of peers we have over our outbound connection limit
1708+
// Exclude peers that are marked for disconnect, or are going to be
1709+
// disconnected soon (eg one-shots and feelers)
1710+
// Also exclude peers that haven't finished initial connection handshake yet
1711+
// (so that we don't decide we're over our desired connection limit, and then
1712+
// evict some peer that has finished the handshake)
1713+
int CConnman::GetExtraOutboundCount()
1714+
{
1715+
int nOutbound = 0;
1716+
{
1717+
LOCK(cs_vNodes);
1718+
for (CNode* pnode : vNodes) {
1719+
if (!pnode->fInbound && !pnode->m_manual_connection && !pnode->fFeeler && !pnode->fDisconnect && !pnode->fOneShot && pnode->fSuccessfullyConnected) {
1720+
++nOutbound;
1721+
}
1722+
}
1723+
}
1724+
return std::max(nOutbound - nMaxOutbound, 0);
1725+
}
1726+
16961727
void CConnman::ThreadOpenConnections(const std::vector<std::string> connect)
16971728
{
16981729
// Connect to specific addresses
@@ -1781,7 +1812,8 @@ void CConnman::ThreadOpenConnections(const std::vector<std::string> connect)
17811812
// * Only make a feeler connection once every few minutes.
17821813
//
17831814
bool fFeeler = false;
1784-
if (nOutbound >= nMaxOutbound) {
1815+
1816+
if (nOutbound >= nMaxOutbound && !GetTryNewOutboundPeer()) {
17851817
int64_t nTime = GetTimeMicros(); // The current time right now (in microseconds).
17861818
if (nTime > nNextFeeler) {
17871819
nNextFeeler = PoissonNextSend(nTime, FEELER_INTERVAL);
@@ -2204,6 +2236,7 @@ CConnman::CConnman(uint64_t nSeed0In, uint64_t nSeed1In) : nSeed0(nSeed0In), nSe
22042236
semOutbound = nullptr;
22052237
semAddnode = nullptr;
22062238
flagInterruptMsgProc = false;
2239+
SetTryNewOutboundPeer(false);
22072240

22082241
Options connOptions;
22092242
Init(connOptions);

src/net.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,19 @@ class CConnman
251251
void GetBanned(banmap_t &banmap);
252252
void SetBanned(const banmap_t &banmap);
253253

254+
// This allows temporarily exceeding nMaxOutbound, with the goal of finding
255+
// a peer that is better than all our current peers.
256+
void SetTryNewOutboundPeer(bool flag);
257+
bool GetTryNewOutboundPeer();
258+
259+
// Return the number of outbound peers we have in excess of our target (eg,
260+
// if we previously called SetTryNewOutboundPeer(true), and have since set
261+
// to false, we may have extra peers that we wish to disconnect). This may
262+
// return a value less than (num_outbound_connections - num_outbound_slots)
263+
// in cases where some outbound connections are not yet fully connected, or
264+
// not yet fully disconnected.
265+
int GetExtraOutboundCount();
266+
254267
bool AddNode(const std::string& node);
255268
bool RemoveAddedNode(const std::string& node);
256269
std::vector<AddedNodeInfo> GetAddedNodeInfo();
@@ -413,6 +426,13 @@ class CConnman
413426
std::thread threadOpenAddedConnections;
414427
std::thread threadOpenConnections;
415428
std::thread threadMessageHandler;
429+
430+
/** flag for deciding to connect to an extra outbound peer,
431+
* in excess of nMaxOutbound
432+
* This takes the place of a feeler connection */
433+
std::atomic_bool m_try_another_outbound_peer;
434+
435+
friend struct CConnmanTest;
416436
};
417437
extern std::unique_ptr<CConnman> g_connman;
418438
void Discover(boost::thread_group& threadGroup);

src/net_processing.cpp

Lines changed: 140 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "primitives/transaction.h"
2424
#include "random.h"
2525
#include "reverse_iterator.h"
26+
#include "scheduler.h"
2627
#include "tinyformat.h"
2728
#include "txmempool.h"
2829
#include "ui_interface.h"
@@ -127,6 +128,9 @@ namespace {
127128
/** Number of outbound peers with m_chain_sync.m_protect. */
128129
int g_outbound_peers_with_protect_from_disconnect = 0;
129130

131+
/** When our tip was last updated. */
132+
int64_t g_last_tip_update = 0;
133+
130134
/** Relay map, protected by cs_main. */
131135
typedef std::map<uint256, CTransactionRef> MapRelay;
132136
MapRelay mapRelay;
@@ -231,6 +235,9 @@ struct CNodeState {
231235

232236
ChainSyncTimeoutState m_chain_sync;
233237

238+
//! Time of last new block announcement
239+
int64_t m_last_block_announcement;
240+
234241
CNodeState(CAddress addrIn, std::string addrNameIn) : address(addrIn), name(addrNameIn) {
235242
fCurrentlyConnected = false;
236243
nMisbehavior = 0;
@@ -254,6 +261,7 @@ struct CNodeState {
254261
fWantsCmpctWitness = false;
255262
fSupportsDesiredCmpctVersion = false;
256263
m_chain_sync = { 0, nullptr, false, false };
264+
m_last_block_announcement = 0;
257265
}
258266
};
259267

@@ -427,6 +435,15 @@ void MaybeSetPeerAsAnnouncingHeaderAndIDs(NodeId nodeid, CConnman* connman) {
427435
}
428436
}
429437

438+
bool TipMayBeStale(const Consensus::Params &consensusParams)
439+
{
440+
AssertLockHeld(cs_main);
441+
if (g_last_tip_update == 0) {
442+
g_last_tip_update = GetTime();
443+
}
444+
return g_last_tip_update < GetTime() - consensusParams.nPowTargetSpacing * 3 && mapBlocksInFlight.empty();
445+
}
446+
430447
// Requires cs_main
431448
bool CanDirectFetch(const Consensus::Params &consensusParams)
432449
{
@@ -533,6 +550,15 @@ void FindNextBlocksToDownload(NodeId nodeid, unsigned int count, std::vector<con
533550

534551
} // namespace
535552

553+
// This function is used for testing the stale tip eviction logic, see
554+
// DoS_tests.cpp
555+
void UpdateLastBlockAnnounceTime(NodeId node, int64_t time_in_seconds)
556+
{
557+
LOCK(cs_main);
558+
CNodeState *state = State(node);
559+
if (state) state->m_last_block_announcement = time_in_seconds;
560+
}
561+
536562
// Returns true for outbound peers, excluding manual connections, feelers, and
537563
// one-shots
538564
bool IsOutboundDisconnectionCandidate(const CNode *node)
@@ -764,9 +790,17 @@ static bool StaleBlockRequestAllowed(const CBlockIndex* pindex, const Consensus:
764790
(GetBlockProofEquivalentTime(*pindexBestHeader, *pindex, *pindexBestHeader, consensusParams) < STALE_RELAY_AGE_LIMIT);
765791
}
766792

767-
PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn) : connman(connmanIn) {
793+
PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn, CScheduler &scheduler) : connman(connmanIn), m_stale_tip_check_time(0) {
768794
// Initialize global variables that cannot be constructed at startup.
769795
recentRejects.reset(new CRollingBloomFilter(120000, 0.000001));
796+
797+
const Consensus::Params& consensusParams = Params().GetConsensus();
798+
// Stale tip checking and peer eviction are on two different timers, but we
799+
// don't want them to get out of sync due to drift in the scheduler, so we
800+
// combine them in one function and schedule at the quicker (peer-eviction)
801+
// timer.
802+
static_assert(EXTRA_PEER_CHECK_INTERVAL < STALE_CHECK_INTERVAL, "peer eviction timer should be less than stale tip check timer");
803+
scheduler.scheduleEvery(std::bind(&PeerLogicValidation::CheckForStaleTipAndEvictPeers, this, consensusParams), EXTRA_PEER_CHECK_INTERVAL * 1000);
770804
}
771805

772806
void PeerLogicValidation::BlockConnected(const std::shared_ptr<const CBlock>& pblock, const CBlockIndex* pindex, const std::vector<CTransactionRef>& vtxConflicted) {
@@ -797,6 +831,8 @@ void PeerLogicValidation::BlockConnected(const std::shared_ptr<const CBlock>& pb
797831
}
798832
LogPrint(BCLog::MEMPOOL, "Erased %d orphan tx included or conflicted by block\n", nErased);
799833
}
834+
835+
g_last_tip_update = GetTime();
800836
}
801837

802838
// All of the following cache a recent block, and are protected by cs_most_recent_block
@@ -1215,6 +1251,7 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve
12151251
return true;
12161252
}
12171253

1254+
bool received_new_header = false;
12181255
const CBlockIndex *pindexLast = nullptr;
12191256
{
12201257
LOCK(cs_main);
@@ -1255,6 +1292,12 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve
12551292
}
12561293
hashLastBlock = header.GetHash();
12571294
}
1295+
1296+
// If we don't have the last header, then they'll have given us
1297+
// something new (if these headers are valid).
1298+
if (mapBlockIndex.find(hashLastBlock) == mapBlockIndex.end()) {
1299+
received_new_header = true;
1300+
}
12581301
}
12591302

12601303
CValidationState state;
@@ -1319,6 +1362,10 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve
13191362
// because it is set in UpdateBlockAvailability. Some nullptr checks
13201363
// are still present, however, as belt-and-suspenders.
13211364

1365+
if (received_new_header && pindexLast->nChainWork > chainActive.Tip()->nChainWork) {
1366+
nodestate->m_last_block_announcement = GetTime();
1367+
}
1368+
13221369
if (nCount == MAX_HEADERS_RESULTS) {
13231370
// Headers message had its maximum size; the peer may have more headers.
13241371
// TODO: optimize: if pindexLast is an ancestor of chainActive.Tip or pindexBestHeader, continue
@@ -1403,6 +1450,7 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve
14031450
// If this is an outbound peer, check to see if we should protect
14041451
// it from the bad/lagging chain logic.
14051452
if (g_outbound_peers_with_protect_from_disconnect < MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT && nodestate->pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork && !nodestate->m_chain_sync.m_protect) {
1453+
LogPrint(BCLog::NET, "Protecting outbound peer=%d from eviction\n", pfrom->GetId());
14061454
nodestate->m_chain_sync.m_protect = true;
14071455
++g_outbound_peers_with_protect_from_disconnect;
14081456
}
@@ -2219,6 +2267,8 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr
22192267
CBlockHeaderAndShortTxIDs cmpctblock;
22202268
vRecv >> cmpctblock;
22212269

2270+
bool received_new_header = false;
2271+
22222272
{
22232273
LOCK(cs_main);
22242274

@@ -2228,6 +2278,10 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr
22282278
connman->PushMessage(pfrom, msgMaker.Make(NetMsgType::GETHEADERS, chainActive.GetLocator(pindexBestHeader), uint256()));
22292279
return true;
22302280
}
2281+
2282+
if (mapBlockIndex.find(cmpctblock.header.GetHash()) == mapBlockIndex.end()) {
2283+
received_new_header = true;
2284+
}
22312285
}
22322286

22332287
const CBlockIndex *pindex = nullptr;
@@ -2266,6 +2320,14 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr
22662320
assert(pindex);
22672321
UpdateBlockAvailability(pfrom->GetId(), pindex->GetBlockHash());
22682322

2323+
CNodeState *nodestate = State(pfrom->GetId());
2324+
2325+
// If this was a new header with more work than our tip, update the
2326+
// peer's last block announcement time
2327+
if (received_new_header && pindex->nChainWork > chainActive.Tip()->nChainWork) {
2328+
nodestate->m_last_block_announcement = GetTime();
2329+
}
2330+
22692331
std::map<uint256, std::pair<NodeId, std::list<QueuedBlock>::iterator> >::iterator blockInFlightIt = mapBlocksInFlight.find(pindex->GetBlockHash());
22702332
bool fAlreadyInFlight = blockInFlightIt != mapBlocksInFlight.end();
22712333

@@ -2288,8 +2350,6 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr
22882350
if (!fAlreadyInFlight && !CanDirectFetch(chainparams.GetConsensus()))
22892351
return true;
22902352

2291-
CNodeState *nodestate = State(pfrom->GetId());
2292-
22932353
if (IsWitnessEnabled(pindex->pprev, chainparams.GetConsensus()) && !nodestate->fSupportsDesiredCmpctVersion) {
22942354
// Don't bother trying to process compact blocks from v1 peers
22952355
// after segwit activates.
@@ -2967,6 +3027,83 @@ void PeerLogicValidation::ConsiderEviction(CNode *pto, int64_t time_in_seconds)
29673027
}
29683028
}
29693029

3030+
void PeerLogicValidation::EvictExtraOutboundPeers(int64_t time_in_seconds)
3031+
{
3032+
// Check whether we have too many outbound peers
3033+
int extra_peers = connman->GetExtraOutboundCount();
3034+
if (extra_peers > 0) {
3035+
// If we have more outbound peers than we target, disconnect one.
3036+
// Pick the outbound peer that least recently announced
3037+
// us a new block, with ties broken by choosing the more recent
3038+
// connection (higher node id)
3039+
NodeId worst_peer = -1;
3040+
int64_t oldest_block_announcement = std::numeric_limits<int64_t>::max();
3041+
3042+
LOCK(cs_main);
3043+
3044+
connman->ForEachNode([&](CNode* pnode) {
3045+
// Ignore non-outbound peers, or nodes marked for disconnect already
3046+
if (!IsOutboundDisconnectionCandidate(pnode) || pnode->fDisconnect) return;
3047+
CNodeState *state = State(pnode->GetId());
3048+
if (state == nullptr) return; // shouldn't be possible, but just in case
3049+
// Don't evict our protected peers
3050+
if (state->m_chain_sync.m_protect) return;
3051+
if (state->m_last_block_announcement < oldest_block_announcement || (state->m_last_block_announcement == oldest_block_announcement && pnode->GetId() > worst_peer)) {
3052+
worst_peer = pnode->GetId();
3053+
oldest_block_announcement = state->m_last_block_announcement;
3054+
}
3055+
});
3056+
if (worst_peer != -1) {
3057+
bool disconnected = connman->ForNode(worst_peer, [&](CNode *pnode) {
3058+
// Only disconnect a peer that has been connected to us for
3059+
// some reasonable fraction of our check-frequency, to give
3060+
// it time for new information to have arrived.
3061+
// Also don't disconnect any peer we're trying to download a
3062+
// block from.
3063+
CNodeState &state = *State(pnode->GetId());
3064+
if (time_in_seconds - pnode->nTimeConnected > MINIMUM_CONNECT_TIME && state.nBlocksInFlight == 0) {
3065+
LogPrint(BCLog::NET, "disconnecting extra outbound peer=%d (last block announcement received at time %d)\n", pnode->GetId(), oldest_block_announcement);
3066+
pnode->fDisconnect = true;
3067+
return true;
3068+
} else {
3069+
LogPrint(BCLog::NET, "keeping outbound peer=%d chosen for eviction (connect time: %d, blocks_in_flight: %d)\n", pnode->GetId(), pnode->nTimeConnected, state.nBlocksInFlight);
3070+
return false;
3071+
}
3072+
});
3073+
if (disconnected) {
3074+
// If we disconnected an extra peer, that means we successfully
3075+
// connected to at least one peer after the last time we
3076+
// detected a stale tip. Don't try any more extra peers until
3077+
// we next detect a stale tip, to limit the load we put on the
3078+
// network from these extra connections.
3079+
connman->SetTryNewOutboundPeer(false);
3080+
}
3081+
}
3082+
}
3083+
}
3084+
3085+
void PeerLogicValidation::CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams)
3086+
{
3087+
if (connman == nullptr) return;
3088+
3089+
int64_t time_in_seconds = GetTime();
3090+
3091+
EvictExtraOutboundPeers(time_in_seconds);
3092+
3093+
if (time_in_seconds > m_stale_tip_check_time) {
3094+
LOCK(cs_main);
3095+
// Check whether our tip is stale, and if so, allow using an extra
3096+
// outbound peer
3097+
if (TipMayBeStale(consensusParams)) {
3098+
LogPrintf("Potential stale tip detected, will try using extra outbound peer (last tip update: %d seconds ago)\n", time_in_seconds - g_last_tip_update);
3099+
connman->SetTryNewOutboundPeer(true);
3100+
} else if (connman->GetTryNewOutboundPeer()) {
3101+
connman->SetTryNewOutboundPeer(false);
3102+
}
3103+
m_stale_tip_check_time = time_in_seconds + STALE_CHECK_INTERVAL;
3104+
}
3105+
}
3106+
29703107
class CompareInvMempoolOrder
29713108
{
29723109
CTxMemPool *mp;

0 commit comments

Comments
 (0)