@@ -110,8 +110,11 @@ static constexpr auto GETDATA_TX_INTERVAL{60s};
110110static const unsigned int MAX_GETDATA_SZ = 1000 ;
111111/* * Number of blocks that can be requested at any given time from a single peer. */
112112static const int MAX_BLOCKS_IN_TRANSIT_PER_PEER = 16 ;
113- /* * Time during which a peer must stall block download progress before being disconnected. */
114- static constexpr auto BLOCK_STALLING_TIMEOUT{2s};
113+ /* * Default time during which a peer must stall block download progress before being disconnected.
114+ * The actual timeout is increased temporarily if peers are disconnected for hitting the timeout. */
115+ static constexpr auto BLOCK_STALLING_TIMEOUT_DEFAULT{2s};
116+ /* * Maximum timeout for stalling block download. */
117+ static constexpr auto BLOCK_STALLING_TIMEOUT_MAX{64s};
115118/* * Number of headers sent in one getheaders result. We rely on the assumption that if a peer sends
116119 * less than this number, we reached its tip. Changing this value is a protocol upgrade. */
117120static const unsigned int MAX_HEADERS_RESULTS = 2000 ;
@@ -705,6 +708,9 @@ class PeerManagerImpl final : public PeerManager
705708 /* * Number of preferable block download peers. */
706709 int m_num_preferred_download_peers GUARDED_BY (cs_main){0 };
707710
711+ /* * Stalling timeout for blocks in initial block download (IBD). */
712+ std::atomic<std::chrono::seconds> m_block_stalling_timeout{BLOCK_STALLING_TIMEOUT_DEFAULT};
713+
708714 bool AlreadyHaveTx (const GenTxid& gtxid)
709715 EXCLUSIVE_LOCKS_REQUIRED(cs_main, !m_recent_confirmed_transactions_mutex);
710716
@@ -1700,7 +1706,8 @@ void PeerManagerImpl::StartScheduledTasks(CScheduler& scheduler)
17001706/* *
17011707 * Evict orphan txn pool entries based on a newly connected
17021708 * block, remember the recently confirmed transactions, and delete tracked
1703- * announcements for them. Also save the time of the last tip update.
1709+ * announcements for them. Also save the time of the last tip update and
1710+ * possibly reduce the dynamic block stalling timeout.
17041711 */
17051712void PeerManagerImpl::BlockConnected (const std::shared_ptr<const CBlock>& pblock, const CBlockIndex* pindex)
17061713{
@@ -1723,6 +1730,16 @@ void PeerManagerImpl::BlockConnected(const std::shared_ptr<const CBlock>& pblock
17231730 m_txrequest.ForgetTxHash (ptx->GetWitnessHash ());
17241731 }
17251732 }
1733+
1734+ // In case the dynamic timeout was doubled once or more, reduce it slowly back to its default value
1735+ auto stalling_timeout = m_block_stalling_timeout.load ();
1736+ Assume (stalling_timeout >= BLOCK_STALLING_TIMEOUT_DEFAULT);
1737+ if (stalling_timeout != BLOCK_STALLING_TIMEOUT_DEFAULT) {
1738+ const auto new_timeout = std::max (std::chrono::duration_cast<std::chrono::seconds>(stalling_timeout * 0.85 ), BLOCK_STALLING_TIMEOUT_DEFAULT);
1739+ if (m_block_stalling_timeout.compare_exchange_strong (stalling_timeout, new_timeout)) {
1740+ LogPrint (BCLog::NET, " Decreased stalling timeout to %d seconds\n " , new_timeout.count ());
1741+ }
1742+ }
17261743}
17271744
17281745void PeerManagerImpl::BlockDisconnected (const std::shared_ptr<const CBlock> &block, const CBlockIndex* pindex)
@@ -5225,12 +5242,19 @@ bool PeerManagerImpl::SendMessages(CNode* pto)
52255242 m_connman.PushMessage (pto, msgMaker.Make (NetMsgType::INV, vInv));
52265243
52275244 // Detect whether we're stalling
5228- if (state.m_stalling_since .count () && state.m_stalling_since < current_time - BLOCK_STALLING_TIMEOUT) {
5245+ auto stalling_timeout = m_block_stalling_timeout.load ();
5246+ if (state.m_stalling_since .count () && state.m_stalling_since < current_time - stalling_timeout) {
52295247 // Stalling only triggers when the block download window cannot move. During normal steady state,
52305248 // the download window should be much larger than the to-be-downloaded set of blocks, so disconnection
52315249 // should only happen during initial block download.
52325250 LogPrintf (" Peer=%d is stalling block download, disconnecting\n " , pto->GetId ());
52335251 pto->fDisconnect = true ;
5252+ // Increase timeout for the next peer so that we don't disconnect multiple peers if our own
5253+ // bandwidth is insufficient.
5254+ const auto new_timeout = std::min (2 * stalling_timeout, BLOCK_STALLING_TIMEOUT_MAX);
5255+ if (stalling_timeout != new_timeout && m_block_stalling_timeout.compare_exchange_strong (stalling_timeout, new_timeout)) {
5256+ LogPrint (BCLog::NET, " Increased stalling timeout temporarily to %d seconds\n " , m_block_stalling_timeout.load ().count ());
5257+ }
52345258 return true ;
52355259 }
52365260 // In case there is a block that has been in flight from this peer for block_interval * (1 + 0.5 * N)
0 commit comments