@@ -54,14 +54,15 @@ var (
5454 blockTargetRTT = 3 * time .Second / 2 // [eth/61] Target time for completing a block retrieval request
5555 blockTTL = 3 * blockTargetRTT // [eth/61] Maximum time allowance before a block request is considered expired
5656
57- headerTargetRTT = time .Second // [eth/62] Target time for completing a header retrieval request (only for measurements for now)
58- headerTTL = 3 * time .Second // [eth/62] Time it takes for a header request to time out
59- bodyTargetRTT = 3 * time .Second / 2 // [eth/62] Target time for completing a block body retrieval request
60- bodyTTL = 3 * bodyTargetRTT // [eth/62] Maximum time allowance before a block body request is considered expired
61- receiptTargetRTT = 3 * time .Second / 2 // [eth/63] Target time for completing a receipt retrieval request
62- receiptTTL = 3 * receiptTargetRTT // [eth/63] Maximum time allowance before a receipt request is considered expired
63- stateTargetRTT = 2 * time .Second / 2 // [eth/63] Target time for completing a state trie retrieval request
64- stateTTL = 3 * stateTargetRTT // [eth/63] Maximum time allowance before a node data request is considered expired
57+ rttMinEstimate = 2 * time .Second // Minimum round-trip time to target for download requests
58+ rttMaxEstimate = 20 * time .Second // Maximum round-trip time to target for download requests
59+ rttMinConfidence = 0.1 // Worst confidence factor in our estimated RTT value
60+ ttlScaling = 3 // Constant scaling factor for RTT -> TTL conversion
61+ ttlLimit = time .Minute // Maximum TTL allowance to prevent reaching crazy timeouts
62+
63+ qosTuningPeers = 5 // Number of peers to tune based on (best peers)
64+ qosConfidenceCap = 10 // Number of peers above which not to modify RTT confidence
65+ qosTuningImpact = 0.25 // Impact that a new tuning target has on the previous value
6566
6667 maxQueuedHashes = 32 * 1024 // [eth/61] Maximum number of hashes to queue for import (DOS protection)
6768 maxQueuedHeaders = 32 * 1024 // [eth/62] Maximum number of headers to queue for import (DOS protection)
@@ -113,7 +114,8 @@ type Downloader struct {
113114 fsPivotLock * types.Header // Pivot header on critical section entry (cannot change between retries)
114115 fsPivotFails int // Number of fast sync failures in the critical section
115116
116- interrupt int32 // Atomic boolean to signal termination
117+ rttEstimate uint64 // Round trip time to target for download requests
118+ rttConfidence uint64 // Confidence in the estimated RTT (unit: millionths to allow atomic ops)
117119
118120 // Statistics
119121 syncStatsChainOrigin uint64 // Origin block number where syncing started at
@@ -159,6 +161,9 @@ type Downloader struct {
159161 cancelCh chan struct {} // Channel to cancel mid-flight syncs
160162 cancelLock sync.RWMutex // Lock to protect the cancel channel in delivers
161163
164+ quitCh chan struct {} // Quit channel to signal termination
165+ quitLock sync.RWMutex // Lock to prevent double closes
166+
162167 // Testing hooks
163168 syncInitHook func (uint64 , uint64 ) // Method to call upon initiating a new sync run
164169 bodyFetchHook func ([]* types.Header ) // Method to call upon starting a block body fetch
@@ -172,11 +177,13 @@ func New(stateDb ethdb.Database, mux *event.TypeMux, hasHeader headerCheckFn, ha
172177 headFastBlock headFastBlockRetrievalFn , commitHeadBlock headBlockCommitterFn , getTd tdRetrievalFn , insertHeaders headerChainInsertFn ,
173178 insertBlocks blockChainInsertFn , insertReceipts receiptChainInsertFn , rollback chainRollbackFn , dropPeer peerDropFn ) * Downloader {
174179
175- return & Downloader {
180+ dl := & Downloader {
176181 mode : FullSync ,
177182 mux : mux ,
178183 queue : newQueue (stateDb ),
179184 peers : newPeerSet (),
185+ rttEstimate : uint64 (rttMaxEstimate ),
186+ rttConfidence : uint64 (1000000 ),
180187 hasHeader : hasHeader ,
181188 hasBlockAndState : hasBlockAndState ,
182189 getHeader : getHeader ,
@@ -203,7 +210,10 @@ func New(stateDb ethdb.Database, mux *event.TypeMux, hasHeader headerCheckFn, ha
203210 receiptWakeCh : make (chan bool , 1 ),
204211 stateWakeCh : make (chan bool , 1 ),
205212 headerProcCh : make (chan []* types.Header , 1 ),
213+ quitCh : make (chan struct {}),
206214 }
215+ go dl .qosTuner ()
216+ return dl
207217}
208218
209219// Progress retrieves the synchronisation boundaries, specifically the origin
@@ -250,6 +260,8 @@ func (d *Downloader) RegisterPeer(id string, version int, head common.Hash,
250260 glog .V (logger .Error ).Infoln ("Register failed:" , err )
251261 return err
252262 }
263+ d .qosReduceConfidence ()
264+
253265 return nil
254266}
255267
@@ -515,7 +527,16 @@ func (d *Downloader) cancel() {
515527// Terminate interrupts the downloader, canceling all pending operations.
516528// The downloader cannot be reused after calling Terminate.
517529func (d * Downloader ) Terminate () {
518- atomic .StoreInt32 (& d .interrupt , 1 )
530+ // Close the termination channel (make sure double close is allowed)
531+ d .quitLock .Lock ()
532+ select {
533+ case <- d .quitCh :
534+ default :
535+ close (d .quitCh )
536+ }
537+ d .quitLock .Unlock ()
538+
539+ // Cancel any pending download requests
519540 d .cancel ()
520541}
521542
@@ -932,7 +953,7 @@ func (d *Downloader) fetchBlocks61(from uint64) error {
932953 // Reserve a chunk of hashes for a peer. A nil can mean either that
933954 // no more hashes are available, or that the peer is known not to
934955 // have them.
935- request := d .queue .ReserveBlocks (peer , peer .BlockCapacity ())
956+ request := d .queue .ReserveBlocks (peer , peer .BlockCapacity (blockTargetRTT ))
936957 if request == nil {
937958 continue
938959 }
@@ -973,7 +994,7 @@ func (d *Downloader) fetchHeight(p *peer) (*types.Header, error) {
973994 // Request the advertised remote head block and wait for the response
974995 go p .getRelHeaders (p .head , 1 , 0 , false )
975996
976- timeout := time .After (headerTTL )
997+ timeout := time .After (d . requestTTL () )
977998 for {
978999 select {
9791000 case <- d .cancelCh :
@@ -1041,7 +1062,7 @@ func (d *Downloader) findAncestor(p *peer, height uint64) (uint64, error) {
10411062
10421063 // Wait for the remote response to the head fetch
10431064 number , hash := uint64 (0 ), common.Hash {}
1044- timeout := time .After (hashTTL )
1065+ timeout := time .After (d . requestTTL () )
10451066
10461067 for finished := false ; ! finished ; {
10471068 select {
@@ -1118,7 +1139,7 @@ func (d *Downloader) findAncestor(p *peer, height uint64) (uint64, error) {
11181139 // Split our chain interval in two, and request the hash to cross check
11191140 check := (start + end ) / 2
11201141
1121- timeout := time .After (hashTTL )
1142+ timeout := time .After (d . requestTTL () )
11221143 go p .getAbsHeaders (uint64 (check ), 1 , 0 , false )
11231144
11241145 // Wait until a reply arrives to this request
@@ -1199,7 +1220,7 @@ func (d *Downloader) fetchHeaders(p *peer, from uint64) error {
11991220
12001221 getHeaders := func (from uint64 ) {
12011222 request = time .Now ()
1202- timeout .Reset (headerTTL )
1223+ timeout .Reset (d . requestTTL () )
12031224
12041225 if skeleton {
12051226 glog .V (logger .Detail ).Infof ("%v: fetching %d skeleton headers from #%d" , p , MaxHeaderFetch , from )
@@ -1311,13 +1332,13 @@ func (d *Downloader) fillHeaderSkeleton(from uint64, skeleton []*types.Header) (
13111332 pack := packet .(* headerPack )
13121333 return d .queue .DeliverHeaders (pack .peerId , pack .headers , d .headerProcCh )
13131334 }
1314- expire = func () map [string ]int { return d .queue .ExpireHeaders (headerTTL ) }
1335+ expire = func () map [string ]int { return d .queue .ExpireHeaders (d . requestTTL () ) }
13151336 throttle = func () bool { return false }
13161337 reserve = func (p * peer , count int ) (* fetchRequest , bool , error ) {
13171338 return d .queue .ReserveHeaders (p , count ), false , nil
13181339 }
13191340 fetch = func (p * peer , req * fetchRequest ) error { return p .FetchHeaders (req .From , MaxHeaderFetch ) }
1320- capacity = func (p * peer ) int { return p .HeaderCapacity () }
1341+ capacity = func (p * peer ) int { return p .HeaderCapacity (d . requestRTT () ) }
13211342 setIdle = func (p * peer , accepted int ) { p .SetHeadersIdle (accepted ) }
13221343 )
13231344 err := d .fetchParts (errCancelHeaderFetch , d .headerCh , deliver , d .queue .headerContCh , expire ,
@@ -1341,9 +1362,9 @@ func (d *Downloader) fetchBodies(from uint64) error {
13411362 pack := packet .(* bodyPack )
13421363 return d .queue .DeliverBodies (pack .peerId , pack .transactions , pack .uncles )
13431364 }
1344- expire = func () map [string ]int { return d .queue .ExpireBodies (bodyTTL ) }
1365+ expire = func () map [string ]int { return d .queue .ExpireBodies (d . requestTTL () ) }
13451366 fetch = func (p * peer , req * fetchRequest ) error { return p .FetchBodies (req ) }
1346- capacity = func (p * peer ) int { return p .BlockCapacity () }
1367+ capacity = func (p * peer ) int { return p .BlockCapacity (d . requestRTT () ) }
13471368 setIdle = func (p * peer , accepted int ) { p .SetBodiesIdle (accepted ) }
13481369 )
13491370 err := d .fetchParts (errCancelBodyFetch , d .bodyCh , deliver , d .bodyWakeCh , expire ,
@@ -1365,9 +1386,9 @@ func (d *Downloader) fetchReceipts(from uint64) error {
13651386 pack := packet .(* receiptPack )
13661387 return d .queue .DeliverReceipts (pack .peerId , pack .receipts )
13671388 }
1368- expire = func () map [string ]int { return d .queue .ExpireReceipts (receiptTTL ) }
1389+ expire = func () map [string ]int { return d .queue .ExpireReceipts (d . requestTTL () ) }
13691390 fetch = func (p * peer , req * fetchRequest ) error { return p .FetchReceipts (req ) }
1370- capacity = func (p * peer ) int { return p .ReceiptCapacity () }
1391+ capacity = func (p * peer ) int { return p .ReceiptCapacity (d . requestRTT () ) }
13711392 setIdle = func (p * peer , accepted int ) { p .SetReceiptsIdle (accepted ) }
13721393 )
13731394 err := d .fetchParts (errCancelReceiptFetch , d .receiptCh , deliver , d .receiptWakeCh , expire ,
@@ -1417,13 +1438,13 @@ func (d *Downloader) fetchNodeData() error {
14171438 }
14181439 })
14191440 }
1420- expire = func () map [string ]int { return d .queue .ExpireNodeData (stateTTL ) }
1441+ expire = func () map [string ]int { return d .queue .ExpireNodeData (d . requestTTL () ) }
14211442 throttle = func () bool { return false }
14221443 reserve = func (p * peer , count int ) (* fetchRequest , bool , error ) {
14231444 return d .queue .ReserveNodeData (p , count ), false , nil
14241445 }
14251446 fetch = func (p * peer , req * fetchRequest ) error { return p .FetchNodeData (req ) }
1426- capacity = func (p * peer ) int { return p .NodeDataCapacity () }
1447+ capacity = func (p * peer ) int { return p .NodeDataCapacity (d . requestRTT () ) }
14271448 setIdle = func (p * peer , accepted int ) { p .SetNodeDataIdle (accepted ) }
14281449 )
14291450 err := d .fetchParts (errCancelStateFetch , d .stateCh , deliver , d .stateWakeCh , expire ,
@@ -1799,8 +1820,10 @@ func (d *Downloader) processContent() error {
17991820 }
18001821 for len (results ) != 0 {
18011822 // Check for any termination requests
1802- if atomic .LoadInt32 (& d .interrupt ) == 1 {
1823+ select {
1824+ case <- d .quitCh :
18031825 return errCancelContentProcessing
1826+ default :
18041827 }
18051828 // Retrieve the a batch of results to import
18061829 var (
@@ -1901,3 +1924,74 @@ func (d *Downloader) deliver(id string, destCh chan dataPack, packet dataPack, i
19011924 return errNoSyncActive
19021925 }
19031926}
1927+
1928+ // qosTuner is the quality of service tuning loop that occasionally gathers the
1929+ // peer latency statistics and updates the estimated request round trip time.
1930+ func (d * Downloader ) qosTuner () {
1931+ for {
1932+ // Retrieve the current median RTT and integrate into the previous target RTT
1933+ rtt := time .Duration (float64 (1 - qosTuningImpact )* float64 (atomic .LoadUint64 (& d .rttEstimate )) + qosTuningImpact * float64 (d .peers .medianRTT ()))
1934+ atomic .StoreUint64 (& d .rttEstimate , uint64 (rtt ))
1935+
1936+ // A new RTT cycle passed, increase our confidence in the estimated RTT
1937+ conf := atomic .LoadUint64 (& d .rttConfidence )
1938+ conf = conf + (1000000 - conf )/ 2
1939+ atomic .StoreUint64 (& d .rttConfidence , conf )
1940+
1941+ // Log the new QoS values and sleep until the next RTT
1942+ glog .V (logger .Debug ).Infof ("Quality of service: rtt %v, conf %.3f, ttl %v" , rtt , float64 (conf )/ 1000000.0 , d .requestTTL ())
1943+ select {
1944+ case <- d .quitCh :
1945+ return
1946+ case <- time .After (rtt ):
1947+ }
1948+ }
1949+ }
1950+
1951+ // qosReduceConfidence is meant to be called when a new peer joins the downloader's
1952+ // peer set, needing to reduce the confidence we have in our QoS estimates.
1953+ func (d * Downloader ) qosReduceConfidence () {
1954+ // If we have a single peer, confidence is always 1
1955+ peers := uint64 (d .peers .Len ())
1956+ if peers == 1 {
1957+ atomic .StoreUint64 (& d .rttConfidence , 1000000 )
1958+ return
1959+ }
1960+ // If we have a ton of peers, don't drop confidence
1961+ if peers >= uint64 (qosConfidenceCap ) {
1962+ return
1963+ }
1964+ // Otherwise drop the confidence factor
1965+ conf := atomic .LoadUint64 (& d .rttConfidence ) * (peers - 1 ) / peers
1966+ if float64 (conf )/ 1000000 < rttMinConfidence {
1967+ conf = uint64 (rttMinConfidence * 1000000 )
1968+ }
1969+ atomic .StoreUint64 (& d .rttConfidence , conf )
1970+
1971+ rtt := time .Duration (atomic .LoadUint64 (& d .rttEstimate ))
1972+ glog .V (logger .Debug ).Infof ("Quality of service: rtt %v, conf %.3f, ttl %v" , rtt , float64 (conf )/ 1000000.0 , d .requestTTL ())
1973+ }
1974+
1975+ // requestRTT returns the current target round trip time for a download request
1976+ // to complete in.
1977+ //
1978+ // Note, the returned RTT is .9 of the actually estimated RTT. The reason is that
1979+ // the downloader tries to adapt queries to the RTT, so multiple RTT values can
1980+ // be adapted to, but smaller ones are preferred (stabler download stream).
1981+ func (d * Downloader ) requestRTT () time.Duration {
1982+ return time .Duration (atomic .LoadUint64 (& d .rttEstimate )) * 9 / 10
1983+ }
1984+
1985+ // requestTTL returns the current timeout allowance for a single download request
1986+ // to finish under.
1987+ func (d * Downloader ) requestTTL () time.Duration {
1988+ var (
1989+ rtt = time .Duration (atomic .LoadUint64 (& d .rttEstimate ))
1990+ conf = float64 (atomic .LoadUint64 (& d .rttConfidence )) / 1000000.0
1991+ )
1992+ ttl := time .Duration (ttlScaling ) * time .Duration (float64 (rtt )/ conf )
1993+ if ttl > ttlLimit {
1994+ ttl = ttlLimit
1995+ }
1996+ return ttl
1997+ }
0 commit comments