@@ -63,6 +63,10 @@ func (d *Downloader) syncState(root common.Hash) *stateSync {
 	s := newStateSync(d, root)
 	select {
 	case d.stateSyncStart <- s:
+		// If we tell the statesync to restart with a new root, we also need
+		// to wait for it to actually also start -- when old requests have timed
+		// out or been delivered
+		<-s.started
 	case <-d.quitCh:
 		s.err = errCancelStateFetch
 		close(s.done)
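The wait on s.started gives syncState a start handshake: the sync loop closes the channel as its very first action, so the caller only proceeds once the new sync is actually running. A minimal, self-contained sketch of the same pattern, using an illustrative syncer type rather than the downloader's real types:

package main

import "fmt"

// syncer is an illustrative stand-in for stateSync: it signals start-up by
// closing a channel, which any number of waiters can block on.
type syncer struct {
	started chan struct{}
	done    chan struct{}
}

func newSyncer() *syncer {
	return &syncer{
		started: make(chan struct{}),
		done:    make(chan struct{}),
	}
}

func (s *syncer) loop() {
	close(s.started) // first thing the loop does: announce it is running
	// ... real work would happen here ...
	close(s.done)
}

func main() {
	s := newSyncer()
	go s.loop()
	<-s.started // returns only after loop() has started executing
	fmt.Println("sync loop is up")
	<-s.done
}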
@@ -95,15 +99,9 @@ func (d *Downloader) runStateSync(s *stateSync) *stateSync {
 		finished []*stateReq                  // Completed or failed requests
 		timeout  = make(chan *stateReq)       // Timed out active requests
 	)
-	defer func() {
-		// Cancel active request timers on exit. Also set peers to idle so they're
-		// available for the next sync.
-		for _, req := range active {
-			req.timer.Stop()
-			req.peer.SetNodeDataIdle(len(req.items))
-		}
-	}()
+
 	// Run the state sync.
+	log.Trace("State sync starting", "root", s.root)
 	go s.run()
 	defer s.Cancel()
 
@@ -126,9 +124,11 @@ func (d *Downloader) runStateSync(s *stateSync) *stateSync {
 		select {
 		// The stateSync lifecycle:
 		case next := <-d.stateSyncStart:
+			d.spindownStateSync(active, finished, timeout, peerDrop)
 			return next
 
 		case <-s.done:
+			d.spindownStateSync(active, finished, timeout, peerDrop)
 			return nil
 
 		// Send the next finished request to the current sync:
@@ -189,11 +189,9 @@ func (d *Downloader) runStateSync(s *stateSync) *stateSync {
 			// causes valid requests to go missing and sync to get stuck.
 			if old := active[req.peer.id]; old != nil {
 				log.Warn("Busy peer assigned new state fetch", "peer", old.peer.id)
-
-				// Make sure the previous one doesn't get siletly lost
+				// Move the previous request to the finished set
 				old.timer.Stop()
 				old.dropped = true
-
 				finished = append(finished, old)
 			}
 			// Start a timer to notify the sync loop if the peer stalled.
@@ -210,6 +208,46 @@ func (d *Downloader) runStateSync(s *stateSync) *stateSync {
 		}
 	}
 }
 
+// spindownStateSync 'drains' the outstanding requests; some will be delivered and other
+// will time out. This is to ensure that when the next stateSync starts working, all peers
+// are marked as idle and de facto _are_ idle.
+func (d *Downloader) spindownStateSync(active map[string]*stateReq, finished []*stateReq, timeout chan *stateReq, peerDrop chan *peerConnection) {
+	log.Trace("State sync spinning down", "active", len(active), "finished", len(finished))
+
+	for len(active) > 0 {
+		var (
+			req    *stateReq
+			reason string
+		)
+		select {
+		// Handle (drop) incoming state packs:
+		case pack := <-d.stateCh:
+			req = active[pack.PeerId()]
+			reason = "delivered"
+		// Handle dropped peer connections:
+		case p := <-peerDrop:
+			req = active[p.id]
+			reason = "peerdrop"
+		// Handle timed-out requests:
+		case req = <-timeout:
+			reason = "timeout"
+		}
+		if req == nil {
+			continue
+		}
+		req.peer.log.Trace("State peer marked idle (spindown)", "req.items", len(req.items), "reason", reason)
+		req.timer.Stop()
+		delete(active, req.peer.id)
+		req.peer.SetNodeDataIdle(len(req.items))
+	}
+	// The 'finished' set contains deliveries that we were going to pass to processing.
+	// Those are now moot, but we still need to set those peers as idle, which would
+	// otherwise have been done after processing
+	for _, req := range finished {
+		req.peer.SetNodeDataIdle(len(req.items))
+	}
+}
+
 // stateSync schedules requests for downloading a particular state trie defined
 // by a given state root.
 type stateSync struct {
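spindownStateSync keeps selecting on the delivery, peer-drop and timeout channels until the active map is empty, so every tracked peer really is idle before the next sync takes over. A stripped-down, runnable sketch of that drain loop follows; the request type and channel names are simplified stand-ins, not the downloader's API:

package main

import "fmt"

// request is a simplified stand-in for an in-flight state request.
type request struct{ peer string }

// drain blocks until every active request has been answered, timed out, or
// had its peer drop, mirroring the shape of the spin-down loop above.
func drain(active map[string]*request, delivered <-chan string, timeout <-chan *request, peerDrop <-chan string) {
	for len(active) > 0 {
		var req *request
		select {
		case id := <-delivered: // response arrived; we only use it to free the peer
			req = active[id]
		case id := <-peerDrop: // peer disconnected while a request was pending
			req = active[id]
		case req = <-timeout: // request timed out
		}
		if req == nil {
			continue // answer from a peer we are no longer tracking
		}
		delete(active, req.peer) // from here on the peer counts as idle again
	}
}

func main() {
	active := map[string]*request{
		"peer-1": {peer: "peer-1"},
		"peer-2": {peer: "peer-2"},
	}
	delivered := make(chan string)
	timeout := make(chan *request)
	peerDrop := make(chan string)

	p2 := active["peer-2"] // capture before drain starts mutating the map
	go func() {
		delivered <- "peer-1" // one request is answered ...
		timeout <- p2         // ... and the other times out
	}()

	drain(active, delivered, timeout, peerDrop)
	fmt.Println("all requests drained:", len(active) == 0)
}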
@@ -222,11 +260,15 @@ type stateSync struct {
 	numUncommitted   int
 	bytesUncommitted int
 
+	started chan struct{} // Started is signalled once the sync loop starts
+
 	deliver    chan *stateReq // Delivery channel multiplexing peer responses
 	cancel     chan struct{}  // Channel to signal a termination request
 	cancelOnce sync.Once      // Ensures cancel only ever gets called once
 	done       chan struct{}  // Channel to signal termination completion
 	err        error          // Any error hit during sync (set before completion)
+
+	root common.Hash
 }
 
 // stateTask represents a single trie node download task, containing a set of
@@ -246,6 +288,8 @@ func newStateSync(d *Downloader, root common.Hash) *stateSync {
 		deliver: make(chan *stateReq),
 		cancel:  make(chan struct{}),
 		done:    make(chan struct{}),
+		started: make(chan struct{}),
+		root:    root,
 	}
 }
 
@@ -276,6 +320,7 @@ func (s *stateSync) Cancel() error {
 // pushed here async. The reason is to decouple processing from data receipt
 // and timeouts.
 func (s *stateSync) loop() (err error) {
+	close(s.started)
 	// Listen for new peer events to assign tasks to them
 	newPeer := make(chan *peerConnection, 1024)
 	peerSub := s.d.peers.SubscribeNewPeers(newPeer)
@@ -331,11 +376,11 @@ func (s *stateSync) loop() (err error) {
 			}
 			// Process all the received blobs and check for stale delivery
 			delivered, err := s.process(req)
+			req.peer.SetNodeDataIdle(delivered)
 			if err != nil {
 				log.Warn("Node data write error", "err", err)
 				return err
 			}
-			req.peer.SetNodeDataIdle(delivered)
 		}
 	}
 	return nil
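Moving SetNodeDataIdle ahead of the error check means the delivering peer is returned to the idle pool even when processing its data fails; previously the error path left it marked busy. A small sketch of that ordering, with process and markIdle as assumed stand-ins for the real calls:

package main

import (
	"errors"
	"fmt"
)

// process stands in for handling a delivered batch; it may fail.
func process(items []string) (written int, err error) {
	if len(items) == 0 {
		return 0, errors.New("empty delivery")
	}
	return len(items), nil
}

// handleDelivery marks the peer idle before acting on any processing error,
// so a failed write can never leave the peer stuck in the busy state.
func handleDelivery(items []string, markIdle func(delivered int)) error {
	delivered, err := process(items)
	markIdle(delivered) // always hand the peer back first
	if err != nil {
		return err
	}
	return nil
}

func main() {
	idle := false
	err := handleDelivery(nil, func(delivered int) { idle = true })
	fmt.Println("peer idle again:", idle, "processing error:", err)
}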
@@ -372,7 +417,7 @@ func (s *stateSync) assignTasks() {
 
 		// If the peer was assigned tasks to fetch, send the network request
 		if len(req.items) > 0 {
-			req.peer.log.Trace("Requesting new batch of data", "type", "state", "count", len(req.items))
+			req.peer.log.Trace("Requesting new batch of data", "type", "state", "count", len(req.items), "root", s.root)
 			select {
 			case s.d.trackStateReq <- req:
 				req.peer.FetchNodeData(req.items)