7
7
"fmt"
8
8
"strings"
9
9
"sync"
10
+ "sync/atomic"
10
11
"time"
11
12
12
13
goheader "github.com/celestiaorg/go-header"
@@ -30,6 +31,9 @@ type p2pHandler interface {
30
31
ProcessDataRange (ctx context.Context , fromHeight , toHeight uint64 ) []common.DAHeightEvent
31
32
}
32
33
34
+ // maxRetriesBeforeHalt is the maximum number of retries against the execution client before halting the syncer.
35
+ const maxRetriesBeforeHalt = 3
36
+
33
37
// Syncer handles block synchronization from DA and P2P sources.
34
38
type Syncer struct {
35
39
// Core components
@@ -51,8 +55,7 @@ type Syncer struct {
51
55
lastStateMtx * sync.RWMutex
52
56
53
57
// DA state
54
- daHeight uint64
55
- daStateMtx * sync.RWMutex
58
+ daHeight uint64
56
59
57
60
// P2P stores
58
61
headerStore goheader.Store [* types.SignedHeader ]
@@ -70,9 +73,10 @@ type Syncer struct {
70
73
logger zerolog.Logger
71
74
72
75
// Lifecycle
73
- ctx context.Context
74
- cancel context.CancelFunc
75
- wg sync.WaitGroup
76
+ ctx context.Context
77
+ cancel context.CancelFunc
78
+ wg sync.WaitGroup
79
+ retriesBeforeHalt map [uint64 ]uint64
76
80
}
77
81
78
82
// NewSyncer creates a new block syncer
@@ -91,21 +95,21 @@ func NewSyncer(
91
95
errorCh chan <- error ,
92
96
) * Syncer {
93
97
return & Syncer {
94
- store : store ,
95
- exec : exec ,
96
- da : da ,
97
- cache : cache ,
98
- metrics : metrics ,
99
- config : config ,
100
- genesis : genesis ,
101
- options : options ,
102
- headerStore : headerStore ,
103
- dataStore : dataStore ,
104
- lastStateMtx : & sync.RWMutex {},
105
- daStateMtx : & sync. RWMutex {} ,
106
- heightInCh : make ( chan common. DAHeightEvent , 10_000 ) ,
107
- errorCh : errorCh ,
108
- logger : logger . With (). Str ( "component" , "syncer" ). Logger ( ),
98
+ store : store ,
99
+ exec : exec ,
100
+ da : da ,
101
+ cache : cache ,
102
+ metrics : metrics ,
103
+ config : config ,
104
+ genesis : genesis ,
105
+ options : options ,
106
+ headerStore : headerStore ,
107
+ dataStore : dataStore ,
108
+ lastStateMtx : & sync.RWMutex {},
109
+ heightInCh : make ( chan common. DAHeightEvent , 10_000 ) ,
110
+ errorCh : errorCh ,
111
+ logger : logger . With (). Str ( "component" , "syncer" ). Logger () ,
112
+ retriesBeforeHalt : make ( map [ uint64 ] uint64 ),
109
113
}
110
114
}
111
115
@@ -166,16 +170,12 @@ func (s *Syncer) SetLastState(state types.State) {
166
170
167
171
// GetDAHeight returns the current DA height
168
172
func (s * Syncer ) GetDAHeight () uint64 {
169
- s .daStateMtx .RLock ()
170
- defer s .daStateMtx .RUnlock ()
171
- return s .daHeight
173
+ return atomic .LoadUint64 (& s .daHeight )
172
174
}
173
175
174
176
// SetDAHeight updates the DA height
175
177
func (s * Syncer ) SetDAHeight (height uint64 ) {
176
- s .daStateMtx .Lock ()
177
- defer s .daStateMtx .Unlock ()
178
- s .daHeight = height
178
+ atomic .StoreUint64 (& s .daHeight , height )
179
179
}
180
180
181
181
// initializeState loads the current sync state
@@ -246,15 +246,13 @@ func (s *Syncer) syncLoop() {
246
246
lastHeaderHeight := initialHeight
247
247
lastDataHeight := initialHeight
248
248
249
- // Backoff control when DA replies with height-from-future
249
+ // Backoff control when DA replies with errors
250
250
var hffDelay time.Duration
251
251
var nextDARequestAt time.Time
252
252
253
253
blockTicker := time .NewTicker (s .config .Node .BlockTime .Duration )
254
254
defer blockTicker .Stop ()
255
255
256
- // TODO: we should request to see what the head of the chain is at
257
- // then we know if we are falling behind or in sync mode
258
256
for {
259
257
select {
260
258
case <- s .ctx .Done ():
@@ -265,26 +263,34 @@ func (s *Syncer) syncLoop() {
265
263
s .processPendingEvents ()
266
264
267
265
now := time .Now ()
266
+ daHeight := s .GetDAHeight ()
267
+
268
268
// Respect backoff window if set
269
269
if nextDARequestAt .IsZero () || now .After (nextDARequestAt ) || now .Equal (nextDARequestAt ) {
270
270
// Retrieve from DA as fast as possible (unless throttled by HFF)
271
- events , err := s .daRetriever .RetrieveFromDA (s .ctx , s .GetDAHeight ())
271
+ // daHeight only advances after a successful retrieval or a blob-not-found result; any other error retries the same height once the backoff window elapses
272
+ events , err := s .daRetriever .RetrieveFromDA (s .ctx , daHeight )
272
273
if err != nil {
273
- if s .isHeightFromFutureError (err ) {
274
+ if errors .Is (err , coreda .ErrBlobNotFound ) {
275
+ // no data at this height, increase DA height
276
+ // we do still want to check p2p
277
+ s .SetDAHeight (daHeight + 1 )
278
+
279
+ // Reset backoff: a missing blob at this height is not treated as a retrieval failure
280
+ nextDARequestAt = time.Time {}
281
+ } else {
274
282
// Back off exactly by DA block time to avoid overloading
275
283
hffDelay = s .config .DA .BlockTime .Duration
276
284
if hffDelay <= 0 {
277
285
hffDelay = 2 * time .Second
278
286
}
279
- s .logger .Debug ().Dur ("delay" , hffDelay ).Uint64 ("da_height" , s .GetDAHeight ()).Msg ("height from future; backing off DA requests" )
280
287
nextDARequestAt = now .Add (hffDelay )
281
- } else if errors .Is (err , coreda .ErrBlobNotFound ) {
282
- // no data at this height, increase DA height
283
- s .SetDAHeight (s .GetDAHeight () + 1 )
284
- } else {
285
- // Non-HFF errors: do not backoff artificially
286
- nextDARequestAt = time.Time {}
287
- s .logger .Error ().Err (err ).Msg ("failed to retrieve from DA" )
288
+
289
+ if s .isHeightFromFutureError (err ) {
290
+ s .logger .Debug ().Dur ("delay" , hffDelay ).Uint64 ("da_height" , daHeight ).Msg ("height from future; backing off DA requests" )
291
+ } else {
292
+ s .logger .Error ().Err (err ).Dur ("delay" , hffDelay ).Uint64 ("da_height" , daHeight ).Msg ("failed to retrieve from DA; backing off DA requests" )
293
+ }
288
294
}
289
295
} else {
290
296
// Reset backoff on success
@@ -300,8 +306,8 @@ func (s *Syncer) syncLoop() {
300
306
}
301
307
302
308
// increment DA height on successful retrieval and continue immediately
303
- s .SetDAHeight (s . GetDAHeight () + 1 )
304
- continue
309
+ s .SetDAHeight (daHeight + 1 )
310
+ continue // event sent, no need to check p2p
305
311
}
306
312
}
307
313
@@ -469,9 +475,15 @@ func (s *Syncer) applyBlock(header types.Header, data *types.Data, currentState
469
475
newAppHash , _ , err := s .exec .ExecuteTxs (ctx , rawTxs , header .Height (),
470
476
header .Time (), currentState .AppHash )
471
477
if err != nil {
472
- s .sendCriticalError (fmt .Errorf ("failed to execute transactions: %w" , err ))
473
- return types.State {}, fmt .Errorf ("failed to execute transactions: %w" , err )
478
+ s .retriesBeforeHalt [header .Height ()]++
479
+ if s .retriesBeforeHalt [header .Height ()] > maxRetriesBeforeHalt {
480
+ s .sendCriticalError (fmt .Errorf ("failed to execute transactions: %w" , err ))
481
+ return types.State {}, fmt .Errorf ("failed to execute transactions: %w" , err )
482
+ }
483
+
484
+ return types.State {}, fmt .Errorf ("failed to execute transactions (retry %d / %d): %w" , s .retriesBeforeHalt [header .Height ()], maxRetriesBeforeHalt , err )
474
485
}
486
+ delete (s .retriesBeforeHalt , header .Height ())
475
487
476
488
// Create new state
477
489
newState , err := currentState .NextState (header , newAppHash )
0 commit comments