Skip to content

Commit 8cbdc86

Browse files
authored
core: define and test chain rewind corner cases (#21409)
* core: define and test chain reparation cornercases * core: write up a variety of set-head tests * core, eth: unify chain rollbacks, handle all the cases * core: make linter smile * core: remove commented out legacy code * core, eth/downloader: fix review comments * core: revert a removed recovery mechanism
1 parent 0bdd295 commit 8cbdc86

13 files changed

+3952
-214
lines changed

core/blockchain.go

Lines changed: 86 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,16 @@ type CacheConfig struct {
130130
SnapshotWait bool // Wait for snapshot construction on startup. TODO(karalabe): This is a dirty hack for testing, nuke it
131131
}
132132

133+
// defaultCacheConfig are the default caching values if none are specified by the
134+
// user (also used during testing).
135+
var defaultCacheConfig = &CacheConfig{
136+
TrieCleanLimit: 256,
137+
TrieDirtyLimit: 256,
138+
TrieTimeLimit: 5 * time.Minute,
139+
SnapshotLimit: 256,
140+
SnapshotWait: true,
141+
}
142+
133143
// BlockChain represents the canonical chain given a database with a genesis
134144
// block. The Blockchain manages chain imports, reverts, chain reorganisations.
135145
//
@@ -204,13 +214,7 @@ type BlockChain struct {
204214
// Processor.
205215
func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *params.ChainConfig, engine consensus.Engine, vmConfig vm.Config, shouldPreserve func(block *types.Block) bool, txLookupLimit *uint64) (*BlockChain, error) {
206216
if cacheConfig == nil {
207-
cacheConfig = &CacheConfig{
208-
TrieCleanLimit: 256,
209-
TrieDirtyLimit: 256,
210-
TrieTimeLimit: 5 * time.Minute,
211-
SnapshotLimit: 256,
212-
SnapshotWait: true,
213-
}
217+
cacheConfig = defaultCacheConfig
214218
}
215219
bodyCache, _ := lru.New(bodyCacheLimit)
216220
bodyRLPCache, _ := lru.New(bodyCacheLimit)
@@ -268,15 +272,18 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par
268272
txIndexBlock = frozen
269273
}
270274
}
271-
272275
if err := bc.loadLastState(); err != nil {
273276
return nil, err
274277
}
275-
// The first thing the node will do is reconstruct the verification data for
276-
// the head block (ethash cache or clique voting snapshot). Might as well do
277-
// it in advance.
278-
bc.engine.VerifyHeader(bc, bc.CurrentHeader(), true)
279-
278+
// Make sure the state associated with the block is available
279+
head := bc.CurrentBlock()
280+
if _, err := state.New(head.Root(), bc.stateCache, bc.snaps); err != nil {
281+
log.Warn("Head state missing, repairing", "number", head.Number(), "hash", head.Hash())
282+
if err := bc.SetHead(head.NumberU64()); err != nil {
283+
return nil, err
284+
}
285+
}
286+
// Ensure that a previous crash in SetHead doesn't leave extra ancients
280287
if frozen, err := bc.db.Ancients(); err == nil && frozen > 0 {
281288
var (
282289
needRewind bool
@@ -286,7 +293,7 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par
286293
// blockchain repair. If the head full block is even lower than the ancient
287294
// chain, truncate the ancient store.
288295
fullBlock := bc.CurrentBlock()
289-
if fullBlock != nil && fullBlock != bc.genesisBlock && fullBlock.NumberU64() < frozen-1 {
296+
if fullBlock != nil && fullBlock.Hash() != bc.genesisBlock.Hash() && fullBlock.NumberU64() < frozen-1 {
290297
needRewind = true
291298
low = fullBlock.NumberU64()
292299
}
@@ -301,15 +308,17 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par
301308
}
302309
}
303310
if needRewind {
304-
var hashes []common.Hash
305-
previous := bc.CurrentHeader().Number.Uint64()
306-
for i := low + 1; i <= bc.CurrentHeader().Number.Uint64(); i++ {
307-
hashes = append(hashes, rawdb.ReadCanonicalHash(bc.db, i))
311+
log.Error("Truncating ancient chain", "from", bc.CurrentHeader().Number.Uint64(), "to", low)
312+
if err := bc.SetHead(low); err != nil {
313+
return nil, err
308314
}
309-
bc.Rollback(hashes)
310-
log.Warn("Truncate ancient chain", "from", previous, "to", low)
311315
}
312316
}
317+
// The first thing the node will do is reconstruct the verification data for
318+
// the head block (ethash cache or clique voting snapshot). Might as well do
319+
// it in advance.
320+
bc.engine.VerifyHeader(bc, bc.CurrentHeader(), true)
321+
313322
// Check the current state of the block hashes and make sure that we do not have any of the bad blocks in our chain
314323
for hash := range BadHashes {
315324
if header := bc.GetHeaderByHash(hash); header != nil {
@@ -318,7 +327,9 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par
318327
// make sure the headerByNumber (if present) is in our current canonical chain
319328
if headerByNumber != nil && headerByNumber.Hash() == header.Hash() {
320329
log.Error("Found bad hash, rewinding chain", "number", header.Number, "hash", header.ParentHash)
321-
bc.SetHead(header.Number.Uint64() - 1)
330+
if err := bc.SetHead(header.Number.Uint64() - 1); err != nil {
331+
return nil, err
332+
}
322333
log.Error("Chain rewind was successful, resuming normal operation")
323334
}
324335
}
@@ -385,15 +396,6 @@ func (bc *BlockChain) loadLastState() error {
385396
log.Warn("Head block missing, resetting chain", "hash", head)
386397
return bc.Reset()
387398
}
388-
// Make sure the state associated with the block is available
389-
if _, err := state.New(currentBlock.Root(), bc.stateCache, bc.snaps); err != nil {
390-
// Dangling block without a state associated, init from scratch
391-
log.Warn("Head state missing, repairing chain", "number", currentBlock.Number(), "hash", currentBlock.Hash())
392-
if err := bc.repair(&currentBlock); err != nil {
393-
return err
394-
}
395-
rawdb.WriteHeadBlockHash(bc.db, currentBlock.Hash())
396-
}
397399
// Everything seems to be fine, set as the head block
398400
bc.currentBlock.Store(currentBlock)
399401
headBlockGauge.Update(int64(currentBlock.NumberU64()))
@@ -427,30 +429,48 @@ func (bc *BlockChain) loadLastState() error {
427429
log.Info("Loaded most recent local header", "number", currentHeader.Number, "hash", currentHeader.Hash(), "td", headerTd, "age", common.PrettyAge(time.Unix(int64(currentHeader.Time), 0)))
428430
log.Info("Loaded most recent local full block", "number", currentBlock.Number(), "hash", currentBlock.Hash(), "td", blockTd, "age", common.PrettyAge(time.Unix(int64(currentBlock.Time()), 0)))
429431
log.Info("Loaded most recent local fast block", "number", currentFastBlock.Number(), "hash", currentFastBlock.Hash(), "td", fastTd, "age", common.PrettyAge(time.Unix(int64(currentFastBlock.Time()), 0)))
430-
432+
if pivot := rawdb.ReadLastPivotNumber(bc.db); pivot != nil {
433+
log.Info("Loaded last fast-sync pivot marker", "number", *pivot)
434+
}
431435
return nil
432436
}
433437

434-
// SetHead rewinds the local chain to a new head. In the case of headers, everything
435-
// above the new head will be deleted and the new one set. In the case of blocks
436-
// though, the head may be further rewound if block bodies are missing (non-archive
437-
// nodes after a fast sync).
438+
// SetHead rewinds the local chain to a new head. Depending on whether the node
439+
// was fast synced or full synced and in which state, the method will try to
440+
// delete minimal data from disk whilst retaining chain consistency.
438441
func (bc *BlockChain) SetHead(head uint64) error {
439-
log.Warn("Rewinding blockchain", "target", head)
440-
441442
bc.chainmu.Lock()
442443
defer bc.chainmu.Unlock()
443444

444-
updateFn := func(db ethdb.KeyValueWriter, header *types.Header) {
445-
// Rewind the block chain, ensuring we don't end up with a stateless head block
446-
if currentBlock := bc.CurrentBlock(); currentBlock != nil && header.Number.Uint64() < currentBlock.NumberU64() {
445+
// Retrieve the last pivot block to short circuit rollbacks beyond it and the
446+
// current freezer limit to start nuking if underflown
447+
pivot := rawdb.ReadLastPivotNumber(bc.db)
448+
frozen, _ := bc.db.Ancients()
449+
450+
updateFn := func(db ethdb.KeyValueWriter, header *types.Header) (uint64, bool) {
451+
// Rewind the block chain, ensuring we don't end up with a stateless head
452+
// block. Note, depth equality is permitted to allow using SetHead as a
453+
// chain reparation mechanism without deleting any data!
454+
if currentBlock := bc.CurrentBlock(); currentBlock != nil && header.Number.Uint64() <= currentBlock.NumberU64() {
447455
newHeadBlock := bc.GetBlock(header.Hash(), header.Number.Uint64())
448456
if newHeadBlock == nil {
457+
log.Error("Gap in the chain, rewinding to genesis", "number", header.Number, "hash", header.Hash())
449458
newHeadBlock = bc.genesisBlock
450459
} else {
451-
if _, err := state.New(newHeadBlock.Root(), bc.stateCache, bc.snaps); err != nil {
452-
// Rewound state missing, rolled back to before pivot, reset to genesis
453-
newHeadBlock = bc.genesisBlock
460+
// Block exists, keep rewinding until we find one with state
461+
for {
462+
if _, err := state.New(newHeadBlock.Root(), bc.stateCache, bc.snaps); err != nil {
463+
log.Trace("Block state missing, rewinding further", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash())
464+
if pivot == nil || newHeadBlock.NumberU64() > *pivot {
465+
newHeadBlock = bc.GetBlock(newHeadBlock.ParentHash(), newHeadBlock.NumberU64()-1)
466+
continue
467+
} else {
468+
log.Trace("Rewind passed pivot, aiming genesis", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash(), "pivot", *pivot)
469+
newHeadBlock = bc.genesisBlock
470+
}
471+
}
472+
log.Debug("Rewound to block with state", "number", newHeadBlock.NumberU64(), "hash", newHeadBlock.Hash())
473+
break
454474
}
455475
}
456476
rawdb.WriteHeadBlockHash(db, newHeadBlock.Hash())
@@ -462,7 +482,6 @@ func (bc *BlockChain) SetHead(head uint64) error {
462482
bc.currentBlock.Store(newHeadBlock)
463483
headBlockGauge.Update(int64(newHeadBlock.NumberU64()))
464484
}
465-
466485
// Rewind the fast block in a simpleton way to the target head
467486
if currentFastBlock := bc.CurrentFastBlock(); currentFastBlock != nil && header.Number.Uint64() < currentFastBlock.NumberU64() {
468487
newHeadFastBlock := bc.GetBlock(header.Hash(), header.Number.Uint64())
@@ -479,19 +498,27 @@ func (bc *BlockChain) SetHead(head uint64) error {
479498
bc.currentFastBlock.Store(newHeadFastBlock)
480499
headFastBlockGauge.Update(int64(newHeadFastBlock.NumberU64()))
481500
}
482-
}
501+
head := bc.CurrentBlock().NumberU64()
483502

503+
// If SetHead underflowed the freezer threshold and the block processing
504+
// intent afterwards is full block importing, delete the chain segment
505+
// between the stateful-block and the sethead target.
506+
var wipe bool
507+
if head+1 < frozen {
508+
wipe = pivot == nil || head >= *pivot
509+
}
510+
return head, wipe // Only force wipe if full synced
511+
}
484512
// Rewind the header chain, deleting all block bodies until then
485513
delFn := func(db ethdb.KeyValueWriter, hash common.Hash, num uint64) {
486514
// Ignore the error here since light client won't hit this path
487515
frozen, _ := bc.db.Ancients()
488516
if num+1 <= frozen {
489517
// Truncate all related data (header, total difficulty, body, receipt
490518
// and canonical hash) from ancient store.
491-
if err := bc.db.TruncateAncients(num + 1); err != nil {
519+
if err := bc.db.TruncateAncients(num); err != nil {
492520
log.Crit("Failed to truncate ancient data", "number", num, "err", err)
493521
}
494-
495522
// Remove the hash <-> number mapping from the active store.
496523
rawdb.DeleteHeaderNumber(db, hash)
497524
} else {
@@ -503,8 +530,18 @@ func (bc *BlockChain) SetHead(head uint64) error {
503530
}
504531
// Todo(rjl493456442) txlookup, bloombits, etc
505532
}
506-
bc.hc.SetHead(head, updateFn, delFn)
507-
533+
// If SetHead was only called as a chain reparation method, try to skip
534+
// touching the header chain altogether, unless the freezer is broken
535+
if block := bc.CurrentBlock(); block.NumberU64() == head {
536+
if target, force := updateFn(bc.db, block.Header()); force {
537+
bc.hc.SetHead(target, updateFn, delFn)
538+
}
539+
} else {
540+
// Rewind the chain to the requested head and keep going backwards until a
541+
// block with a state is found or fast sync pivot is passed
542+
log.Warn("Rewinding blockchain", "target", head)
543+
bc.hc.SetHead(head, updateFn, delFn)
544+
}
508545
// Clear out any stale content from the caches
509546
bc.bodyCache.Purge()
510547
bc.bodyRLPCache.Purge()
@@ -627,28 +664,6 @@ func (bc *BlockChain) ResetWithGenesisBlock(genesis *types.Block) error {
627664
return nil
628665
}
629666

630-
// repair tries to repair the current blockchain by rolling back the current block
631-
// until one with associated state is found. This is needed to fix incomplete db
632-
// writes caused either by crashes/power outages, or simply non-committed tries.
633-
//
634-
// This method only rolls back the current block. The current header and current
635-
// fast block are left intact.
636-
func (bc *BlockChain) repair(head **types.Block) error {
637-
for {
638-
// Abort if we've rewound to a head block that does have associated state
639-
if _, err := state.New((*head).Root(), bc.stateCache, bc.snaps); err == nil {
640-
log.Info("Rewound blockchain to past state", "number", (*head).Number(), "hash", (*head).Hash())
641-
return nil
642-
}
643-
// Otherwise rewind one block and recheck state availability there
644-
block := bc.GetBlock((*head).ParentHash(), (*head).NumberU64()-1)
645-
if block == nil {
646-
return fmt.Errorf("missing block %d [%x]", (*head).NumberU64()-1, (*head).ParentHash())
647-
}
648-
*head = block
649-
}
650-
}
651-
652667
// Export writes the active chain to the given writer.
653668
func (bc *BlockChain) Export(w io.Writer) error {
654669
return bc.ExportN(w, uint64(0), bc.CurrentBlock().NumberU64())
@@ -985,52 +1000,6 @@ const (
9851000
SideStatTy
9861001
)
9871002

988-
// Rollback is designed to remove a chain of links from the database that aren't
989-
// certain enough to be valid.
990-
func (bc *BlockChain) Rollback(chain []common.Hash) {
991-
bc.chainmu.Lock()
992-
defer bc.chainmu.Unlock()
993-
994-
batch := bc.db.NewBatch()
995-
for i := len(chain) - 1; i >= 0; i-- {
996-
hash := chain[i]
997-
998-
// Degrade the chain markers if they are explicitly reverted.
999-
// In theory we should update all in-memory markers in the
1000-
// last step, however the direction of rollback is from high
1001-
// to low, so it's safe the update in-memory markers directly.
1002-
currentHeader := bc.hc.CurrentHeader()
1003-
if currentHeader.Hash() == hash {
1004-
newHeadHeader := bc.GetHeader(currentHeader.ParentHash, currentHeader.Number.Uint64()-1)
1005-
rawdb.WriteHeadHeaderHash(batch, currentHeader.ParentHash)
1006-
bc.hc.SetCurrentHeader(newHeadHeader)
1007-
}
1008-
if currentFastBlock := bc.CurrentFastBlock(); currentFastBlock.Hash() == hash {
1009-
newFastBlock := bc.GetBlock(currentFastBlock.ParentHash(), currentFastBlock.NumberU64()-1)
1010-
rawdb.WriteHeadFastBlockHash(batch, currentFastBlock.ParentHash())
1011-
bc.currentFastBlock.Store(newFastBlock)
1012-
headFastBlockGauge.Update(int64(newFastBlock.NumberU64()))
1013-
}
1014-
if currentBlock := bc.CurrentBlock(); currentBlock.Hash() == hash {
1015-
newBlock := bc.GetBlock(currentBlock.ParentHash(), currentBlock.NumberU64()-1)
1016-
rawdb.WriteHeadBlockHash(batch, currentBlock.ParentHash())
1017-
bc.currentBlock.Store(newBlock)
1018-
headBlockGauge.Update(int64(newBlock.NumberU64()))
1019-
}
1020-
}
1021-
if err := batch.Write(); err != nil {
1022-
log.Crit("Failed to rollback chain markers", "err", err)
1023-
}
1024-
// Truncate ancient data which exceeds the current header.
1025-
//
1026-
// Notably, it can happen that system crashes without truncating the ancient data
1027-
// but the head indicator has been updated in the active store. Regarding this issue,
1028-
// system will self recovery by truncating the extra data during the setup phase.
1029-
if err := bc.truncateAncient(bc.hc.CurrentHeader().Number.Uint64()); err != nil {
1030-
log.Crit("Truncate ancient store failed", "err", err)
1031-
}
1032-
}
1033-
10341003
// truncateAncient rewinds the blockchain to the specified header and deletes all
10351004
// data in the ancient store that exceeds the specified header.
10361005
func (bc *BlockChain) truncateAncient(head uint64) error {

0 commit comments

Comments
 (0)