Skip to content

Commit 3ca8292

Browse files
authored
Merge pull request #29 from hadv/trie-state-prunning
trie: prunning old state trie node on disk
2 parents 4087d8d + 25a683c commit 3ca8292

File tree

1 file changed

+107
-39
lines changed

1 file changed

+107
-39
lines changed

trie/database.go

Lines changed: 107 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,11 @@ var (
6969
type Database struct {
7070
diskdb ethdb.KeyValueStore // Persistent storage for matured trie nodes
7171

72-
cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs
73-
dirties map[common.Hash]*cachedNode // Data and references relationships of dirty trie nodes
74-
oldest common.Hash // Oldest tracked node, flush-list head
75-
newest common.Hash // Newest tracked node, flush-list tail
72+
greedyGC bool // run gc greedy or not
73+
cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs
74+
dirties map[common.Hash]*cachedNode // Data and references relationships of dirty trie nodes
75+
oldest common.Hash // Oldest tracked node, flush-list head
76+
newest common.Hash // Newest tracked node, flush-list tail
7677

7778
preimages map[common.Hash][]byte // Preimages of nodes from the secure trie
7879

@@ -147,6 +148,8 @@ type cachedNode struct {
147148

148149
flushPrev common.Hash // Previous node in the flush-list
149150
flushNext common.Hash // Next node in the flush-list
151+
152+
commited bool
150153
}
151154

152155
// cachedNodeSize is the raw size of a cachedNode data structure without any
@@ -277,6 +280,7 @@ type Config struct {
277280
Cache int // Memory allowance (MB) to use for caching trie nodes in memory
278281
Journal string // Journal of clean cache to survive node restarts
279282
Preimages bool // Flag whether the preimage of trie key is recorded
283+
GreedyGC bool // "light" or "greedy" GC
280284
}
281285

282286
// NewDatabase creates a new trie database to store ephemeral trie content before
@@ -305,6 +309,9 @@ func NewDatabaseWithConfig(diskdb ethdb.KeyValueStore, config *Config) *Database
305309
children: make(map[common.Hash]uint16),
306310
}},
307311
}
312+
if config != nil {
313+
db.greedyGC = config.GreedyGC
314+
}
308315
if config == nil || config.Preimages { // TODO(karalabe): Flip to default off in the future
309316
db.preimages = make(map[common.Hash][]byte)
310317
}
@@ -520,7 +527,14 @@ func (db *Database) Dereference(root common.Hash) {
520527
defer db.lock.Unlock()
521528

522529
nodes, storage, start := len(db.dirties), db.dirtiesSize, time.Now()
523-
db.dereference(root, common.Hash{})
530+
batch := db.diskdb.NewBatch()
531+
db.dereference(batch, root, common.Hash{})
532+
533+
// Flush out all accumulated data from the batch to disk
534+
if err := batch.Write(); err != nil {
535+
log.Warn("Failed to write flush list to disk", "err", err)
536+
}
537+
batch.Reset()
524538

525539
db.gcnodes += uint64(nodes - len(db.dirties))
526540
db.gcsize += storage - db.dirtiesSize
@@ -535,7 +549,7 @@ func (db *Database) Dereference(root common.Hash) {
535549
}
536550

537551
// dereference is the private locked version of Dereference.
538-
func (db *Database) dereference(child common.Hash, parent common.Hash) {
552+
func (db *Database) dereference(batch ethdb.Batch, child common.Hash, parent common.Hash) {
539553
// Dereference the parent-child
540554
node := db.dirties[parent]
541555

@@ -572,10 +586,21 @@ func (db *Database) dereference(child common.Hash, parent common.Hash) {
572586
db.dirties[node.flushPrev].flushNext = node.flushNext
573587
db.dirties[node.flushNext].flushPrev = node.flushPrev
574588
}
589+
590+
if batch.ValueSize() >= ethdb.IdealBatchSize {
591+
if err := batch.Write(); err != nil {
592+
log.Warn("Error on batch flushing out on disk", "err", err)
593+
}
594+
batch.Reset()
595+
}
596+
575597
// Dereference all children and delete the node
576598
node.forChilds(func(hash common.Hash) {
577-
db.dereference(hash, child)
599+
db.dereference(batch, hash, child)
578600
})
601+
if db.dirties[child].commited {
602+
rawdb.DeleteTrieNode(batch, child)
603+
}
579604
delete(db.dirties, child)
580605
db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
581606
if node.children != nil {
@@ -624,7 +649,9 @@ func (db *Database) Cap(limit common.StorageSize) error {
624649
for size > limit && oldest != (common.Hash{}) {
625650
// Fetch the oldest referenced node and push into the batch
626651
node := db.dirties[oldest]
627-
rawdb.WriteTrieNode(batch, oldest, node.rlp())
652+
if !node.commited {
653+
rawdb.WriteTrieNode(batch, oldest, node.rlp())
654+
}
628655

629656
// If we exceeded the ideal batch size, commit and reset
630657
if batch.ValueSize() >= ethdb.IdealBatchSize {
@@ -713,21 +740,27 @@ func (db *Database) Commit(node common.Hash, report bool, callback func(common.H
713740
// Move the trie itself into the batch, flushing if enough data is accumulated
714741
nodes, storage := len(db.dirties), db.dirtiesSize
715742

716-
uncacher := &cleaner{db}
743+
var uncacher ethdb.KeyValueWriter
744+
if db.greedyGC {
745+
uncacher = &greedy{db}
746+
} else {
747+
uncacher = &cleaner{db}
748+
}
717749
if err := db.commit(node, batch, uncacher, callback); err != nil {
718750
log.Error("Failed to commit trie from trie database", "err", err)
719751
return err
720752
}
721-
// Trie mostly committed to disk, flush any batch leftovers
722-
if err := batch.Write(); err != nil {
723-
log.Error("Failed to write trie to disk", "err", err)
724-
return err
725-
}
726753
// Uncache any leftovers in the last batch
727754
db.lock.Lock()
728755
defer db.lock.Unlock()
729756

730757
batch.Replay(uncacher)
758+
759+
// Trie mostly committed to disk, flush any batch leftovers
760+
if err := batch.Write(); err != nil {
761+
log.Error("Failed to write trie to disk", "err", err)
762+
return err
763+
}
731764
batch.Reset()
732765

733766
// Reset the storage counters and bumped metrics
@@ -753,10 +786,10 @@ func (db *Database) Commit(node common.Hash, report bool, callback func(common.H
753786
}
754787

755788
// commit is the private locked version of Commit.
756-
func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleaner, callback func(common.Hash)) error {
757-
// If the node does not exist, it's a previously committed node
789+
func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher ethdb.KeyValueWriter, callback func(common.Hash)) error {
790+
// If the node does not exist or marked as committed, then it's a previously committed node
758791
node, ok := db.dirties[hash]
759-
if !ok {
792+
if !ok || node.commited {
760793
return nil
761794
}
762795
var err error
@@ -774,13 +807,13 @@ func (db *Database) commit(hash common.Hash, batch ethdb.Batch, uncacher *cleane
774807
callback(hash)
775808
}
776809
if batch.ValueSize() >= ethdb.IdealBatchSize {
810+
db.lock.Lock()
811+
batch.Replay(uncacher)
812+
db.lock.Unlock()
777813
if err := batch.Write(); err != nil {
778814
return err
779815
}
780-
db.lock.Lock()
781-
batch.Replay(uncacher)
782816
batch.Reset()
783-
db.lock.Unlock()
784817
}
785818
return nil
786819
}
@@ -791,10 +824,32 @@ type cleaner struct {
791824
db *Database
792825
}
793826

827+
// evictDirty update the flush-list and remove node from dirty cache
828+
func evictDirty(db *Database, hash common.Hash, node *cachedNode) {
829+
// Node still exists, remove it from the flush-list
830+
switch hash {
831+
case db.oldest:
832+
db.oldest = node.flushNext
833+
db.dirties[node.flushNext].flushPrev = common.Hash{}
834+
case db.newest:
835+
db.newest = node.flushPrev
836+
db.dirties[node.flushPrev].flushNext = common.Hash{}
837+
default:
838+
db.dirties[node.flushPrev].flushNext = node.flushNext
839+
db.dirties[node.flushNext].flushPrev = node.flushPrev
840+
}
841+
// Remove the node from the dirty cache
842+
delete(db.dirties, hash)
843+
db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
844+
if node.children != nil {
845+
db.dirtiesSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2))
846+
}
847+
}
848+
794849
// Put reacts to database writes and implements dirty data uncaching. This is the
795850
// post-processing step of a commit operation where the already persisted trie is
796851
// removed from the dirty cache and moved into the clean cache. The reason behind
797-
// the two-phase commit is to ensure ensure data availability while moving from
852+
// the two-phase commit is to ensure data availability while moving from
798853
// memory to disk.
799854
func (c *cleaner) Put(key []byte, rlp []byte) error {
800855
hash := common.BytesToHash(key)
@@ -804,24 +859,7 @@ func (c *cleaner) Put(key []byte, rlp []byte) error {
804859
if !ok {
805860
return nil
806861
}
807-
// Node still exists, remove it from the flush-list
808-
switch hash {
809-
case c.db.oldest:
810-
c.db.oldest = node.flushNext
811-
c.db.dirties[node.flushNext].flushPrev = common.Hash{}
812-
case c.db.newest:
813-
c.db.newest = node.flushPrev
814-
c.db.dirties[node.flushPrev].flushNext = common.Hash{}
815-
default:
816-
c.db.dirties[node.flushPrev].flushNext = node.flushNext
817-
c.db.dirties[node.flushNext].flushPrev = node.flushPrev
818-
}
819-
// Remove the node from the dirty cache
820-
delete(c.db.dirties, hash)
821-
c.db.dirtiesSize -= common.StorageSize(common.HashLength + int(node.size))
822-
if node.children != nil {
823-
c.db.dirtiesSize -= common.StorageSize(cachedNodeChildrenSize + len(node.children)*(common.HashLength+2))
824-
}
862+
evictDirty(c.db, hash, node)
825863
// Move the flushed node into the clean cache to prevent insta-reloads
826864
if c.db.cleans != nil {
827865
c.db.cleans.Set(hash[:], rlp)
@@ -834,6 +872,36 @@ func (c *cleaner) Delete(key []byte) error {
834872
panic("not implemented")
835873
}
836874

875+
type greedy struct {
876+
db *Database
877+
}
878+
879+
func (g *greedy) Put(key []byte, rlp []byte) error {
880+
hash := common.BytesToHash(key)
881+
882+
// If the node does not exist, we're done on this path
883+
node, ok := g.db.dirties[hash]
884+
if !ok {
885+
return nil
886+
}
887+
// Mark node as commited if node does not existing on db
888+
if exist, _ := g.db.diskdb.Has(hash[:]); !exist {
889+
g.db.dirties[hash].commited = true
890+
} else {
891+
evictDirty(g.db, hash, node)
892+
}
893+
// Move the flushed node into the clean cache to prevent insta-reloads
894+
if g.db.cleans != nil {
895+
g.db.cleans.Set(hash[:], rlp)
896+
memcacheCleanWriteMeter.Mark(int64(len(rlp)))
897+
}
898+
return nil
899+
}
900+
901+
func (g *greedy) Delete(key []byte) error {
902+
panic("not implemented")
903+
}
904+
837905
// Size returns the current storage size of the memory cache in front of the
838906
// persistent database layer.
839907
func (db *Database) Size() (common.StorageSize, common.StorageSize) {

0 commit comments

Comments
 (0)