Skip to content

Commit 17903fe

Browse files
jsvisarjl493456442
andauthored
triedb/pathdb: introduce file-based state journal (ethereum#32060)
Introduce file-based state journal in path database, fixing the Pebble restriction when the journal size exceeds 4GB. --------- Signed-off-by: jsvisa <[email protected]> Co-authored-by: Gary Rong <[email protected]>
1 parent fe0ae06 commit 17903fe

File tree

10 files changed

+239
-40
lines changed

10 files changed

+239
-40
lines changed

cmd/utils/flags.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2198,6 +2198,12 @@ func MakeChain(ctx *cli.Context, stack *node.Node, readonly bool) (*core.BlockCh
21982198
StateHistory: ctx.Uint64(StateHistoryFlag.Name),
21992199
// Disable transaction indexing/unindexing.
22002200
TxLookupLimit: -1,
2201+
2202+
// Enables file journaling for the trie database. The journal files will be stored
2203+
// within the data directory. The corresponding paths will be either:
2204+
// - DATADIR/triedb/merkle.journal
2205+
// - DATADIR/triedb/verkle.journal
2206+
TrieJournalDirectory: stack.ResolvePath("triedb"),
22012207
}
22022208
if options.ArchiveMode && !options.Preimages {
22032209
options.Preimages = true

core/blockchain.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,11 @@ const (
162162
// BlockChainConfig contains the configuration of the BlockChain object.
163163
type BlockChainConfig struct {
164164
// Trie database related options
165-
TrieCleanLimit int // Memory allowance (MB) to use for caching trie nodes in memory
166-
TrieDirtyLimit int // Memory limit (MB) at which to start flushing dirty trie nodes to disk
167-
TrieTimeLimit time.Duration // Time limit after which to flush the current in-memory trie to disk
168-
TrieNoAsyncFlush bool // Whether the asynchronous buffer flushing is disallowed
165+
TrieCleanLimit int // Memory allowance (MB) to use for caching trie nodes in memory
166+
TrieDirtyLimit int // Memory limit (MB) at which to start flushing dirty trie nodes to disk
167+
TrieTimeLimit time.Duration // Time limit after which to flush the current in-memory trie to disk
168+
TrieNoAsyncFlush bool // Whether the asynchronous buffer flushing is disallowed
169+
TrieJournalDirectory string // Directory path to the journal used for persisting trie data across node restarts
169170

170171
Preimages bool // Whether to store preimage of trie key to the disk
171172
StateHistory uint64 // Number of blocks from head whose state histories are reserved.
@@ -246,6 +247,7 @@ func (cfg *BlockChainConfig) triedbConfig(isVerkle bool) *triedb.Config {
246247
EnableStateIndexing: cfg.ArchiveMode,
247248
TrieCleanSize: cfg.TrieCleanLimit * 1024 * 1024,
248249
StateCleanSize: cfg.SnapshotLimit * 1024 * 1024,
250+
JournalDirectory: cfg.TrieJournalDirectory,
249251

250252
// TODO(rjl493456442): The write buffer represents the memory limit used
251253
// for flushing both trie data and state data to disk. The config name

core/rawdb/accessors_state.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -157,14 +157,6 @@ func WriteTrieJournal(db ethdb.KeyValueWriter, journal []byte) {
157157
}
158158
}
159159

160-
// DeleteTrieJournal deletes the serialized in-memory trie nodes of layers saved at
161-
// the last shutdown.
162-
func DeleteTrieJournal(db ethdb.KeyValueWriter) {
163-
if err := db.Delete(trieJournalKey); err != nil {
164-
log.Crit("Failed to remove tries journal", "err", err)
165-
}
166-
}
167-
168160
// ReadStateHistoryMeta retrieves the metadata corresponding to the specified
169161
// state history. Compute the position of state history in freezer by minus
170162
// one since the id of first state history starts from one(zero for initial

eth/backend.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,9 +236,13 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) {
236236
VmConfig: vm.Config{
237237
EnablePreimageRecording: config.EnablePreimageRecording,
238238
},
239+
// Enables file journaling for the trie database. The journal files will be stored
240+
// within the data directory. The corresponding paths will be either:
241+
// - DATADIR/triedb/merkle.journal
242+
// - DATADIR/triedb/verkle.journal
243+
TrieJournalDirectory: stack.ResolvePath("triedb"),
239244
}
240245
)
241-
242246
if config.VMTrace != "" {
243247
traceConfig := json.RawMessage("{}")
244248
if config.VMTraceJsonConfig != "" {

triedb/pathdb/database.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"errors"
2222
"fmt"
2323
"io"
24+
"path/filepath"
2425
"sync"
2526
"time"
2627

@@ -120,6 +121,7 @@ type Config struct {
120121
StateCleanSize int // Maximum memory allowance (in bytes) for caching clean state data
121122
WriteBufferSize int // Maximum memory allowance (in bytes) for write buffer
122123
ReadOnly bool // Flag whether the database is opened in read only mode
124+
JournalDirectory string // Absolute path of journal directory (null means the journal data is persisted in key-value store)
123125

124126
// Testing configurations
125127
SnapshotNoBuild bool // Flag Whether the state generation is allowed
@@ -156,6 +158,9 @@ func (c *Config) fields() []interface{} {
156158
} else {
157159
list = append(list, "history", fmt.Sprintf("last %d blocks", c.StateHistory))
158160
}
161+
if c.JournalDirectory != "" {
162+
list = append(list, "journal-dir", c.JournalDirectory)
163+
}
159164
return list
160165
}
161166

@@ -493,7 +498,6 @@ func (db *Database) Enable(root common.Hash) error {
493498
// Drop the stale state journal in persistent database and
494499
// reset the persistent state id back to zero.
495500
batch := db.diskdb.NewBatch()
496-
rawdb.DeleteTrieJournal(batch)
497501
rawdb.DeleteSnapshotRoot(batch)
498502
rawdb.WritePersistentStateID(batch, 0)
499503
if err := batch.Write(); err != nil {
@@ -573,8 +577,6 @@ func (db *Database) Recover(root common.Hash) error {
573577
// disk layer won't be accessible from outside.
574578
db.tree.init(dl)
575579
}
576-
rawdb.DeleteTrieJournal(db.diskdb)
577-
578580
// Explicitly sync the key-value store to ensure all recent writes are
579581
// flushed to disk. This step is crucial to prevent a scenario where
580582
// recent key-value writes are lost due to an application panic, while
@@ -680,6 +682,20 @@ func (db *Database) modifyAllowed() error {
680682
return nil
681683
}
682684

685+
// journalPath returns the absolute path of journal for persisting state data.
686+
func (db *Database) journalPath() string {
687+
if db.config.JournalDirectory == "" {
688+
return ""
689+
}
690+
var fname string
691+
if db.isVerkle {
692+
fname = fmt.Sprintf("verkle.journal")
693+
} else {
694+
fname = fmt.Sprintf("merkle.journal")
695+
}
696+
return filepath.Join(db.config.JournalDirectory, fname)
697+
}
698+
683699
// AccountHistory inspects the account history within the specified range.
684700
//
685701
// Start: State ID of the first history object for the query. 0 implies the first

triedb/pathdb/database_test.go

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,16 @@ import (
2121
"errors"
2222
"fmt"
2323
"math/rand"
24+
"os"
25+
"path/filepath"
26+
"strconv"
2427
"testing"
2528

2629
"github.com/ethereum/go-ethereum/common"
2730
"github.com/ethereum/go-ethereum/core/rawdb"
2831
"github.com/ethereum/go-ethereum/core/types"
2932
"github.com/ethereum/go-ethereum/crypto"
33+
"github.com/ethereum/go-ethereum/ethdb"
3034
"github.com/ethereum/go-ethereum/internal/testrand"
3135
"github.com/ethereum/go-ethereum/rlp"
3236
"github.com/ethereum/go-ethereum/trie"
@@ -121,7 +125,7 @@ type tester struct {
121125
snapStorages map[common.Hash]map[common.Hash]map[common.Hash][]byte // Keyed by the hash of account address and the hash of storage key
122126
}
123127

124-
func newTester(t *testing.T, historyLimit uint64, isVerkle bool, layers int, enableIndex bool) *tester {
128+
func newTester(t *testing.T, historyLimit uint64, isVerkle bool, layers int, enableIndex bool, journalDir string) *tester {
125129
var (
126130
disk, _ = rawdb.Open(rawdb.NewMemoryDatabase(), rawdb.OpenOptions{Ancient: t.TempDir()})
127131
db = New(disk, &Config{
@@ -131,6 +135,7 @@ func newTester(t *testing.T, historyLimit uint64, isVerkle bool, layers int, ena
131135
StateCleanSize: 256 * 1024,
132136
WriteBufferSize: 256 * 1024,
133137
NoAsyncFlush: true,
138+
JournalDirectory: journalDir,
134139
}, isVerkle)
135140

136141
obj = &tester{
@@ -466,7 +471,7 @@ func TestDatabaseRollback(t *testing.T) {
466471
}()
467472

468473
// Verify state histories
469-
tester := newTester(t, 0, false, 32, false)
474+
tester := newTester(t, 0, false, 32, false, "")
470475
defer tester.release()
471476

472477
if err := tester.verifyHistory(); err != nil {
@@ -500,7 +505,7 @@ func TestDatabaseRecoverable(t *testing.T) {
500505
}()
501506

502507
var (
503-
tester = newTester(t, 0, false, 12, false)
508+
tester = newTester(t, 0, false, 12, false, "")
504509
index = tester.bottomIndex()
505510
)
506511
defer tester.release()
@@ -544,7 +549,7 @@ func TestDisable(t *testing.T) {
544549
maxDiffLayers = 128
545550
}()
546551

547-
tester := newTester(t, 0, false, 32, false)
552+
tester := newTester(t, 0, false, 32, false, "")
548553
defer tester.release()
549554

550555
stored := crypto.Keccak256Hash(rawdb.ReadAccountTrieNode(tester.db.diskdb, nil))
@@ -586,7 +591,7 @@ func TestCommit(t *testing.T) {
586591
maxDiffLayers = 128
587592
}()
588593

589-
tester := newTester(t, 0, false, 12, false)
594+
tester := newTester(t, 0, false, 12, false, "")
590595
defer tester.release()
591596

592597
if err := tester.db.Commit(tester.lastHash(), false); err != nil {
@@ -610,20 +615,25 @@ func TestCommit(t *testing.T) {
610615
}
611616

612617
func TestJournal(t *testing.T) {
618+
testJournal(t, "")
619+
testJournal(t, filepath.Join(t.TempDir(), strconv.Itoa(rand.Intn(10000))))
620+
}
621+
622+
func testJournal(t *testing.T, journalDir string) {
613623
// Redefine the diff layer depth allowance for faster testing.
614624
maxDiffLayers = 4
615625
defer func() {
616626
maxDiffLayers = 128
617627
}()
618628

619-
tester := newTester(t, 0, false, 12, false)
629+
tester := newTester(t, 0, false, 12, false, journalDir)
620630
defer tester.release()
621631

622632
if err := tester.db.Journal(tester.lastHash()); err != nil {
623633
t.Errorf("Failed to journal, err: %v", err)
624634
}
625635
tester.db.Close()
626-
tester.db = New(tester.db.diskdb, nil, false)
636+
tester.db = New(tester.db.diskdb, tester.db.config, false)
627637

628638
// Verify states including disk layer and all diff on top.
629639
for i := 0; i < len(tester.roots); i++ {
@@ -640,13 +650,30 @@ func TestJournal(t *testing.T) {
640650
}
641651

642652
func TestCorruptedJournal(t *testing.T) {
653+
testCorruptedJournal(t, "", func(db ethdb.Database) {
654+
// Mutate the journal in disk, it should be regarded as invalid
655+
blob := rawdb.ReadTrieJournal(db)
656+
blob[0] = 0xa
657+
rawdb.WriteTrieJournal(db, blob)
658+
})
659+
660+
directory := filepath.Join(t.TempDir(), strconv.Itoa(rand.Intn(10000)))
661+
testCorruptedJournal(t, directory, func(_ ethdb.Database) {
662+
f, _ := os.OpenFile(filepath.Join(directory, "merkle.journal"), os.O_WRONLY, 0644)
663+
f.WriteAt([]byte{0xa}, 0)
664+
f.Sync()
665+
f.Close()
666+
})
667+
}
668+
669+
func testCorruptedJournal(t *testing.T, journalDir string, modifyFn func(database ethdb.Database)) {
643670
// Redefine the diff layer depth allowance for faster testing.
644671
maxDiffLayers = 4
645672
defer func() {
646673
maxDiffLayers = 128
647674
}()
648675

649-
tester := newTester(t, 0, false, 12, false)
676+
tester := newTester(t, 0, false, 12, false, journalDir)
650677
defer tester.release()
651678

652679
if err := tester.db.Journal(tester.lastHash()); err != nil {
@@ -655,13 +682,10 @@ func TestCorruptedJournal(t *testing.T) {
655682
tester.db.Close()
656683
root := crypto.Keccak256Hash(rawdb.ReadAccountTrieNode(tester.db.diskdb, nil))
657684

658-
// Mutate the journal in disk, it should be regarded as invalid
659-
blob := rawdb.ReadTrieJournal(tester.db.diskdb)
660-
blob[0] = 0xa
661-
rawdb.WriteTrieJournal(tester.db.diskdb, blob)
685+
modifyFn(tester.db.diskdb)
662686

663687
// Verify states, all not-yet-written states should be discarded
664-
tester.db = New(tester.db.diskdb, nil, false)
688+
tester.db = New(tester.db.diskdb, tester.db.config, false)
665689
for i := 0; i < len(tester.roots); i++ {
666690
if tester.roots[i] == root {
667691
if err := tester.verifyState(root); err != nil {
@@ -694,7 +718,7 @@ func TestTailTruncateHistory(t *testing.T) {
694718
maxDiffLayers = 128
695719
}()
696720

697-
tester := newTester(t, 10, false, 12, false)
721+
tester := newTester(t, 10, false, 12, false, "")
698722
defer tester.release()
699723

700724
tester.db.Close()

triedb/pathdb/fileutils_unix.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// Copyright 2025 The go-ethereum Authors
2+
// This file is part of the go-ethereum library.
3+
//
4+
// The go-ethereum library is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU Lesser General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
//
9+
// The go-ethereum library is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU Lesser General Public License for more details.
13+
//
14+
// You should have received a copy of the GNU Lesser General Public License
15+
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16+
17+
//go:build !windows
18+
// +build !windows
19+
20+
package pathdb
21+
22+
import (
23+
"errors"
24+
"os"
25+
"syscall"
26+
)
27+
28+
func isErrInvalid(err error) bool {
29+
if errors.Is(err, os.ErrInvalid) {
30+
return true
31+
}
32+
// Go >= 1.8 returns *os.PathError instead
33+
if patherr, ok := err.(*os.PathError); ok && patherr.Err == syscall.EINVAL {
34+
return true
35+
}
36+
return false
37+
}
38+
39+
func syncDir(name string) error {
40+
// As per fsync manpage, Linux seems to expect fsync on directory, however
41+
// some system don't support this, so we will ignore syscall.EINVAL.
42+
//
43+
// From fsync(2):
44+
// Calling fsync() does not necessarily ensure that the entry in the
45+
// directory containing the file has also reached disk. For that an
46+
// explicit fsync() on a file descriptor for the directory is also needed.
47+
f, err := os.Open(name)
48+
if err != nil {
49+
return err
50+
}
51+
defer f.Close()
52+
53+
if err := f.Sync(); err != nil && !isErrInvalid(err) {
54+
return err
55+
}
56+
return nil
57+
}

triedb/pathdb/fileutils_windows.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright 2025 The go-ethereum Authors
2+
// This file is part of the go-ethereum library.
3+
//
4+
// The go-ethereum library is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU Lesser General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
//
9+
// The go-ethereum library is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU Lesser General Public License for more details.
13+
//
14+
// You should have received a copy of the GNU Lesser General Public License
15+
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16+
17+
//go:build windows
18+
// +build windows
19+
20+
package pathdb
21+
22+
func syncDir(name string) error {
23+
// On Windows, fsync on directories is not supported
24+
return nil
25+
}

triedb/pathdb/history_reader_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ func testHistoryReader(t *testing.T, historyLimit uint64) {
126126
}()
127127
//log.SetDefault(log.NewLogger(log.NewTerminalHandlerWithLevel(os.Stderr, log.LevelDebug, true)))
128128

129-
env := newTester(t, historyLimit, false, 64, true)
129+
env := newTester(t, historyLimit, false, 64, true, "")
130130
defer env.release()
131131
waitIndexing(env.db)
132132

0 commit comments

Comments
 (0)