Skip to content

Commit 8fe09df

Browse files
s1na and fjl authored
cmd/geth: add prune history command (#31384)
This adds a new subcommand 'geth prune-history' that removes the pre-merge history on supported networks. Geth is not yet fully ready to work in this mode, so please do not run this command on your production node.

Co-authored-by: Felix Lange <[email protected]>
1 parent 1886922 commit 8fe09df

File tree

8 files changed

+171
-25
lines changed

8 files changed

+171
-25
lines changed

cmd/geth/chaincmd.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535
"github.com/ethereum/go-ethereum/core/state"
3636
"github.com/ethereum/go-ethereum/core/types"
3737
"github.com/ethereum/go-ethereum/crypto"
38+
"github.com/ethereum/go-ethereum/eth/ethconfig"
3839
"github.com/ethereum/go-ethereum/ethdb"
3940
"github.com/ethereum/go-ethereum/internal/era"
4041
"github.com/ethereum/go-ethereum/log"
@@ -189,6 +190,18 @@ It's deprecated, please use "geth db import" instead.
189190
This command dumps out the state for a given block (or latest, if none provided).
190191
`,
191192
}
193+
194+
// pruneCommand is the "prune-history" subcommand. It runs pruneHistory and
// accepts the shared database flags.
pruneCommand = &cli.Command{
	Name:      "prune-history",
	Usage:     "Prune blockchain history (block bodies and receipts) up to the merge block",
	ArgsUsage: "",
	Description: `
The prune-history command removes historical block bodies and receipts from the
blockchain database up to the merge block, while preserving block headers. This
helps reduce storage requirements for nodes that don't need full historical data.`,
	Flags:  utils.DatabaseFlags,
	Action: pruneHistory,
}
192205
)
193206

194207
// initGenesis will initialise the given JSON format genesis file and writes it as
@@ -598,3 +611,51 @@ func hashish(x string) bool {
598611
_, err := strconv.Atoi(x)
599612
return err != nil
600613
}
614+
615+
func pruneHistory(ctx *cli.Context) error {
616+
stack, _ := makeConfigNode(ctx)
617+
defer stack.Close()
618+
619+
// Open the chain database
620+
chain, chaindb := utils.MakeChain(ctx, stack, false)
621+
defer chaindb.Close()
622+
defer chain.Stop()
623+
624+
// Determine the prune point. This will be the first PoS block.
625+
prunePoint, ok := ethconfig.HistoryPrunePoints[chain.Genesis().Hash()]
626+
if !ok || prunePoint == nil {
627+
return errors.New("prune point not found")
628+
}
629+
var (
630+
mergeBlock = prunePoint.BlockNumber
631+
mergeBlockHash = prunePoint.BlockHash.Hex()
632+
)
633+
634+
// Check we're far enough past merge to ensure all data is in freezer
635+
currentHeader := chain.CurrentHeader()
636+
if currentHeader == nil {
637+
return errors.New("current header not found")
638+
}
639+
if currentHeader.Number.Uint64() < mergeBlock+params.FullImmutabilityThreshold {
640+
return fmt.Errorf("chain not far enough past merge block, need %d more blocks",
641+
mergeBlock+params.FullImmutabilityThreshold-currentHeader.Number.Uint64())
642+
}
643+
644+
// Double-check the prune block in db has the expected hash.
645+
hash := rawdb.ReadCanonicalHash(chaindb, mergeBlock)
646+
if hash != common.HexToHash(mergeBlockHash) {
647+
return fmt.Errorf("merge block hash mismatch: got %s, want %s", hash.Hex(), mergeBlockHash)
648+
}
649+
650+
log.Info("Starting history pruning", "head", currentHeader.Number, "tail", mergeBlock, "tailHash", mergeBlockHash)
651+
start := time.Now()
652+
rawdb.PruneTransactionIndex(chaindb, mergeBlock)
653+
if _, err := chaindb.TruncateTail(mergeBlock); err != nil {
654+
return fmt.Errorf("failed to truncate ancient data: %v", err)
655+
}
656+
log.Info("History pruning completed", "tail", mergeBlock, "elapsed", common.PrettyDuration(time.Since(start)))
657+
658+
// TODO(s1na): what if there is a crash between the two prune operations?
659+
660+
return nil
661+
}

cmd/geth/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ func init() {
226226
removedbCommand,
227227
dumpCommand,
228228
dumpGenesisCommand,
229+
pruneCommand,
229230
// See accountcmd.go:
230231
accountCommand,
231232
walletCommand,

core/blockchain.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,8 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
332332
bc.prefetcher = newStatePrefetcher(chainConfig, bc.hc)
333333
bc.processor = NewStateProcessor(chainConfig, bc.hc)
334334

335-
bc.genesisBlock = bc.GetBlockByNumber(0)
335+
genesisHeader := bc.GetHeaderByNumber(0)
336+
bc.genesisBlock = types.NewBlockWithHeader(genesisHeader)
336337
if bc.genesisBlock == nil {
337338
return nil, ErrNoGenesis
338339
}

core/rawdb/accessors_indexes.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,14 @@ func DeleteTxLookupEntries(db ethdb.KeyValueWriter, hashes []common.Hash) {
103103
// DeleteAllTxLookupEntries purges all the transaction indexes in the database.
104104
// If condition is specified, only the entry with condition as True will be
105105
// removed; If condition is not specified, the entry is deleted.
106-
func DeleteAllTxLookupEntries(db ethdb.KeyValueStore, condition func([]byte) bool) {
106+
func DeleteAllTxLookupEntries(db ethdb.KeyValueStore, condition func(common.Hash, []byte) bool) {
107107
iter := NewKeyLengthIterator(db.NewIterator(txLookupPrefix, nil), common.HashLength+len(txLookupPrefix))
108108
defer iter.Release()
109109

110110
batch := db.NewBatch()
111111
for iter.Next() {
112-
if condition == nil || condition(iter.Value()) {
112+
txhash := common.Hash(iter.Key()[1:])
113+
if condition == nil || condition(txhash, iter.Value()) {
113114
batch.Delete(iter.Key())
114115
}
115116
if batch.ValueSize() >= ethdb.IdealBatchSize {

core/rawdb/chain_iterator.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package rawdb
1818

1919
import (
20+
"encoding/binary"
2021
"runtime"
2122
"sync/atomic"
2223
"time"
@@ -361,3 +362,38 @@ func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt ch
361362
func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
362363
unindexTransactions(db, from, to, interrupt, hook, false)
363364
}
365+
366+
// PruneTransactionIndex removes all tx index entries below a certain block number.
367+
func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) {
368+
tail := ReadTxIndexTail(db)
369+
if tail == nil || *tail > pruneBlock {
370+
return // no index, or index ends above pruneBlock
371+
}
372+
// There are blocks below pruneBlock in the index. Iterate the entire index to remove
373+
// their entries. Note if this fails, the index is messed up, but tail still points to
374+
// the old tail.
375+
var count, removed int
376+
DeleteAllTxLookupEntries(db, func(txhash common.Hash, v []byte) bool {
377+
count++
378+
if count%10000000 == 0 {
379+
log.Info("Pruning tx index", "count", count, "removed", removed)
380+
}
381+
if len(v) > 8 {
382+
log.Error("Skipping legacy tx index entry", "hash", txhash)
383+
return false
384+
}
385+
bn := decodeNumber(v)
386+
if bn < pruneBlock {
387+
removed++
388+
return true
389+
}
390+
return false
391+
})
392+
WriteTxIndexTail(db, pruneBlock)
393+
}
394+
395+
// decodeNumber interprets b as a big-endian unsigned integer of at most eight
// bytes, treating any missing leading bytes as zero. An empty or nil slice
// decodes to zero. It panics if b is longer than eight bytes, so callers must
// check the length beforehand.
func decodeNumber(b []byte) uint64 {
	var word [8]byte
	// Right-align the input inside the fixed-size word so that short encodings
	// are zero-extended on the left.
	pad := len(word) - len(b)
	copy(word[pad:], b)
	return binary.BigEndian.Uint64(word[:])
}

core/rawdb/chain_iterator_test.go

Lines changed: 65 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525

2626
"github.com/ethereum/go-ethereum/common"
2727
"github.com/ethereum/go-ethereum/core/types"
28+
"github.com/ethereum/go-ethereum/ethdb"
2829
)
2930

3031
func TestChainIterator(t *testing.T) {
@@ -102,19 +103,18 @@ func TestChainIterator(t *testing.T) {
102103
}
103104
}
104105

105-
func TestIndexTransactions(t *testing.T) {
106-
// Construct test chain db
107-
chainDb := NewMemoryDatabase()
108-
109-
var block *types.Block
106+
func initDatabaseWithTransactions(db ethdb.Database) ([]*types.Block, []*types.Transaction) {
107+
var blocks []*types.Block
110108
var txs []*types.Transaction
111109
to := common.BytesToAddress([]byte{0x11})
112110

113111
// Write empty genesis block
114-
block = types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, newTestHasher())
115-
WriteBlock(chainDb, block)
116-
WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64())
112+
block := types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, newTestHasher())
113+
WriteBlock(db, block)
114+
WriteCanonicalHash(db, block.Hash(), block.NumberU64())
115+
blocks = append(blocks, block)
117116

117+
// Create transactions.
118118
for i := uint64(1); i <= 10; i++ {
119119
var tx *types.Transaction
120120
if i%2 == 0 {
@@ -138,48 +138,59 @@ func TestIndexTransactions(t *testing.T) {
138138
})
139139
}
140140
txs = append(txs, tx)
141-
block = types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, &types.Body{Transactions: types.Transactions{tx}}, nil, newTestHasher())
142-
WriteBlock(chainDb, block)
143-
WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64())
141+
block := types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, &types.Body{Transactions: types.Transactions{tx}}, nil, newTestHasher())
142+
WriteBlock(db, block)
143+
WriteCanonicalHash(db, block.Hash(), block.NumberU64())
144+
blocks = append(blocks, block)
144145
}
146+
147+
return blocks, txs
148+
}
149+
150+
func TestIndexTransactions(t *testing.T) {
151+
// Construct test chain db
152+
chainDB := NewMemoryDatabase()
153+
154+
_, txs := initDatabaseWithTransactions(chainDB)
155+
145156
// verify checks whether the tx indices in the range [from, to)
146157
// is expected.
147158
verify := func(from, to int, exist bool, tail uint64) {
148159
for i := from; i < to; i++ {
149160
if i == 0 {
150161
continue
151162
}
152-
number := ReadTxLookupEntry(chainDb, txs[i-1].Hash())
163+
number := ReadTxLookupEntry(chainDB, txs[i-1].Hash())
153164
if exist && number == nil {
154165
t.Fatalf("Transaction index %d missing", i)
155166
}
156167
if !exist && number != nil {
157168
t.Fatalf("Transaction index %d is not deleted", i)
158169
}
159170
}
160-
number := ReadTxIndexTail(chainDb)
171+
number := ReadTxIndexTail(chainDB)
161172
if number == nil || *number != tail {
162173
t.Fatalf("Transaction tail mismatch")
163174
}
164175
}
165-
IndexTransactions(chainDb, 5, 11, nil, false)
176+
IndexTransactions(chainDB, 5, 11, nil, false)
166177
verify(5, 11, true, 5)
167178
verify(0, 5, false, 5)
168179

169-
IndexTransactions(chainDb, 0, 5, nil, false)
180+
IndexTransactions(chainDB, 0, 5, nil, false)
170181
verify(0, 11, true, 0)
171182

172-
UnindexTransactions(chainDb, 0, 5, nil, false)
183+
UnindexTransactions(chainDB, 0, 5, nil, false)
173184
verify(5, 11, true, 5)
174185
verify(0, 5, false, 5)
175186

176-
UnindexTransactions(chainDb, 5, 11, nil, false)
187+
UnindexTransactions(chainDB, 5, 11, nil, false)
177188
verify(0, 11, false, 11)
178189

179190
// Testing corner cases
180191
signal := make(chan struct{})
181192
var once sync.Once
182-
indexTransactionsForTesting(chainDb, 5, 11, signal, func(n uint64) bool {
193+
indexTransactionsForTesting(chainDB, 5, 11, signal, func(n uint64) bool {
183194
if n <= 8 {
184195
once.Do(func() {
185196
close(signal)
@@ -190,11 +201,11 @@ func TestIndexTransactions(t *testing.T) {
190201
})
191202
verify(9, 11, true, 9)
192203
verify(0, 9, false, 9)
193-
IndexTransactions(chainDb, 0, 9, nil, false)
204+
IndexTransactions(chainDB, 0, 9, nil, false)
194205

195206
signal = make(chan struct{})
196207
var once2 sync.Once
197-
unindexTransactionsForTesting(chainDb, 0, 11, signal, func(n uint64) bool {
208+
unindexTransactionsForTesting(chainDB, 0, 11, signal, func(n uint64) bool {
198209
if n >= 8 {
199210
once2.Do(func() {
200211
close(signal)
@@ -206,3 +217,37 @@ func TestIndexTransactions(t *testing.T) {
206217
verify(8, 11, true, 8)
207218
verify(0, 8, false, 8)
208219
}
220+
221+
func TestPruneTransactionIndex(t *testing.T) {
222+
chainDB := NewMemoryDatabase()
223+
blocks, _ := initDatabaseWithTransactions(chainDB)
224+
lastBlock := blocks[len(blocks)-1].NumberU64()
225+
pruneBlock := lastBlock - 3
226+
227+
IndexTransactions(chainDB, 0, lastBlock+1, nil, false)
228+
229+
// Check all transactions are in index.
230+
for _, block := range blocks {
231+
for _, tx := range block.Transactions() {
232+
num := ReadTxLookupEntry(chainDB, tx.Hash())
233+
if num == nil || *num != block.NumberU64() {
234+
t.Fatalf("wrong TxLookup entry: %x -> %v", tx.Hash(), num)
235+
}
236+
}
237+
}
238+
239+
PruneTransactionIndex(chainDB, pruneBlock)
240+
241+
// Check transactions from old blocks not included.
242+
for _, block := range blocks {
243+
for _, tx := range block.Transactions() {
244+
num := ReadTxLookupEntry(chainDB, tx.Hash())
245+
if block.NumberU64() < pruneBlock && num != nil {
246+
t.Fatalf("TxLookup entry not removed: %x -> %v", tx.Hash(), num)
247+
}
248+
if block.NumberU64() >= pruneBlock && (num == nil || *num != block.NumberU64()) {
249+
t.Fatalf("wrong TxLookup entry after pruning: %x -> %v", tx.Hash(), num)
250+
}
251+
}
252+
}
253+
}

core/txindexer.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"errors"
2121
"fmt"
2222

23+
"github.com/ethereum/go-ethereum/common"
2324
"github.com/ethereum/go-ethereum/core/rawdb"
2425
"github.com/ethereum/go-ethereum/ethdb"
2526
"github.com/ethereum/go-ethereum/log"
@@ -186,7 +187,7 @@ func (indexer *txIndexer) repair(head uint64) {
186187
// potentially leaving dangling indexes in the database.
187188
// However, this is considered acceptable.
188189
rawdb.WriteTxIndexTail(indexer.db, indexer.cutoff)
189-
rawdb.DeleteAllTxLookupEntries(indexer.db, func(blob []byte) bool {
190+
rawdb.DeleteAllTxLookupEntries(indexer.db, func(txhash common.Hash, blob []byte) bool {
190191
n := rawdb.DecodeTxLookupEntry(blob, indexer.db)
191192
return n != nil && *n < indexer.cutoff
192193
})

tests/testdata

0 commit comments

Comments
 (0)