Skip to content

Commit d468c33

Browse files
authored
cmd/{geth,utils}: add cmd to export preimages in snap enumeration order (#28256)
Adds a subcommand, `geth snapshot export-preimages`, that exports the preimage of every hash found during a snapshot enumeration. That is, it exports the preimages of _only the active state_, and not _all_ preimages, which would also include those that have been used but are no longer part of the state. This tool is needed for the verkle transition, in order to distribute the preimages needed for the conversion. Since only the 'active' preimages are exported, the output is shrunk from ~70GB to ~4GB. The order of the output is the order used by the snapshot enumeration, which avoids database thrashing. However, it also means that storage-slot preimages are not deduplicated.
1 parent 5ff929c commit d468c33

File tree

4 files changed

+154
-34
lines changed

4 files changed

+154
-34
lines changed

cmd/geth/chaincmd.go

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -137,20 +137,7 @@ The import-preimages command imports hash preimages from an RLP encoded stream.
137137
It's deprecated, please use "geth db import" instead.
138138
`,
139139
}
140-
exportPreimagesCommand = &cli.Command{
141-
Action: exportPreimages,
142-
Name: "export-preimages",
143-
Usage: "Export the preimage database into an RLP stream",
144-
ArgsUsage: "<dumpfile>",
145-
Flags: flags.Merge([]cli.Flag{
146-
utils.CacheFlag,
147-
utils.SyncModeFlag,
148-
}, utils.DatabaseFlags),
149-
Description: `
150-
The export-preimages command exports hash preimages to an RLP encoded stream.
151-
It's deprecated, please use "geth db export" instead.
152-
`,
153-
}
140+
154141
dumpCommand = &cli.Command{
155142
Action: dump,
156143
Name: "dump",
@@ -386,6 +373,9 @@ func exportChain(ctx *cli.Context) error {
386373
}
387374

388375
// importPreimages imports preimage data from the specified file.
376+
// it is deprecated, and the export function has been removed, but
377+
// the import function is kept around for the time being so that
378+
// older file formats can still be imported.
389379
func importPreimages(ctx *cli.Context) error {
390380
if ctx.Args().Len() < 1 {
391381
utils.Fatalf("This command requires an argument.")
@@ -405,25 +395,6 @@ func importPreimages(ctx *cli.Context) error {
405395
return nil
406396
}
407397

408-
// exportPreimages dumps the preimage data to specified json file in streaming way.
409-
func exportPreimages(ctx *cli.Context) error {
410-
if ctx.Args().Len() < 1 {
411-
utils.Fatalf("This command requires an argument.")
412-
}
413-
stack, _ := makeConfigNode(ctx)
414-
defer stack.Close()
415-
416-
db := utils.MakeChainDatabase(ctx, stack, true)
417-
defer db.Close()
418-
start := time.Now()
419-
420-
if err := utils.ExportPreimages(db, ctx.Args().First()); err != nil {
421-
utils.Fatalf("Export error: %v\n", err)
422-
}
423-
fmt.Printf("Export done in %v\n", time.Since(start))
424-
return nil
425-
}
426-
427398
func parseDumpConfig(ctx *cli.Context, stack *node.Node) (*state.DumpConfig, ethdb.Database, common.Hash, error) {
428399
db := utils.MakeChainDatabase(ctx, stack, true)
429400
defer db.Close()

cmd/geth/main.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,6 @@ func init() {
208208
importCommand,
209209
exportCommand,
210210
importPreimagesCommand,
211-
exportPreimagesCommand,
212211
removedbCommand,
213212
dumpCommand,
214213
dumpGenesisCommand,

cmd/geth/snapshot.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"bytes"
2121
"encoding/json"
2222
"errors"
23+
"fmt"
2324
"os"
2425
"time"
2526

@@ -147,6 +148,17 @@ as the backend data source, making this command a lot faster.
147148
148149
The argument is interpreted as block number or hash. If none is provided, the latest
149150
block is used.
151+
`,
152+
},
153+
{
154+
Action: snapshotExportPreimages,
155+
Name: "export-preimages",
156+
Usage: "Export the preimage in snapshot enumeration order",
157+
ArgsUsage: "<dumpfile> [<root>]",
158+
Flags: utils.DatabaseFlags,
159+
Description: `
160+
The export-preimages command exports hash preimages to a flat file, in exactly
161+
the expected order for the overlay tree migration.
150162
`,
151163
},
152164
},
@@ -604,6 +616,48 @@ func dumpState(ctx *cli.Context) error {
604616
return nil
605617
}
606618

619+
// snapshotExportPreimages dumps the preimage data to a flat file.
620+
func snapshotExportPreimages(ctx *cli.Context) error {
621+
if ctx.NArg() < 1 {
622+
utils.Fatalf("This command requires an argument.")
623+
}
624+
stack, _ := makeConfigNode(ctx)
625+
defer stack.Close()
626+
627+
chaindb := utils.MakeChainDatabase(ctx, stack, true)
628+
defer chaindb.Close()
629+
630+
triedb := utils.MakeTrieDatabase(ctx, chaindb, false, true)
631+
defer triedb.Close()
632+
633+
var root common.Hash
634+
if ctx.NArg() > 1 {
635+
rootBytes := common.FromHex(ctx.Args().Get(1))
636+
if len(rootBytes) != common.HashLength {
637+
return fmt.Errorf("invalid hash: %s", ctx.Args().Get(1))
638+
}
639+
root = common.BytesToHash(rootBytes)
640+
} else {
641+
headBlock := rawdb.ReadHeadBlock(chaindb)
642+
if headBlock == nil {
643+
log.Error("Failed to load head block")
644+
return errors.New("no head block")
645+
}
646+
root = headBlock.Root()
647+
}
648+
snapConfig := snapshot.Config{
649+
CacheSize: 256,
650+
Recovery: false,
651+
NoBuild: true,
652+
AsyncBuild: false,
653+
}
654+
snaptree, err := snapshot.New(snapConfig, chaindb, triedb, root)
655+
if err != nil {
656+
return err
657+
}
658+
return utils.ExportSnapshotPreimages(chaindb, snaptree, ctx.Args().First(), root)
659+
}
660+
607661
// checkAccount iterates the snap data layers, and looks up the given account
608662
// across all layers.
609663
func checkAccount(ctx *cli.Context) error {

cmd/utils/cmd.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"github.com/ethereum/go-ethereum/common"
3434
"github.com/ethereum/go-ethereum/core"
3535
"github.com/ethereum/go-ethereum/core/rawdb"
36+
"github.com/ethereum/go-ethereum/core/state/snapshot"
3637
"github.com/ethereum/go-ethereum/core/types"
3738
"github.com/ethereum/go-ethereum/crypto"
3839
"github.com/ethereum/go-ethereum/eth/ethconfig"
@@ -374,6 +375,101 @@ func ExportPreimages(db ethdb.Database, fn string) error {
374375
return nil
375376
}
376377

378+
// ExportSnapshotPreimages exports the preimages corresponding to the enumeration of
379+
// the snapshot for a given root.
380+
func ExportSnapshotPreimages(chaindb ethdb.Database, snaptree *snapshot.Tree, fn string, root common.Hash) error {
381+
log.Info("Exporting preimages", "file", fn)
382+
383+
fh, err := os.OpenFile(fn, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.ModePerm)
384+
if err != nil {
385+
return err
386+
}
387+
defer fh.Close()
388+
389+
// Enable gzip compressing if file name has gz suffix.
390+
var writer io.Writer = fh
391+
if strings.HasSuffix(fn, ".gz") {
392+
gz := gzip.NewWriter(writer)
393+
defer gz.Close()
394+
writer = gz
395+
}
396+
buf := bufio.NewWriter(writer)
397+
defer buf.Flush()
398+
writer = buf
399+
400+
type hashAndPreimageSize struct {
401+
Hash common.Hash
402+
Size int
403+
}
404+
hashCh := make(chan hashAndPreimageSize)
405+
406+
var (
407+
start = time.Now()
408+
logged = time.Now()
409+
preimages int
410+
)
411+
go func() {
412+
defer close(hashCh)
413+
accIt, err := snaptree.AccountIterator(root, common.Hash{})
414+
if err != nil {
415+
log.Error("Failed to create account iterator", "error", err)
416+
return
417+
}
418+
defer accIt.Release()
419+
420+
for accIt.Next() {
421+
acc, err := types.FullAccount(accIt.Account())
422+
if err != nil {
423+
log.Error("Failed to get full account", "error", err)
424+
return
425+
}
426+
preimages += 1
427+
hashCh <- hashAndPreimageSize{Hash: accIt.Hash(), Size: common.AddressLength}
428+
429+
if acc.Root != (common.Hash{}) && acc.Root != types.EmptyRootHash {
430+
stIt, err := snaptree.StorageIterator(root, accIt.Hash(), common.Hash{})
431+
if err != nil {
432+
log.Error("Failed to create storage iterator", "error", err)
433+
return
434+
}
435+
for stIt.Next() {
436+
preimages += 1
437+
hashCh <- hashAndPreimageSize{Hash: stIt.Hash(), Size: common.HashLength}
438+
439+
if time.Since(logged) > time.Second*8 {
440+
logged = time.Now()
441+
log.Info("Exporting preimages", "count", preimages, "elapsed", common.PrettyDuration(time.Since(start)))
442+
}
443+
}
444+
stIt.Release()
445+
}
446+
if time.Since(logged) > time.Second*8 {
447+
logged = time.Now()
448+
log.Info("Exporting preimages", "count", preimages, "elapsed", common.PrettyDuration(time.Since(start)))
449+
}
450+
}
451+
}()
452+
453+
for item := range hashCh {
454+
preimage := rawdb.ReadPreimage(chaindb, item.Hash)
455+
if len(preimage) == 0 {
456+
return fmt.Errorf("missing preimage for %v", item.Hash)
457+
}
458+
if len(preimage) != item.Size {
459+
return fmt.Errorf("invalid preimage size, have %d", len(preimage))
460+
}
461+
rlpenc, err := rlp.EncodeToBytes(preimage)
462+
if err != nil {
463+
return fmt.Errorf("error encoding preimage: %w", err)
464+
}
465+
if _, err := writer.Write(rlpenc); err != nil {
466+
return fmt.Errorf("failed to write preimage: %w", err)
467+
}
468+
}
469+
log.Info("Exported preimages", "count", preimages, "elapsed", common.PrettyDuration(time.Since(start)), "file", fn)
470+
return nil
471+
}
472+
377473
// exportHeader is used in the export/import flow. When we do an export,
378474
// the first element we output is the exportHeader.
379475
// Whenever a backwards-incompatible change is made, the Version header

0 commit comments

Comments
 (0)