cmd/evm: benchmarking via statetest command + filter by name, index and fork (#30442)

jwasinger · holiman · holiman · commit f57f215f7047 · 2024-11-19T14:50:11.000+01:00
When `evm statetest --bench` is specified, benchmark the execution
similarly to `evm run`.

Also adds the ability to filter tests by name, index and fork via the new `--statetest.name`, `--statetest.index` and `--statetest.fork` flags.

---------

Co-authored-by: Martin Holst Swende <martin@swende.se>
diff --git a/cmd/evm/runner.go b/cmd/evm/runner.go
@@ -76,36 +76,53 @@ func readGenesis(genesisPath string) *core.Genesis {
 }
 
 type execStats struct {
-	time           time.Duration // The execution time.
-	allocs         int64         // The number of heap allocations during execution.
-	bytesAllocated int64         // The cumulative number of bytes allocated during execution.
+	Time           time.Duration `json:"time"`           // The execution time (average per run when benchmarking).
+	Allocs         int64         `json:"allocs"`         // The number of heap allocations during execution.
+	BytesAllocated int64         `json:"bytesAllocated"` // The cumulative number of bytes allocated during execution.
+	GasUsed        uint64        `json:"gasUsed"`        // The amount of gas used during execution.
 }
 
-func timedExec(bench bool, execFunc func() ([]byte, uint64, error)) (output []byte, gasLeft uint64, stats execStats, err error) {
+func timedExec(bench bool, execFunc func() ([]byte, uint64, error)) ([]byte, execStats, error) {
 	if bench {
+		// Warm-up run; its output, gas and error are the reference every benchmark iteration must match
+		output, gasUsed, err := execFunc()
 		result := testing.Benchmark(func(b *testing.B) {
 			for i := 0; i < b.N; i++ {
-				output, gasLeft, err = execFunc()
+				haveOutput, haveGasUsed, haveErr := execFunc()
+				if !bytes.Equal(haveOutput, output) {
+					b.Fatalf("output differs, have\n%x\nwant\n%x\n", haveOutput, output)
+				}
+				if haveGasUsed != gasUsed {
+					b.Fatalf("gas differs, have %v want %v", haveGasUsed, gasUsed)
+				}
+				if haveErr != err {
+					b.Fatalf("err differs, have %v want %v", haveErr, err)
+				}
 			}
 		})
-
 		// Get the average execution time from the benchmarking result.
 		// There are other useful stats here that could be reported.
-		stats.time = time.Duration(result.NsPerOp())
-		stats.allocs = result.AllocsPerOp()
-		stats.bytesAllocated = result.AllocedBytesPerOp()
-	} else {
-		var memStatsBefore, memStatsAfter goruntime.MemStats
-		goruntime.ReadMemStats(&memStatsBefore)
-		startTime := time.Now()
-		output, gasLeft, err = execFunc()
-		stats.time = time.Since(startTime)
-		goruntime.ReadMemStats(&memStatsAfter)
-		stats.allocs = int64(memStatsAfter.Mallocs - memStatsBefore.Mallocs)
-		stats.bytesAllocated = int64(memStatsAfter.TotalAlloc - memStatsBefore.TotalAlloc)
+		stats := execStats{
+			Time:           time.Duration(result.NsPerOp()),
+			Allocs:         result.AllocsPerOp(),
+			BytesAllocated: result.AllocedBytesPerOp(),
+			GasUsed:        gasUsed,
+		}
+		return output, stats, err
 	}
-
-	return output, gasLeft, stats, err
+	var memStatsBefore, memStatsAfter goruntime.MemStats // single run: stats are deltas measured around one call
+	goruntime.ReadMemStats(&memStatsBefore)
+	t0 := time.Now()
+	output, gasUsed, err := execFunc()
+	duration := time.Since(t0)
+	goruntime.ReadMemStats(&memStatsAfter)
+	stats := execStats{
+		Time:           duration,
+		Allocs:         int64(memStatsAfter.Mallocs - memStatsBefore.Mallocs),
+		BytesAllocated: int64(memStatsAfter.TotalAlloc - memStatsBefore.TotalAlloc),
+		GasUsed:        gasUsed,
+	}
+	return output, stats, err
 }
 
 func runCmd(ctx *cli.Context) error {
@@ -265,12 +282,13 @@ func runCmd(ctx *cli.Context) error {
 			statedb.SetCode(receiver, code)
 		}
 		execFunc = func() ([]byte, uint64, error) {
-			return runtime.Call(receiver, input, &runtimeConfig)
+			output, gasLeft, err := runtime.Call(receiver, input, &runtimeConfig)
+			return output, initialGas - gasLeft, err
 		}
 	}
 
 	bench := ctx.Bool(BenchFlag.Name)
-	output, leftOverGas, stats, err := timedExec(bench, execFunc)
+	output, stats, err := timedExec(bench, execFunc)
 
 	if ctx.Bool(DumpFlag.Name) {
 		root, err := statedb.Commit(genesisConfig.Number, true)
@@ -300,7 +318,7 @@ func runCmd(ctx *cli.Context) error {
 execution time:  %v
 allocations:     %d
 allocated bytes: %d
-`, initialGas-leftOverGas, stats.time, stats.allocs, stats.bytesAllocated)
+`, stats.GasUsed, stats.Time, stats.Allocs, stats.BytesAllocated)
 	}
 	if tracer == nil {
 		fmt.Printf("%#x\n", output)
diff --git a/cmd/evm/staterunner.go b/cmd/evm/staterunner.go
@@ -27,26 +27,51 @@ import (
 	"github.com/ethereum/go-ethereum/core/state"
 	"github.com/ethereum/go-ethereum/core/vm"
 	"github.com/ethereum/go-ethereum/eth/tracers/logger"
+	"github.com/ethereum/go-ethereum/internal/flags"
 	"github.com/ethereum/go-ethereum/tests"
 	"github.com/urfave/cli/v2"
 )
 
+var (
+	forkFlag = &cli.StringFlag{
+		Name:     "statetest.fork",
+		Usage:    "The hard-fork to run the test against",
+		Category: flags.VMCategory,
+	}
+	idxFlag = &cli.IntFlag{
+		Name:     "statetest.index",
+		Usage:    "The index of the subtest to run",
+		Category: flags.VMCategory,
+		Value:    -1, // -1 is the "no filter" sentinel: every subtest index is selected
+	}
+	testNameFlag = &cli.StringFlag{
+		Name:     "statetest.name",
+		Usage:    "The name of the state test to run",
+		Category: flags.VMCategory,
+	}
+)
 var stateTestCommand = &cli.Command{
 	Action:    stateTestCmd,
 	Name:      "statetest",
 	Usage:     "Executes the given state tests. Filenames can be fed via standard input (batch mode) or as an argument (one-off execution).",
 	ArgsUsage: "<file>",
+	Flags: []cli.Flag{ // optional filters restricting which subtests are executed
+		forkFlag,
+		idxFlag,
+		testNameFlag,
+	},
 }
 
 // StatetestResult contains the execution status after running a state test, any
 // error that might have occurred and a dump of the final state if requested.
 type StatetestResult struct {
-	Name  string       `json:"name"`
-	Pass  bool         `json:"pass"`
-	Root  *common.Hash `json:"stateRoot,omitempty"`
-	Fork  string       `json:"fork"`
-	Error string       `json:"error,omitempty"`
-	State *state.Dump  `json:"state,omitempty"`
+	Name       string       `json:"name"`
+	Pass       bool         `json:"pass"`
+	Root       *common.Hash `json:"stateRoot,omitempty"`
+	Fork       string       `json:"fork"`
+	Error      string       `json:"error,omitempty"`
+	State      *state.Dump  `json:"state,omitempty"`
+	BenchStats *execStats   `json:"benchStats,omitempty"` // benchmark statistics; nil (and omitted from JSON) unless benchmarking ran
 }
 
 func stateTestCmd(ctx *cli.Context) error {
@@ -67,7 +92,7 @@ func stateTestCmd(ctx *cli.Context) error {
 	}
 	// Load the test content from the input file
 	if len(ctx.Args().First()) != 0 {
-		return runStateTest(ctx.Args().First(), cfg, ctx.Bool(DumpFlag.Name))
+		return runStateTest(ctx, ctx.Args().First(), cfg, ctx.Bool(DumpFlag.Name), ctx.Bool(BenchFlag.Name))
 	}
 	// Read filenames from stdin and execute back-to-back
 	scanner := bufio.NewScanner(os.Stdin)
@@ -76,15 +101,48 @@ func stateTestCmd(ctx *cli.Context) error {
 		if len(fname) == 0 {
 			return nil
 		}
-		if err := runStateTest(fname, cfg, ctx.Bool(DumpFlag.Name)); err != nil {
+		if err := runStateTest(ctx, fname, cfg, ctx.Bool(DumpFlag.Name), ctx.Bool(BenchFlag.Name)); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 
+type stateTestCase struct {
+	name string             // key of the test in the testsByName map
+	test tests.StateTest    // the state test the subtest belongs to
+	st   tests.StateSubtest // the subtest (identified by its Fork and Index) to run
+}
+
+// collectMatchedSubtests returns the subtests of testsByName selected by the name, index and fork filter flags.
+func collectMatchedSubtests(ctx *cli.Context, testsByName map[string]tests.StateTest) []stateTestCase {
+	var res []stateTestCase
+	subtestName := ctx.String(testNameFlag.Name)
+	if subtestName != "" {
+		if subtest, ok := testsByName[subtestName]; ok { // a name not present in the map leaves the set unfiltered
+			// Assign with '=', not ':=': a shadowed map would be discarded and the name filter would have no effect.
+			testsByName = map[string]tests.StateTest{subtestName: subtest}
+		}
+	}
+	idx := ctx.Int(idxFlag.Name)
+	fork := ctx.String(forkFlag.Name)
+
+	for key, test := range testsByName {
+		for _, st := range test.Subtests() {
+			if idx != -1 && st.Index != idx {
+				continue
+			}
+			if fork != "" && st.Fork != fork {
+				continue
+			}
+			res = append(res, stateTestCase{name: key, st: st, test: test})
+		}
+	}
+	return res
+}
+
 // runStateTest loads the state-test given by fname, and executes the test.
-func runStateTest(fname string, cfg vm.Config, dump bool) error {
+func runStateTest(ctx *cli.Context, fname string, cfg vm.Config, dump bool, bench bool) error {
 	src, err := os.ReadFile(fname)
 	if err != nil {
 		return err
@@ -94,31 +152,38 @@ func runStateTest(fname string, cfg vm.Config, dump bool) error {
 		return err
 	}
 
+	matchingTests := collectMatchedSubtests(ctx, testsByName)
+
 	// Iterate over all the tests, run them and aggregate the results
-	results := make([]StatetestResult, 0, len(testsByName))
-	for key, test := range testsByName {
-		for _, st := range test.Subtests() {
-			// Run the test and aggregate the result
-			result := &StatetestResult{Name: key, Fork: st.Fork, Pass: true}
-			test.Run(st, cfg, false, rawdb.HashScheme, func(err error, tstate *tests.StateTestState) {
-				var root common.Hash
-				if tstate.StateDB != nil {
-					root = tstate.StateDB.IntermediateRoot(false)
-					result.Root = &root
-					fmt.Fprintf(os.Stderr, "{\"stateRoot\": \"%#x\"}\n", root)
-					if dump { // Dump any state to aid debugging
-						cpy, _ := state.New(root, tstate.StateDB.Database())
-						dump := cpy.RawDump(nil)
-						result.State = &dump
-					}
-				}
-				if err != nil {
-					// Test failed, mark as so
-					result.Pass, result.Error = false, err.Error()
+	var results []StatetestResult
+	for _, test := range matchingTests {
+		// Run the test and aggregate the result
+		result := &StatetestResult{Name: test.name, Fork: test.st.Fork, Pass: true}
+		test.test.Run(test.st, cfg, false, rawdb.HashScheme, func(err error, tstate *tests.StateTestState) {
+			var root common.Hash
+			if tstate.StateDB != nil {
+				root = tstate.StateDB.IntermediateRoot(false)
+				result.Root = &root
+				fmt.Fprintf(os.Stderr, "{\"stateRoot\": \"%#x\"}\n", root)
+				if dump { // Dump any state to aid debugging
+					cpy, _ := state.New(root, tstate.StateDB.Database())
+					dump := cpy.RawDump(nil)
+					result.State = &dump
 				}
+			}
+			if err != nil {
+				// Test failed, mark as so
+				result.Pass, result.Error = false, err.Error()
+			}
+		})
+		if bench {
+			_, stats, _ := timedExec(true, func() ([]byte, uint64, error) {
+				_, _, gasUsed, _ := test.test.RunNoVerify(test.st, cfg, false, rawdb.HashScheme)
+				return nil, gasUsed, nil
 			})
-			results = append(results, *result)
+			result.BenchStats = &stats
 		}
+		results = append(results, *result)
 	}
 	out, _ := json.MarshalIndent(results, "", "  ")
 	fmt.Println(string(out))
diff --git a/tests/state_test_util.go b/tests/state_test_util.go
@@ -196,7 +196,7 @@ func (t *StateTest) checkError(subtest StateSubtest, err error) error {
 
 // Run executes a specific subtest and verifies the post-state and logs
 func (t *StateTest) Run(subtest StateSubtest, vmconfig vm.Config, snapshotter bool, scheme string, postCheck func(err error, st *StateTestState)) (result error) {
-	st, root, err := t.RunNoVerify(subtest, vmconfig, snapshotter, scheme)
+	st, root, _, err := t.RunNoVerify(subtest, vmconfig, snapshotter, scheme)
 	// Invoke the callback at the end of function for further analysis.
 	defer func() {
 		postCheck(result, &st)
@@ -228,10 +228,10 @@ func (t *StateTest) Run(subtest StateSubtest, vmconfig vm.Config, snapshotter bo
 
 // RunNoVerify runs a specific subtest and returns the statedb and post-state root.
 // Remember to call state.Close after verifying the test result!
-func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapshotter bool, scheme string) (st StateTestState, root common.Hash, err error) {
+func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapshotter bool, scheme string) (st StateTestState, root common.Hash, gasUsed uint64, err error) {
 	config, eips, err := GetChainConfig(subtest.Fork)
 	if err != nil {
-		return st, common.Hash{}, UnsupportedForkError{subtest.Fork}
+		return st, common.Hash{}, 0, UnsupportedForkError{subtest.Fork}
 	}
 	vmconfig.ExtraEips = eips
 
@@ -250,7 +250,7 @@ func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapsh
 	post := t.json.Post[subtest.Fork][subtest.Index]
 	msg, err := t.json.Tx.toMessage(post, baseFee)
 	if err != nil {
-		return st, common.Hash{}, err
+		return st, common.Hash{}, 0, err
 	}
 
 	{ // Blob transactions may be present after the Cancun fork.
@@ -260,7 +260,7 @@ func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapsh
 		// Here, we just do this shortcut smaller fix, since state tests do not
 		// utilize those codepaths
 		if len(msg.BlobHashes)*params.BlobTxBlobGasPerBlob > params.MaxBlobGasPerBlock {
-			return st, common.Hash{}, errors.New("blob gas exceeds maximum")
+			return st, common.Hash{}, 0, errors.New("blob gas exceeds maximum")
 		}
 	}
 
@@ -269,10 +269,10 @@ func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapsh
 		var ttx types.Transaction
 		err := ttx.UnmarshalBinary(post.TxBytes)
 		if err != nil {
-			return st, common.Hash{}, err
+			return st, common.Hash{}, 0, err
 		}
 		if _, err := types.Sender(types.LatestSigner(config), &ttx); err != nil {
-			return st, common.Hash{}, err
+			return st, common.Hash{}, 0, err
 		}
 	}
 
@@ -322,7 +322,7 @@ func (t *StateTest) RunNoVerify(subtest StateSubtest, vmconfig vm.Config, snapsh
 		receipt := &types.Receipt{GasUsed: vmRet.UsedGas}
 		tracer.OnTxEnd(receipt, nil)
 	}
-	return st, root, err
+	return st, root, vmRet.UsedGas, err
 }
 
 func (t *StateTest) gasLimit(subtest StateSubtest) uint64 {