class BenchmarkTest(BaseTest):
    """
    Test type designed specifically for benchmark test cases.

    The workload is described either by a single transaction (`tx`) or by an
    explicit list of blocks (`blocks`). Fixtures are produced by converting
    the benchmark into a `BlockchainTest` and delegating generation to it.
    """

    # Pre-state allocation handed to the derived blockchain test.
    pre: Alloc
    # Expected post-state allocation.
    post: Alloc
    # Single benchmark transaction; only used when `blocks` is not given.
    tx: Optional[Transaction] = None
    # Explicit blocks; take precedence over `tx` when filling fixtures.
    blocks: Optional[List[Block]] = None
    # Expected exception(s), if any.
    block_exception: (
        List[TransactionException | BlockException] | TransactionException | BlockException | None
    ) = None
    # Environment used as the genesis environment of the derived test.
    env: Environment = Field(default_factory=Environment)
    # Total gas to distribute when `tx` has to be split; when unset (or zero)
    # `env.gas_limit` is used instead.
    expected_benchmark_gas_used: int | None = None

    supported_fixture_formats: ClassVar[Sequence[FixtureFormat | LabeledFixtureFormat]] = [
        BlockchainFixture,
        BlockchainEngineFixture,
        BlockchainEngineXFixture,
    ]

    supported_execute_formats: ClassVar[Sequence[LabeledExecuteFormat]] = [
        LabeledExecuteFormat(
            TransactionPost,
            "benchmark_test",
            "An execute test derived from a benchmark test",
        ),
    ]

    supported_markers: ClassVar[Dict[str, str]] = {
        "blockchain_test_engine_only": "Only generate a blockchain test engine fixture",
        "blockchain_test_only": "Only generate a blockchain test fixture",
    }

    @classmethod
    def pytest_parameter_name(cls) -> str:
        """Return the parameter name used in pytest to select this spec type."""
        return "benchmark_test"

    @classmethod
    def discard_fixture_format_by_marks(
        cls,
        fixture_format: FixtureFormat,
        fork: Fork,
        markers: List[pytest.Mark],
    ) -> bool:
        """
        Discard a fixture format from filling if the appropriate marker is used.

        Returns True when `fixture_format` must be skipped. The
        `blockchain_test_only` marker is checked first, so it wins when both
        markers are present.
        """
        # Collect the marker names once instead of rebuilding the list per check.
        marker_names = {marker.name for marker in markers}
        if "blockchain_test_only" in marker_names:
            return fixture_format != BlockchainFixture
        if "blockchain_test_engine_only" in marker_names:
            return fixture_format != BlockchainEngineFixture
        return False

    def get_genesis_environment(self, fork: Fork) -> Environment:
        """Get the genesis environment for this benchmark test (`fork` is not consulted)."""
        return self.env

    def split_transaction(self, tx: Transaction, gas_limit_cap: int | None) -> List[Transaction]:
        """
        Split a transaction that exceeds the gas limit cap into multiple transactions.

        If there is no cap, or `tx` already fits under it, `[tx]` is returned
        unchanged. Otherwise the total gas (`expected_benchmark_gas_used`, or
        `env.gas_limit` when that is unset) is spread over copies of `tx` with
        consecutive nonces: every copy gets `gas_limit_cap`, except the last
        one which gets whatever remains.
        """
        if (gas_limit_cap is None) or (tx.gas_limit <= gas_limit_cap):
            return [tx]

        total_gas = int(self.expected_benchmark_gas_used or self.env.gas_limit)
        # Ceiling division in pure integer arithmetic: a partial trailing chunk
        # still needs its own transaction. Flooring here would drop gas (or
        # emit a zero-gas transaction when the total is an exact multiple).
        num_splits = (total_gas + gas_limit_cap - 1) // gas_limit_cap

        split_transactions = []
        remaining_gas = total_gas
        for i in range(num_splits):
            split_tx = tx.model_copy()
            split_tx.gas_limit = HexNumber(min(gas_limit_cap, remaining_gas))
            split_tx.nonce = HexNumber(tx.nonce + i)
            split_transactions.append(split_tx)
            # Decrement only after the chunk has been sized from the remainder.
            remaining_gas -= gas_limit_cap

        return split_transactions

    def generate_blockchain_test(self, fork: Fork) -> BlockchainTest:
        """
        Create a BlockchainTest from this BenchmarkTest.

        Explicit `blocks` are used as-is. Otherwise `tx` is split according to
        the fork's transaction gas limit cap and placed in a single block.

        Raises:
            ValueError: if neither `blocks` nor `tx` is set.

        """
        if self.blocks is not None:
            return BlockchainTest.from_test(
                base_test=self,
                genesis_environment=self.env,
                pre=self.pre,
                post=self.post,
                blocks=self.blocks,
            )
        elif self.tx is not None:
            gas_limit_cap = fork.transaction_gas_limit_cap()

            transactions = self.split_transaction(self.tx, gas_limit_cap)

            blocks = [Block(txs=transactions)]

            return BlockchainTest.from_test(
                base_test=self,
                pre=self.pre,
                post=self.post,
                blocks=blocks,
                genesis_environment=self.env,
            )
        else:
            raise ValueError("Cannot create BlockchainTest without transactions or blocks")

    def generate(
        self,
        t8n: TransitionTool,
        fork: Fork,
        fixture_format: FixtureFormat,
    ) -> BaseFixture:
        """
        Generate the blockchain test fixture.

        Raises:
            Exception: if `fixture_format` is not one of the formats supported
                by `BlockchainTest`.

        """
        # Only the single-transaction form carries an expected error here.
        self.check_exception_test(exception=self.tx.error is not None if self.tx else False)
        if fixture_format in BlockchainTest.supported_fixture_formats:
            return self.generate_blockchain_test(fork=fork).generate(
                t8n=t8n, fork=fork, fixture_format=fixture_format
            )
        else:
            raise Exception(f"Unsupported fixture format: {fixture_format}")

    def execute(
        self,
        *,
        fork: Fork,
        execute_format: ExecuteFormat,
    ) -> BaseExecute:
        """
        Execute the benchmark test by sending it to the live network.

        Uses `tx` when it is set; otherwise the transactions of each entry in
        `blocks` are sent block by block.

        Raises:
            ValueError: if neither `tx` nor `blocks` is set.
            Exception: if `execute_format` is not supported.

        """
        if execute_format == TransactionPost:
            if self.tx is not None:
                tx_batches = [[self.tx]]
            elif self.blocks is not None:
                # `tx` is optional: a blocks-only benchmark must not send `[[None]]`.
                tx_batches = [block.txs for block in self.blocks]
            else:
                raise ValueError("Cannot execute benchmark test without transactions or blocks")
            return TransactionPost(
                blocks=tx_batches,
                post=self.post,
            )
        raise Exception(f"Unsupported execute format: {execute_format}")
class BenchmarkStateTest(BaseTest):
    """
    Benchmark variant of a state test whose filling is fully verified.

    Filling a `StateFixture` runs the transition tool once and then checks the
    post allocation, the transaction results and (in benchmarking mode) the
    gas used. Blockchain fixtures are produced by converting to a
    `BlockchainTest`.
    """

    # Pre-state allocation supplied by the test.
    pre: Alloc
    # Expected post-state allocation.
    post: Alloc
    # The benchmark transaction.
    tx: Transaction
    # Gas the transaction must consume when running in benchmarking mode.
    gas_benchmark_value: int
    # Optional bytecode fields; not read by any method of this class.
    setup_bytecode: Bytecode | None = None
    attack_bytecode: Bytecode | None = None
    # Environment used for the state test and as genesis environment.
    env: Environment
    # Chain id passed to the transition tool and written to the fixture config.
    chain_id: int = 1

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # StateFixture itself, plus a labeled variant of every blockchain format.
    supported_fixture_formats: ClassVar[Sequence[FixtureFormat | LabeledFixtureFormat]] = [
        StateFixture,
        *(
            LabeledFixtureFormat(
                fixture_format,
                f"{fixture_format.format_name}_from_benchmark_state_test",
                f"A {fixture_format.format_name} generated from a benchmark_state_test",
            )
            for fixture_format in BlockchainTest.supported_fixture_formats
        ),
    ]

    supported_execute_formats: ClassVar[Sequence[LabeledExecuteFormat]] = [
        LabeledExecuteFormat(
            TransactionPost,
            "benchmark_state_test_with_verification",
            "An execute test derived from a benchmark state test with verification",
        ),
    ]

    def split_transaction(self, tx: Transaction, gas_limit_cap: int | None) -> List[Transaction]:
        """
        Break `tx` into capped copies when its gas limit exceeds `gas_limit_cap`.

        Returns `[tx]` untouched when no cap applies or the transaction already
        fits. Otherwise returns copies of `tx` with consecutive nonces, each
        limited to `gas_limit_cap`, the last one taking the remainder.
        """
        if gas_limit_cap is None:
            return [tx]
        if tx.gas_limit <= gas_limit_cap:
            return [tx]

        full_budget = int(tx.gas_limit)
        chunk_count = math.ceil(full_budget / gas_limit_cap)

        chunks = []
        for index in range(chunk_count):
            # Gas not yet assigned to any earlier chunk.
            gas_left = full_budget - index * gas_limit_cap
            piece = tx.model_copy()
            piece.gas_limit = HexNumber(min(gas_limit_cap, gas_left))
            piece.nonce = HexNumber(tx.nonce + index)
            chunks.append(piece)

        return chunks

    def make_benchmark_state_test_fixture(
        self,
        t8n: TransitionTool,
        fork: Fork,
    ) -> StateFixture:
        """
        Build a verified `StateFixture` from this benchmark state test.

        Runs the transition tool on the signed transaction, then verifies the
        post allocation, the transaction results and, when the operation mode
        is `OpMode.BENCHMARKING`, that the gas used equals
        `gas_benchmark_value`.
        """
        # A state fixture cannot name a transition fork, so resolve the fork
        # that is active at the environment's block number and timestamp.
        active_fork = fork.fork_at(self.env.number, self.env.timestamp)

        fork_env = self.env.set_fork_requirements(active_fork)
        signed_tx = self.tx.with_signature_and_sender(keep_secret_key=True)
        fork_alloc = Alloc.model_validate(active_fork.pre_allocation())
        full_pre = Alloc.merge(fork_alloc, self.pre)

        # Verification 1: the merged pre-state must not contain empty accounts.
        empty_accounts = full_pre.empty_accounts()
        if empty_accounts:
            raise Exception(f"Empty accounts in pre state: {empty_accounts}")

        t8n_input = TransitionTool.TransitionToolData(
            alloc=full_pre,
            txs=[signed_tx],
            env=fork_env,
            fork=active_fork,
            chain_id=self.chain_id,
            reward=0,  # State tests never pay a block reward
            blob_schedule=active_fork.blob_schedule(),
            state_test=True,
        )
        t8n_output = t8n.evaluate(
            transition_tool_data=t8n_input,
            debug_output_path=self.get_next_transition_tool_output_path(),
            slow_request=self.is_tx_gas_heavy_test(),
        )

        # Verification 2: resulting allocation against the expected post state.
        try:
            self.post.verify_post_alloc(t8n_output.alloc)
        except Exception as post_error:
            print_traces(t8n.get_traces())
            raise post_error

        # Verification 3: transaction results reported by the transition tool.
        try:
            verify_transactions(
                txs=[signed_tx],
                result=t8n_output.result,
                transition_tool_exceptions_reliable=t8n.exception_mapper.reliable,
            )
        except Exception as tx_error:
            print_traces(t8n.get_traces())
            pprint(t8n_output.result)
            pprint(t8n_output.alloc)
            raise tx_error

        # Verification 4: gas consumption, only enforced while benchmarking.
        if self._operation_mode == OpMode.BENCHMARKING:
            expected_benchmark_gas_used = self.gas_benchmark_value
            gas_used = int(t8n_output.result.gas_used)
            assert expected_benchmark_gas_used is not None, "gas_benchmark_value is not set"
            assert gas_used == expected_benchmark_gas_used, (
                f"gas_used ({gas_used}) does not match gas_benchmark_value "
                f"({expected_benchmark_gas_used})"
                f", difference: {gas_used - expected_benchmark_gas_used}"
            )

        fixture_env = FixtureEnvironment(**fork_env.model_dump(exclude_none=True))
        fork_post = FixtureForkPost(
            state_root=t8n_output.result.state_root,
            logs_hash=t8n_output.result.logs_hash,
            tx_bytes=signed_tx.rlp(),
            expect_exception=signed_tx.error,
            state=t8n_output.alloc,
        )
        fixture_tx = FixtureTransaction.from_transaction(signed_tx)
        fixture_config = FixtureConfig(
            blob_schedule=FixtureBlobSchedule.from_blob_schedule(active_fork.blob_schedule()),
            chain_id=self.chain_id,
        )
        return StateFixture(
            env=fixture_env,
            pre=full_pre,
            post={active_fork: [fork_post]},
            transaction=fixture_tx,
            config=fixture_config,
        )

    def generate_blockchain_test(self, fork: Fork) -> BlockchainTest:
        """
        Create a BlockchainTest from this BenchmarkStateTest.

        The transaction is split according to the fork's transaction gas limit
        cap and all resulting transactions are placed in a single block.
        """
        capped_txs = self.split_transaction(self.tx, fork.transaction_gas_limit_cap())
        single_block = Block(txs=capped_txs)

        return BlockchainTest.from_test(
            base_test=self,
            pre=self.pre,
            post=self.post,
            blocks=[single_block],
            genesis_environment=self.env,
        )

    def generate(
        self,
        t8n: TransitionTool,
        fork: Fork,
        fixture_format: FixtureFormat,
    ) -> BaseFixture:
        """
        Generate the fixture for the requested format.

        Blockchain formats go through `generate_blockchain_test`; `StateFixture`
        goes through `make_benchmark_state_test_fixture`. Any other format
        raises an `Exception`.
        """
        expects_failure = self.tx.error is not None
        self.check_exception_test(exception=expects_failure)

        if fixture_format in BlockchainTest.supported_fixture_formats:
            blockchain_test = self.generate_blockchain_test(fork=fork)
            return blockchain_test.generate(t8n=t8n, fork=fork, fixture_format=fixture_format)
        if fixture_format == StateFixture:
            return self.make_benchmark_state_test_fixture(t8n, fork)

        raise Exception(f"Unknown fixture format: {fixture_format}")

    def execute(
        self,
        *,
        fork: Fork,
        execute_format: ExecuteFormat,
    ) -> BaseExecute:
        """
        Execute the benchmark state test by sending it to the live network.

        Only `TransactionPost` is supported; any other format raises an
        `Exception`.
        """
        if execute_format == TransactionPost:
            single_tx_block = [self.tx]
            return TransactionPost(
                blocks=[single_tx_block],
                post=self.post,
            )
        raise Exception(f"Unsupported execute format: {execute_format}")
"BlobsTest", "BlobsTestFiller", diff --git a/tests/benchmark/test_worst_blocks.py b/tests/benchmark/test_worst_blocks.py index 327cbae00c9..133761a445f 100644 --- a/tests/benchmark/test_worst_blocks.py +++ b/tests/benchmark/test_worst_blocks.py @@ -15,8 +15,9 @@ Account, Address, Alloc, + BenchmarkTestFiller, Block, - BlockchainTestFiller, + Environment, Hash, StateTestFiller, Transaction, @@ -111,8 +112,9 @@ def ether_transfer_case( ["a_to_a", "a_to_b", "diff_acc_to_b", "a_to_diff_acc", "diff_acc_to_diff_acc"], ) def test_block_full_of_ether_transfers( - blockchain_test: BlockchainTestFiller, + benchmark_test: BenchmarkTestFiller, pre: Alloc, + env: Environment, case_id: str, ether_transfer_case, iteration_count: int, @@ -153,7 +155,8 @@ def test_block_full_of_ether_transfers( else {receiver: Account(balance=balance) for receiver, balance in balances.items()} ) - blockchain_test( + benchmark_test( + genesis_environment=env, pre=pre, post=post_state, blocks=[Block(txs=txs)], diff --git a/tests/benchmark/test_worst_compute.py b/tests/benchmark/test_worst_compute.py index 6d9a48e692b..a81377affe5 100644 --- a/tests/benchmark/test_worst_compute.py +++ b/tests/benchmark/test_worst_compute.py @@ -19,6 +19,7 @@ from ethereum_test_tools import ( Address, Alloc, + BenchmarkStateTestFiller, Block, BlockchainTestFiller, Bytecode, @@ -2513,8 +2514,9 @@ def test_worst_calldataload( ], ) def test_worst_swap( - state_test: StateTestFiller, + benchmark_state_test: BenchmarkStateTestFiller, pre: Alloc, + env: Environment, fork: Fork, opcode: Opcode, gas_benchmark_value: int, @@ -2534,8 +2536,10 @@ def test_worst_swap( sender=pre.fund_eoa(), ) - state_test( + benchmark_state_test( + env=env, pre=pre, + gas_benchmark_value=gas_benchmark_value, post={}, tx=tx, )