diff --git a/src/ethereum_test_specs/__init__.py b/src/ethereum_test_specs/__init__.py
index 790e2b4351f..9a714640746 100644
--- a/src/ethereum_test_specs/__init__.py
+++ b/src/ethereum_test_specs/__init__.py
@@ -2,6 +2,8 @@
 
 from .base import BaseTest, TestSpec
 from .base_static import BaseStaticTest
+from .benchmark import BenchmarkTest, BenchmarkTestFiller, BenchmarkTestSpec
+from .benchmark_state import BenchmarkStateTest, BenchmarkStateTestFiller, BenchmarkStateTestSpec
 from .blobs import BlobsTest, BlobsTestFiller, BlobsTestSpec
 from .blockchain import (
     BlockchainTest,
@@ -23,6 +25,12 @@
 __all__ = (
     "BaseStaticTest",
     "BaseTest",
+    "BenchmarkTest",
+    "BenchmarkTestFiller",
+    "BenchmarkTestSpec",
+    "BenchmarkStateTest",
+    "BenchmarkStateTestFiller",
+    "BenchmarkStateTestSpec",
     "BlobsTest",
     "BlobsTestFiller",
     "BlobsTestSpec",
diff --git a/src/ethereum_test_specs/benchmark.py b/src/ethereum_test_specs/benchmark.py
new file mode 100644
index 00000000000..fb0eb8b68f1
--- /dev/null
+++ b/src/ethereum_test_specs/benchmark.py
@@ -0,0 +1,164 @@
+"""Ethereum benchmark test spec definition and filler."""
+
+from typing import Callable, ClassVar, Dict, Generator, List, Optional, Sequence, Type
+
+import pytest
+from pydantic import Field
+
+from ethereum_clis import TransitionTool
+from ethereum_test_base_types import HexNumber
+from ethereum_test_exceptions import BlockException, TransactionException
+from ethereum_test_execution import (
+    BaseExecute,
+    ExecuteFormat,
+    LabeledExecuteFormat,
+    TransactionPost,
+)
+from ethereum_test_fixtures import (
+    BaseFixture,
+    BlockchainEngineFixture,
+    BlockchainEngineXFixture,
+    BlockchainFixture,
+    FixtureFormat,
+    LabeledFixtureFormat,
+)
+from ethereum_test_forks import Fork
+from ethereum_test_types import Alloc, Environment, Transaction
+
+from .base import BaseTest
+from .blockchain import Block, BlockchainTest
+
+
+class BenchmarkTest(BaseTest):
+    """Benchmark test spec that is filled by converting it into a BlockchainTest."""
+
+    pre: Alloc  # pre-state handed to the derived BlockchainTest
+    post: Alloc  # post-state passed to BlockchainTest.from_test and to TransactionPost
+    tx: Optional[Transaction] = None  # only used when `blocks` is None; may be split per gas cap
+    blocks: Optional[List[Block]] = None  # used verbatim and takes precedence over `tx`
+    block_exception: (  # NOTE(review): not read by any method visible in this class
+        List[TransactionException | BlockException] | TransactionException | BlockException | None
+    ) = None
+    env: Environment = Field(default_factory=Environment)  # returned as the genesis environment
+    expected_benchmark_gas_used: int | None = None  # gas to split; falls back to env.gas_limit
+
+    supported_fixture_formats: ClassVar[Sequence[FixtureFormat | LabeledFixtureFormat]] = [
+        BlockchainFixture,
+        BlockchainEngineFixture,
+        BlockchainEngineXFixture,
+    ]
+
+    supported_execute_formats: ClassVar[Sequence[LabeledExecuteFormat]] = [
+        LabeledExecuteFormat(
+            TransactionPost,
+            "benchmark_test",
+            "An execute test derived from a benchmark test",
+        ),
+    ]
+
+    supported_markers: ClassVar[Dict[str, str]] = {
+        "blockchain_test_engine_only": "Only generate a blockchain test engine fixture",
+        "blockchain_test_only": "Only generate a blockchain test fixture",
+    }
+
+    @classmethod
+    def pytest_parameter_name(cls) -> str:
+        """Return the pytest parameter name (`benchmark_test`) that selects this spec type."""
+        return "benchmark_test"
+
+    @classmethod
+    def discard_fixture_format_by_marks(
+        cls,
+        fixture_format: FixtureFormat,
+        fork: Fork,
+        markers: List[pytest.Mark],
+    ) -> bool:
+        """Tell whether an `*_only` marker rules out `fixture_format` for filling."""
+        names = [mark.name for mark in markers]
+        if "blockchain_test_only" in names:
+            return fixture_format != BlockchainFixture
+        engine_only = "blockchain_test_engine_only" in names
+        return engine_only and fixture_format != BlockchainEngineFixture
+
+    def get_genesis_environment(self, fork: Fork) -> Environment:
+        """Return `env` unchanged as the genesis environment; `fork` is not consulted."""
+        return self.env
+
+    def split_transaction(self, tx: Transaction, gas_limit_cap: int | None) -> List[Transaction]:
+        """Split `tx` into consecutive-nonce copies that each fit within `gas_limit_cap`."""
+        if (gas_limit_cap is None) or (tx.gas_limit <= gas_limit_cap):
+            return [tx]
+
+        # Gas budget to distribute; falls back to the env gas limit when no target is set.
+        remaining_gas = int(self.expected_benchmark_gas_used or self.env.gas_limit)
+        num_splits = -(-remaining_gas // gas_limit_cap)  # ceil: keep the partial last chunk
+
+        split_transactions = []
+        for i in range(num_splits):
+            split_tx = tx.model_copy()
+            split_tx.gas_limit = HexNumber(min(gas_limit_cap, remaining_gas))
+            split_tx.nonce = HexNumber(tx.nonce + i)
+            split_transactions.append(split_tx)
+            remaining_gas -= gas_limit_cap
+
+        return split_transactions
+
+    def generate_blockchain_test(self, fork: Fork) -> BlockchainTest:
+        """Create a BlockchainTest from this BenchmarkTest.
+
+        Explicit `blocks` win over `tx`; a lone `tx` is split per the fork's gas cap.
+        """
+        test_blocks: List[Block]
+        if self.blocks is not None:
+            # Caller-supplied blocks are forwarded unchanged.
+            test_blocks = self.blocks
+        elif self.tx is not None:
+            # A single transaction may exceed the fork's per-transaction gas cap,
+            # so it is split first and all resulting pieces share one block.
+            cap = fork.transaction_gas_limit_cap()
+            split_txs = self.split_transaction(self.tx, cap)
+            test_blocks = [Block(txs=split_txs)]
+        else:
+            raise ValueError("Cannot create BlockchainTest without transactions or blocks")
+
+        # Both paths share the same base test, state and genesis environment.
+        return BlockchainTest.from_test(
+            base_test=self,
+            genesis_environment=self.env,
+            pre=self.pre,
+            post=self.post,
+            blocks=test_blocks,
+        )
+
+    def generate(
+        self,
+        t8n: TransitionTool,
+        fork: Fork,
+        fixture_format: FixtureFormat,
+    ) -> BaseFixture:
+        """Fill this benchmark as a blockchain fixture of the requested format."""
+        # Only `tx.error` is consulted; explicit `blocks` are not inspected for errors.
+        expects_tx_error = False if not self.tx else self.tx.error is not None
+        self.check_exception_test(exception=expects_tx_error)
+        if fixture_format not in BlockchainTest.supported_fixture_formats:
+            raise Exception(f"Unsupported fixture format: {fixture_format}")
+        chain_test = self.generate_blockchain_test(fork=fork)
+        return chain_test.generate(t8n=t8n, fork=fork, fixture_format=fixture_format)
+
+    def execute(
+        self,
+        *,
+        fork: Fork,
+        execute_format: ExecuteFormat,
+    ) -> BaseExecute:
+        """Build the live-network TransactionPost; `blocks` take precedence over `tx`."""
+        if execute_format != TransactionPost:
+            raise Exception(f"Unsupported execute format: {execute_format}")
+        if self.blocks is None and self.tx is None:
+            raise ValueError("Cannot execute BenchmarkTest without transactions or blocks")
+        txs = [b.txs for b in self.blocks] if self.blocks is not None else [[self.tx]]
+        return TransactionPost(blocks=txs, post=self.post)
+
+
+BenchmarkTestSpec = Callable[[str], Generator[BenchmarkTest, None, None]]
+BenchmarkTestFiller = Type[BenchmarkTest]  # `benchmark_test` fixture type
diff --git a/src/ethereum_test_specs/benchmark_state.py b/src/ethereum_test_specs/benchmark_state.py
new file mode 100644
index 00000000000..e9e959f0615
--- /dev/null
+++ b/src/ethereum_test_specs/benchmark_state.py
@@ -0,0 +1,229 @@
+"""Ethereum benchmark state test spec definition and filler."""
+
+import math
+from pprint import pprint
+from typing import Callable, ClassVar, Generator, List, Sequence, Type
+
+from pydantic import ConfigDict
+
+from ethereum_clis import TransitionTool
+from ethereum_test_base_types import HexNumber
+from ethereum_test_execution import (
+    BaseExecute,
+    ExecuteFormat,
+    LabeledExecuteFormat,
+    TransactionPost,
+)
+from ethereum_test_fixtures import (
+    BaseFixture,
+    FixtureFormat,
+    LabeledFixtureFormat,
+    StateFixture,
+)
+from ethereum_test_fixtures.common import FixtureBlobSchedule
+from ethereum_test_fixtures.state import (
+    FixtureConfig,
+    FixtureEnvironment,
+    FixtureForkPost,
+    FixtureTransaction,
+)
+from ethereum_test_forks import Fork
+from ethereum_test_types import Alloc, Environment, Transaction
+from ethereum_test_vm import Bytecode
+
+from .base import BaseTest, OpMode
+from .blockchain import Block, BlockchainTest
+from .debugging import print_traces
+from .helpers import verify_transactions
+
+
+class BenchmarkStateTest(BaseTest):
+    """Test type designed specifically for benchmark state test cases with full verification."""
+
+    pre: Alloc  # merged over the fork's pre-allocation before running t8n
+    post: Alloc  # verified against the t8n output allocation
+    tx: Transaction  # signed for the state fixture; split per gas cap for blockchain fixtures
+    gas_benchmark_value: int  # exact gas the tx must use when filling in benchmarking mode
+    setup_bytecode: Bytecode | None = None  # NOTE(review): not read by any method in this class
+    attack_bytecode: Bytecode | None = None  # NOTE(review): not read by any method in this class
+    env: Environment  # required here (no default); also used as the genesis environment
+    chain_id: int = 1  # forwarded to t8n and recorded in the fixture config
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)  # presumably for Bytecode; confirm
+
+    supported_fixture_formats: ClassVar[Sequence[FixtureFormat | LabeledFixtureFormat]] = [
+        StateFixture,
+    ] + [
+        LabeledFixtureFormat(
+            fixture_format,
+            f"{fixture_format.format_name}_from_benchmark_state_test",
+            f"A {fixture_format.format_name} generated from a benchmark_state_test",
+        )
+        for fixture_format in BlockchainTest.supported_fixture_formats
+    ]
+
+    supported_execute_formats: ClassVar[Sequence[LabeledExecuteFormat]] = [
+        LabeledExecuteFormat(
+            TransactionPost,
+            "benchmark_state_test_with_verification",
+            "An execute test derived from a benchmark state test with verification",
+        ),
+    ]
+
+    def split_transaction(self, tx: Transaction, gas_limit_cap: int | None) -> List[Transaction]:
+        """Return `tx` as-is if it fits the cap, else consecutive-nonce copies that each fit."""
+        if gas_limit_cap is None or tx.gas_limit <= gas_limit_cap:
+            return [tx]
+
+        gas_total = int(tx.gas_limit)
+        chunks = math.ceil(gas_total / gas_limit_cap)
+        # Every chunk gets the full cap except the last, which takes what is left over.
+        limits = [min(gas_limit_cap, gas_total - k * gas_limit_cap) for k in range(chunks)]
+
+        pieces = []
+        for nonce_offset, chunk_limit in enumerate(limits):
+            piece = tx.model_copy()
+            piece.gas_limit = HexNumber(chunk_limit)
+            piece.nonce = HexNumber(tx.nonce + nonce_offset)
+            pieces.append(piece)
+
+        return pieces
+
+    def make_benchmark_state_test_fixture(
+        self,
+        t8n: TransitionTool,
+        fork: Fork,
+    ) -> StateFixture:
+        """Evaluate `tx` with t8n, run the verification steps, and build the StateFixture."""
+        # We can't generate a state test fixture that names a transition fork,
+        # so we get the fork at the block number and timestamp of the state test
+        fork = fork.fork_at(self.env.number, self.env.timestamp)
+
+        env = self.env.set_fork_requirements(fork)
+        tx = self.tx.with_signature_and_sender(keep_secret_key=True)
+        pre_alloc = Alloc.merge(
+            Alloc.model_validate(fork.pre_allocation()),
+            self.pre,
+        )
+
+        # Verification 1: Reject pre-states that contain empty accounts
+        if empty_accounts := pre_alloc.empty_accounts():
+            raise Exception(f"Empty accounts in pre state: {empty_accounts}")
+
+        transition_tool_output = t8n.evaluate(
+            transition_tool_data=TransitionTool.TransitionToolData(
+                alloc=pre_alloc,
+                txs=[tx],  # the single, unsplit transaction
+                env=env,
+                fork=fork,
+                chain_id=self.chain_id,
+                reward=0,  # Reward on state tests is always zero
+                blob_schedule=fork.blob_schedule(),
+                state_test=True,
+            ),
+            debug_output_path=self.get_next_transition_tool_output_path(),
+            slow_request=self.is_tx_gas_heavy_test(),
+        )
+
+        # Verification 2: Post-allocation verification (traces are printed on failure)
+        try:
+            self.post.verify_post_alloc(transition_tool_output.alloc)
+        except Exception as e:
+            print_traces(t8n.get_traces())
+            raise e
+
+        # Verification 3: Transaction verification (traces, result, alloc dumped on failure)
+        try:
+            verify_transactions(
+                txs=[tx],
+                result=transition_tool_output.result,
+                transition_tool_exceptions_reliable=t8n.exception_mapper.reliable,
+            )
+        except Exception as e:
+            print_traces(t8n.get_traces())
+            pprint(transition_tool_output.result)
+            pprint(transition_tool_output.alloc)
+            raise e
+
+        # Verification 4: Benchmark gas validation (only when in benchmarking mode)
+        if self._operation_mode == OpMode.BENCHMARKING:
+            expected_benchmark_gas_used = self.gas_benchmark_value
+            gas_used = int(transition_tool_output.result.gas_used)
+            assert expected_benchmark_gas_used is not None, "gas_benchmark_value is not set"
+            assert gas_used == expected_benchmark_gas_used, (
+                f"gas_used ({gas_used}) does not match gas_benchmark_value "
+                f"({expected_benchmark_gas_used})"
+                f", difference: {gas_used - expected_benchmark_gas_used}"
+            )
+
+        return StateFixture(
+            env=FixtureEnvironment(**env.model_dump(exclude_none=True)),
+            pre=pre_alloc,
+            post={
+                fork: [
+                    FixtureForkPost(
+                        state_root=transition_tool_output.result.state_root,
+                        logs_hash=transition_tool_output.result.logs_hash,
+                        tx_bytes=tx.rlp(),
+                        expect_exception=tx.error,
+                        state=transition_tool_output.alloc,
+                    )
+                ]
+            },
+            transaction=FixtureTransaction.from_transaction(tx),
+            config=FixtureConfig(
+                blob_schedule=FixtureBlobSchedule.from_blob_schedule(fork.blob_schedule()),
+                chain_id=self.chain_id,
+            ),
+        )
+
+    def generate_blockchain_test(self, fork: Fork) -> BlockchainTest:
+        """Create a BlockchainTest whose single block holds `tx`, split per the fork's cap."""
+        # `split_transaction` returns `[tx]` untouched when the cap is None or not exceeded.
+        cap = fork.transaction_gas_limit_cap()
+        split_txs = self.split_transaction(self.tx, cap)
+
+        # All resulting transactions go into a single block.
+        single_block = Block(txs=split_txs)
+        return BlockchainTest.from_test(
+            base_test=self,
+            genesis_environment=self.env,
+            pre=self.pre,
+            post=self.post,
+            blocks=[single_block],
+        )
+
+    def generate(
+        self,
+        t8n: TransitionTool,
+        fork: Fork,
+        fixture_format: FixtureFormat,
+    ) -> BaseFixture:
+        """Fill this test as a blockchain fixture or a state fixture, per `fixture_format`."""
+        expects_error = self.tx.error is not None
+        self.check_exception_test(exception=expects_error)
+
+        if fixture_format in BlockchainTest.supported_fixture_formats:
+            chain_test = self.generate_blockchain_test(fork=fork)
+            return chain_test.generate(t8n=t8n, fork=fork, fixture_format=fixture_format)
+        if fixture_format == StateFixture:
+            return self.make_benchmark_state_test_fixture(t8n, fork)
+        raise Exception(f"Unknown fixture format: {fixture_format}")
+
+    def execute(
+        self,
+        *,
+        fork: Fork,
+        execute_format: ExecuteFormat,
+    ) -> BaseExecute:
+        """Wrap `tx` and `post` in a TransactionPost for execution on a live network."""
+        if execute_format == TransactionPost:
+            # The unsplit transaction is sent as one single-transaction block.
+            single_tx_block = [self.tx]
+            return TransactionPost(blocks=[single_tx_block], post=self.post)
+
+        raise Exception(f"Unsupported execute format: {execute_format}")
+
+
+BenchmarkStateTestFiller = Type[BenchmarkStateTest]  # `benchmark_state_test` fixture type
+BenchmarkStateTestSpec = Callable[[str], Generator[BenchmarkStateTest, None, None]]
diff --git a/src/ethereum_test_tools/__init__.py b/src/ethereum_test_tools/__init__.py
index 1617bf76ef5..47c4da0aa79 100644
--- a/src/ethereum_test_tools/__init__.py
+++ b/src/ethereum_test_tools/__init__.py
@@ -25,6 +25,10 @@
 from ethereum_test_fixtures import BaseFixture, FixtureCollector
 from ethereum_test_specs import (
     BaseTest,
+    BenchmarkStateTest,
+    BenchmarkStateTestFiller,
+    BenchmarkTest,
+    BenchmarkTestFiller,
     BlobsTest,
     BlobsTestFiller,
     BlockchainTest,
@@ -98,6 +102,10 @@
     "AuthorizationTuple",
     "BaseFixture",
     "BaseTest",
+    "BenchmarkTest",
+    "BenchmarkTestFiller",
+    "BenchmarkStateTest",
+    "BenchmarkStateTestFiller",
     "Blob",
     "BlobsTest",
     "BlobsTestFiller",
diff --git a/tests/benchmark/test_worst_blocks.py b/tests/benchmark/test_worst_blocks.py
index 327cbae00c9..133761a445f 100644
--- a/tests/benchmark/test_worst_blocks.py
+++ b/tests/benchmark/test_worst_blocks.py
@@ -15,8 +15,9 @@
     Account,
     Address,
     Alloc,
+    BenchmarkTestFiller,
     Block,
-    BlockchainTestFiller,
+    Environment,
     Hash,
     StateTestFiller,
     Transaction,
@@ -111,8 +112,9 @@ def ether_transfer_case(
     ["a_to_a", "a_to_b", "diff_acc_to_b", "a_to_diff_acc", "diff_acc_to_diff_acc"],
 )
 def test_block_full_of_ether_transfers(
-    blockchain_test: BlockchainTestFiller,
+    benchmark_test: BenchmarkTestFiller,
     pre: Alloc,
+    env: Environment,
     case_id: str,
     ether_transfer_case,
     iteration_count: int,
@@ -153,7 +155,8 @@ def test_block_full_of_ether_transfers(
         else {receiver: Account(balance=balance) for receiver, balance in balances.items()}
     )
 
-    blockchain_test(
+    benchmark_test(
+        genesis_environment=env,
         pre=pre,
         post=post_state,
         blocks=[Block(txs=txs)],
diff --git a/tests/benchmark/test_worst_compute.py b/tests/benchmark/test_worst_compute.py
index 6d9a48e692b..a81377affe5 100644
--- a/tests/benchmark/test_worst_compute.py
+++ b/tests/benchmark/test_worst_compute.py
@@ -19,6 +19,7 @@
 from ethereum_test_tools import (
     Address,
     Alloc,
+    BenchmarkStateTestFiller,
     Block,
     BlockchainTestFiller,
     Bytecode,
@@ -2513,8 +2514,9 @@ def test_worst_calldataload(
     ],
 )
 def test_worst_swap(
-    state_test: StateTestFiller,
+    benchmark_state_test: BenchmarkStateTestFiller,
     pre: Alloc,
+    env: Environment,
     fork: Fork,
     opcode: Opcode,
     gas_benchmark_value: int,
@@ -2534,8 +2536,10 @@ def test_worst_swap(
         sender=pre.fund_eoa(),
     )
 
-    state_test(
+    benchmark_state_test(
+        env=env,
         pre=pre,
+        gas_benchmark_value=gas_benchmark_value,
         post={},
         tx=tx,
     )