From 5e4fe9146934c4c4c819ac98006c978b7eed435c Mon Sep 17 00:00:00 2001
From: CPerezz <cperezz19@pm.me>
Date: Mon, 29 Sep 2025 10:47:23 +0200
Subject: [PATCH 1/2] fix(execute): add gas validation for benchmark tests in
 execute mode

Previously, execute mode was not validating that transactions consumed
the expected amount of gas when expected_benchmark_gas_used was set.
This could cause benchmark tests to incorrectly pass even when consuming
significantly less gas than expected (e.g., due to missing factory contracts).
This feature is needed by benchmark tests like the ones in #2186 in
order to make sure that the benchmarks are indeed consuming all gas
available or causing a failure otherwise when the flag is set.

Changes:
- Add expected_benchmark_gas_used and skip_gas_used_validation fields to TransactionPost
- Implement gas validation logic in TransactionPost.execute() using transaction receipts
- Pass gas validation parameters from StateTest and BlockchainTest to TransactionPost
- Add eth_getTransactionReceipt RPC method to fetch gas used from receipts

This ensures benchmark tests fail appropriately when gas consumption doesn't match
expectations, preventing false positives in performance testing.
---
 .../transaction_post.py                       | 35 +++++++++++++++++++
 src/ethereum_test_rpc/rpc.py                  | 12 +++++++
 src/ethereum_test_specs/base.py               |  2 ++
 src/ethereum_test_specs/blockchain.py         |  4 +++
 src/ethereum_test_specs/state.py              |  4 +++
 src/pytest_plugins/execute/execute.py         |  3 ++
 6 files changed, 60 insertions(+)

diff --git a/src/ethereum_test_execution/transaction_post.py b/src/ethereum_test_execution/transaction_post.py
index 5c97eb045bc..4299a4309dd 100644
--- a/src/ethereum_test_execution/transaction_post.py
+++ b/src/ethereum_test_execution/transaction_post.py
@@ -20,6 +20,10 @@ class TransactionPost(BaseExecute):
 
     blocks: List[List[Transaction]]
     post: Alloc
+    # Gas validation fields for benchmark tests
+    gas_benchmark_value: int | None = None  # Default expected gas from command-line
+    expected_benchmark_gas_used: int | None = None  # Expected total gas to be consumed
+    skip_gas_used_validation: bool = False  # Skip gas validation even if expected is set
 
     format_name: ClassVar[str] = "transaction_post_test"
     description: ClassVar[str] = (
@@ -33,6 +37,10 @@ def execute(
         assert not any(tx.ty == 3 for block in self.blocks for tx in block), (
             "Transaction type 3 is not supported in execute mode."
         )
+
+        # Track transaction hashes for gas validation (benchmarking)
+        all_tx_hashes = []
+
         for block in self.blocks:
             signed_txs = []
             for tx_index, tx in enumerate(block):
@@ -51,11 +59,38 @@ def execute(
                 for transaction in signed_txs:
                     if transaction.error is None:
                         eth_rpc.send_wait_transaction(transaction)
+                        all_tx_hashes.append(transaction.hash)
                     else:
                         with pytest.raises(SendTransactionExceptionError):
                             eth_rpc.send_transaction(transaction)
             else:
                 eth_rpc.send_wait_transactions(signed_txs)
+                all_tx_hashes.extend([tx.hash for tx in signed_txs])
+
+        # Perform gas validation if required for benchmarking
+        # Ensures benchmark tests consume exactly the expected gas
+        if not self.skip_gas_used_validation:
+            # Use expected_benchmark_gas_used if set, else gas_benchmark_value
+            expected_gas = self.expected_benchmark_gas_used
+            if expected_gas is None:
+                expected_gas = self.gas_benchmark_value
+
+            # Only validate if we have an expected value
+            if expected_gas is not None:
+                total_gas_used = 0
+                # Fetch transaction receipts to get actual gas used
+                for tx_hash in all_tx_hashes:
+                    receipt = eth_rpc.get_transaction_receipt(tx_hash)
+                    assert receipt is not None, f"Failed to get receipt for transaction {tx_hash}"
+                    gas_used = int(receipt["gasUsed"], 16)
+                    total_gas_used += gas_used
+
+                # Verify that the total gas consumed matches expectations
+                assert total_gas_used == expected_gas, (
+                    f"Total gas used ({total_gas_used}) does not match "
+                    f"expected gas ({expected_gas}), "
+                    f"difference: {total_gas_used - expected_gas}"
+                )
 
         for address, account in self.post.root.items():
             balance = eth_rpc.get_balance(address)
diff --git a/src/ethereum_test_rpc/rpc.py b/src/ethereum_test_rpc/rpc.py
index e654340c28f..9ea2d279eb5 100644
--- a/src/ethereum_test_rpc/rpc.py
+++ b/src/ethereum_test_rpc/rpc.py
@@ -292,6 +292,18 @@ def get_transaction_by_hash(self, transaction_hash: Hash) -> TransactionByHashRe
             pprint(e.errors())
             raise e
 
+    def get_transaction_receipt(self, transaction_hash: Hash) -> dict | None:
+        """
+        `eth_getTransactionReceipt`: Returns transaction receipt.
+
+        Used to get the actual gas used by a transaction for gas validation
+        in benchmark tests.
+        """
+        response = self.post_request(
+            method="getTransactionReceipt", params=[f"{transaction_hash}"]
+        )
+        return response
+
     def get_storage_at(
         self, address: Address, position: Hash, block_number: BlockNumberType = "latest"
     ) -> Hash:
diff --git a/src/ethereum_test_specs/base.py b/src/ethereum_test_specs/base.py
index 3fee6ba4bc1..e26b3145cea 100644
--- a/src/ethereum_test_specs/base.py
+++ b/src/ethereum_test_specs/base.py
@@ -76,6 +76,7 @@ class BaseTest(BaseModel):
     _gas_optimization: int | None = PrivateAttr(None)
     _gas_optimization_max_gas_limit: int | None = PrivateAttr(None)
 
+    gas_benchmark_value: int | None = None
     expected_benchmark_gas_used: int | None = None
     skip_gas_used_validation: bool = False
 
@@ -124,6 +125,7 @@ def from_test(
         new_instance = cls(
             tag=base_test.tag,
             t8n_dump_dir=base_test.t8n_dump_dir,
+            gas_benchmark_value=base_test.gas_benchmark_value,
             expected_benchmark_gas_used=base_test.expected_benchmark_gas_used,
             skip_gas_used_validation=base_test.skip_gas_used_validation,
             **kwargs,
diff --git a/src/ethereum_test_specs/blockchain.py b/src/ethereum_test_specs/blockchain.py
index 6669ab2f274..384ba98e116 100644
--- a/src/ethereum_test_specs/blockchain.py
+++ b/src/ethereum_test_specs/blockchain.py
@@ -899,9 +899,13 @@ def execute(
             blocks: List[List[Transaction]] = []
             for block in self.blocks:
                 blocks += [block.txs]
+            # Pass gas validation params for benchmark tests
             return TransactionPost(
                 blocks=blocks,
                 post=self.post,
+                gas_benchmark_value=self.gas_benchmark_value,
+                expected_benchmark_gas_used=self.expected_benchmark_gas_used,
+                skip_gas_used_validation=self.skip_gas_used_validation,
             )
         raise Exception(f"Unsupported execute format: {execute_format}")
 
diff --git a/src/ethereum_test_specs/state.py b/src/ethereum_test_specs/state.py
index 24b0cb08169..c9ae05056c9 100644
--- a/src/ethereum_test_specs/state.py
+++ b/src/ethereum_test_specs/state.py
@@ -444,9 +444,13 @@ def execute(
     ) -> BaseExecute:
         """Generate the list of test fixtures."""
         if execute_format == TransactionPost:
+            # Pass gas validation params for benchmark tests
             return TransactionPost(
                 blocks=[[self.tx]],
                 post=self.post,
+                gas_benchmark_value=self.gas_benchmark_value,
+                expected_benchmark_gas_used=self.expected_benchmark_gas_used,
+                skip_gas_used_validation=self.skip_gas_used_validation,
             )
         raise Exception(f"Unsupported execute format: {execute_format}")
 
diff --git a/src/pytest_plugins/execute/execute.py b/src/pytest_plugins/execute/execute.py
index 42676a62113..1c8e36bd39a 100644
--- a/src/pytest_plugins/execute/execute.py
+++ b/src/pytest_plugins/execute/execute.py
@@ -342,6 +342,9 @@ def __init__(self, *args, **kwargs):
                     kwargs["pre"] = pre
                 elif kwargs["pre"] != pre:
                     raise ValueError("The pre-alloc object was modified by the test.")
+                # Pass gas_benchmark_value if not already set
+                if "gas_benchmark_value" not in kwargs:
+                    kwargs["gas_benchmark_value"] = request.getfixturevalue("gas_benchmark_value")
                 kwargs |= {
                     p: request.getfixturevalue(p)
                     for p in cls_fixture_parameters

From 4ae8879986916dcb73fd919e79f234f5e0e6d9f3 Mon Sep 17 00:00:00 2001
From: CPerezz <cperezz19@pm.me>
Date: Tue, 30 Sep 2025 10:23:45 +0200
Subject: [PATCH 2/2] refactor(execute): simplify gas validation implementation

Addresses review comment to make execute mode gas validation cleaner:
- Set expected_benchmark_gas_used to gas_benchmark_value as default in execute parametrizer
- Remove gas_benchmark_value parameter from TransactionPost, StateTest, BlockchainTest, and BaseTest
- Simplify gas validation logic in TransactionPost

This ensures consistent gas validation behavior between fill and execute modes
with a cleaner implementation that sets defaults at the parametrizer level.
---
 .../transaction_post.py                       | 36 ++++++++-----------
 src/ethereum_test_specs/base.py               |  2 --
 src/ethereum_test_specs/blockchain.py         |  1 -
 src/ethereum_test_specs/state.py              |  1 -
 src/pytest_plugins/execute/execute.py         |  8 +++--
 5 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/src/ethereum_test_execution/transaction_post.py b/src/ethereum_test_execution/transaction_post.py
index 4299a4309dd..a94af44db6c 100644
--- a/src/ethereum_test_execution/transaction_post.py
+++ b/src/ethereum_test_execution/transaction_post.py
@@ -21,7 +21,6 @@ class TransactionPost(BaseExecute):
     blocks: List[List[Transaction]]
     post: Alloc
     # Gas validation fields for benchmark tests
-    gas_benchmark_value: int | None = None  # Default expected gas from command-line
     expected_benchmark_gas_used: int | None = None  # Expected total gas to be consumed
     skip_gas_used_validation: bool = False  # Skip gas validation even if expected is set
 
@@ -69,28 +68,21 @@ def execute(
 
         # Perform gas validation if required for benchmarking
         # Ensures benchmark tests consume exactly the expected gas
-        if not self.skip_gas_used_validation:
-            # Use expected_benchmark_gas_used if set, else gas_benchmark_value
-            expected_gas = self.expected_benchmark_gas_used
-            if expected_gas is None:
-                expected_gas = self.gas_benchmark_value
+        if not self.skip_gas_used_validation and self.expected_benchmark_gas_used is not None:
+            total_gas_used = 0
+            # Fetch transaction receipts to get actual gas used
+            for tx_hash in all_tx_hashes:
+                receipt = eth_rpc.get_transaction_receipt(tx_hash)
+                assert receipt is not None, f"Failed to get receipt for transaction {tx_hash}"
+                gas_used = int(receipt["gasUsed"], 16)
+                total_gas_used += gas_used
 
-            # Only validate if we have an expected value
-            if expected_gas is not None:
-                total_gas_used = 0
-                # Fetch transaction receipts to get actual gas used
-                for tx_hash in all_tx_hashes:
-                    receipt = eth_rpc.get_transaction_receipt(tx_hash)
-                    assert receipt is not None, f"Failed to get receipt for transaction {tx_hash}"
-                    gas_used = int(receipt["gasUsed"], 16)
-                    total_gas_used += gas_used
-
-                # Verify that the total gas consumed matches expectations
-                assert total_gas_used == expected_gas, (
-                    f"Total gas used ({total_gas_used}) does not match "
-                    f"expected gas ({expected_gas}), "
-                    f"difference: {total_gas_used - expected_gas}"
-                )
+            # Verify that the total gas consumed matches expectations
+            assert total_gas_used == self.expected_benchmark_gas_used, (
+                f"Total gas used ({total_gas_used}) does not match "
+                f"expected benchmark gas ({self.expected_benchmark_gas_used}), "
+                f"difference: {total_gas_used - self.expected_benchmark_gas_used}"
+            )
 
         for address, account in self.post.root.items():
             balance = eth_rpc.get_balance(address)
diff --git a/src/ethereum_test_specs/base.py b/src/ethereum_test_specs/base.py
index e26b3145cea..3fee6ba4bc1 100644
--- a/src/ethereum_test_specs/base.py
+++ b/src/ethereum_test_specs/base.py
@@ -76,7 +76,6 @@ class BaseTest(BaseModel):
     _gas_optimization: int | None = PrivateAttr(None)
     _gas_optimization_max_gas_limit: int | None = PrivateAttr(None)
 
-    gas_benchmark_value: int | None = None
     expected_benchmark_gas_used: int | None = None
     skip_gas_used_validation: bool = False
 
@@ -125,7 +124,6 @@ def from_test(
         new_instance = cls(
             tag=base_test.tag,
             t8n_dump_dir=base_test.t8n_dump_dir,
-            gas_benchmark_value=base_test.gas_benchmark_value,
             expected_benchmark_gas_used=base_test.expected_benchmark_gas_used,
             skip_gas_used_validation=base_test.skip_gas_used_validation,
             **kwargs,
diff --git a/src/ethereum_test_specs/blockchain.py b/src/ethereum_test_specs/blockchain.py
index 384ba98e116..3633e7ff286 100644
--- a/src/ethereum_test_specs/blockchain.py
+++ b/src/ethereum_test_specs/blockchain.py
@@ -903,7 +903,6 @@ def execute(
             return TransactionPost(
                 blocks=blocks,
                 post=self.post,
-                gas_benchmark_value=self.gas_benchmark_value,
                 expected_benchmark_gas_used=self.expected_benchmark_gas_used,
                 skip_gas_used_validation=self.skip_gas_used_validation,
             )
diff --git a/src/ethereum_test_specs/state.py b/src/ethereum_test_specs/state.py
index c9ae05056c9..917ed8a28ee 100644
--- a/src/ethereum_test_specs/state.py
+++ b/src/ethereum_test_specs/state.py
@@ -448,7 +448,6 @@ def execute(
             return TransactionPost(
                 blocks=[[self.tx]],
                 post=self.post,
-                gas_benchmark_value=self.gas_benchmark_value,
                 expected_benchmark_gas_used=self.expected_benchmark_gas_used,
                 skip_gas_used_validation=self.skip_gas_used_validation,
             )
diff --git a/src/pytest_plugins/execute/execute.py b/src/pytest_plugins/execute/execute.py
index 1c8e36bd39a..62d5aa1cc6c 100644
--- a/src/pytest_plugins/execute/execute.py
+++ b/src/pytest_plugins/execute/execute.py
@@ -342,9 +342,11 @@ def __init__(self, *args, **kwargs):
                     kwargs["pre"] = pre
                 elif kwargs["pre"] != pre:
                     raise ValueError("The pre-alloc object was modified by the test.")
-                # Pass gas_benchmark_value if not already set
-                if "gas_benchmark_value" not in kwargs:
-                    kwargs["gas_benchmark_value"] = request.getfixturevalue("gas_benchmark_value")
+                # Set default for expected_benchmark_gas_used
+                if "expected_benchmark_gas_used" not in kwargs:
+                    kwargs["expected_benchmark_gas_used"] = request.getfixturevalue(
+                        "gas_benchmark_value"
+                    )
                 kwargs |= {
                     p: request.getfixturevalue(p)
                     for p in cls_fixture_parameters