2 changes: 2 additions & 0 deletions docs/running_tests/test_formats/blockchain_test_engine_x.md
@@ -22,6 +22,8 @@ The JSON file path plus the test name are used as the unique test identifier.

The `blockchain_tests_engine_x` directory contains a special directory `pre_alloc` that stores pre-allocation group files used by all tests in this format, one per pre-allocation group with the name of the pre-alloc hash. This folder is essential for test execution and must be present alongside the test fixtures.

**Note:** When fixtures are generated with `--gas-benchmark-values`, benchmark fixtures are written under `gas_limit_XXXXM/` subdirectories. The `pre_alloc` folder remains at the output root and is shared across all gas limits, so consumers should be pointed at the output root for `pre_alloc` to be found.
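
A sketch of the layout this note describes (directory names illustrative):

```text
<output>/
    pre_alloc/                  # shared across all gas limits
    gas_limit_0001M/
        blockchain_tests_engine_x/...
    gas_limit_0002M/
        blockchain_tests_engine_x/...
```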

### Pre-Allocation Group File Structure

Each file in the `pre_alloc` folder corresponds to a pre-allocation group identified by a hash:
10 changes: 10 additions & 0 deletions docs/writing_tests/benchmarks.md
@@ -69,6 +69,16 @@ This mode is designed for gas limit testing, and gas repricing, where it enables
- `--gas-benchmark-values 1,2,3` runs the test with 1M, 2M, and 3M block gas limits
- `--fixed-opcode-count 4,5` runs the test with approximately 4K and 5K opcode executions

**Output layout with gas benchmark values:** When `--gas-benchmark-values` is provided and benchmark tests are filled, fixtures are written into per‑value subdirectories under the output path:

```text
<output>/
    gas_limit_0001M/...
    gas_limit_0002M/...
```

Non‑benchmark fixtures are still written to the root output directory.
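
Because the subdirectory names are zero-padded, lexical ordering of the directory names matches numeric gas-limit ordering within a run. A minimal consumer sketch, assuming a local `fixtures` output root (path illustrative):

```python
from pathlib import Path

output_root = Path("fixtures")  # illustrative output root

# Zero-padded names sort lexically in ascending gas-limit order.
for gas_dir in sorted(output_root.glob("gas_limit_*M")):
    fixture_files = list(gas_dir.rglob("*.json"))
    print(f"{gas_dir.name}: {len(fixture_files)} fixture files")
```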

## Developing Benchmarks

Before writing benchmark-specific tests, please refer to the [general documentation](./writing_a_new_test.md) for the fundamentals of writing tests in the EELS framework.
@@ -743,6 +743,16 @@ def pytest_configure(config: pytest.Config) -> None:
    if is_help_or_collectonly_mode(config):
        return

    from ..shared.benchmarking import GasBenchmarkValues

    gas_benchmark_values = GasBenchmarkValues.from_config(config)
    if gas_benchmark_values is not None and config.fixture_output.is_stdout:  # type: ignore[attr-defined]
        pytest.exit(
            "--gas-benchmark-values cannot be used with --output=stdout. "
            "Use a directory output.",
            returncode=pytest.ExitCode.USAGE_ERROR,
        )

    try:
        # Check whether the directory exists and is not empty; if --clean is
        # set, it will delete it
@@ -1533,9 +1543,31 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
if witness_generator is not None:
    witness_generator(fixture)

from ..shared.benchmarking import (
    GasBenchmarkValues,
    format_gas_limit_subdir,
)

output_subdir = None
gas_benchmark_values = GasBenchmarkValues.from_config(
    request.config
)
is_benchmark_test = any(
    request.node.get_closest_marker(name)
    for name in ("benchmark", "stateful", "repricing")
)
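# Route benchmark fixtures into a per-gas-limit subdirectory
# (gas_limit_XXXXM); all other fixtures keep the flat output layout.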
if gas_benchmark_values is not None and is_benchmark_test:
    output_subdir = Path(
        format_gas_limit_subdir(
            gas_benchmark_value,
            gas_benchmark_values.root,
        )
    )

fixture_path = fixture_collector.add_fixture(
    node_to_test_info(request.node),
    fixture,
    output_subdir=output_subdir,
)

# NOTE: Use str for compatibility with pytest-dist
@@ -1724,6 +1756,8 @@ def _verify_fixtures_post_merge(
continue

top_dir = relative_path.parts[0]
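# Fixtures filled with --gas-benchmark-values sit one gas_limit_XXXXM level
# deeper, so the fixture-format directory is the second path component.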
if top_dir.startswith("gas_limit_") and len(relative_path.parts) > 1:
    top_dir = relative_path.parts[1]
fixture_format = dir_to_format.get(top_dir)
if fixture_format is None:
    continue
@@ -1,5 +1,6 @@
"""Test the benchmarking pytest plugin for gas benchmark values."""

import json
import textwrap
from pathlib import Path
from typing import List
@@ -185,6 +186,93 @@ def test_benchmarking_mode_configured_with_option(
assert any("benchmark-gas-value_30M" in line for line in result.outlines)


def test_benchmark_gas_values_split_into_subdirs(
    pytester: pytest.Pytester, tmp_path: Path
) -> None:
    """Ensure per-gas-limit outputs are written to separate directories."""
    benchmark_marked_module = textwrap.dedent(
        """\
        import pytest
        from execution_testing import (
            BenchmarkTestFiller,
            JumpLoopGenerator,
            Op,
        )

        @pytest.mark.valid_at("Prague")
        @pytest.mark.benchmark
        def test_dummy_benchmark_test(
            benchmark_test: BenchmarkTestFiller,
        ) -> None:
            benchmark_test(
                target_opcode=Op.JUMPDEST,
                code_generator=JumpLoopGenerator(attack_block=Op.JUMPDEST),
            )
        """
    )
    setup_test_directory_structure(
        pytester, benchmark_marked_module, "test_dummy_benchmark.py"
    )

    output_dir = tmp_path / "fixtures"
    result = pytester.runpytest(
        "-c",
        "pytest-fill.ini",
        "--fork",
        "Prague",
        "--gas-benchmark-values",
        "1,2",
        "-m",
        "benchmark and blockchain_test and not derived_test",
        "--no-html",
        "--skip-index",
        f"--output={output_dir}",
        "tests/benchmark/dummy_test_module/",
        "-q",
    )

    assert result.ret == 0, f"Fill command failed:\n{result.outlines}"

    gas_1_dir = output_dir / "gas_limit_0001M"
    gas_2_dir = output_dir / "gas_limit_0002M"
    assert gas_1_dir.exists()
    assert gas_2_dir.exists()

    gas_1_files = list(gas_1_dir.rglob("*.json"))
    gas_2_files = list(gas_2_dir.rglob("*.json"))
    assert gas_1_files, "Expected fixtures under gas_limit_0001M"
    assert gas_2_files, "Expected fixtures under gas_limit_0002M"

    def _assert_keys(
        files: list[Path], expected: str, unexpected: str
    ) -> None:
        for file_path in files:
            data = json.loads(file_path.read_text())
            assert data, f"Empty fixture file: {file_path}"
            for key in data.keys():
                assert expected in key, (
                    f"Expected {expected} in key {key} ({file_path})"
                )
                assert unexpected not in key, (
                    f"Unexpected {unexpected} in key {key} ({file_path})"
                )

    _assert_keys(gas_1_files, "benchmark-gas-value_1M", "2M")
    _assert_keys(gas_2_files, "benchmark-gas-value_2M", "1M")

    root_json = []
    for json_path in output_dir.rglob("*.json"):
        rel = json_path.relative_to(output_dir)
        if not rel.parts:
            continue
        if rel.parts[0].startswith("gas_limit_"):
            continue
        if rel.parts[0] == ".meta":
            continue
        root_json.append(json_path)
    assert not root_json, f"Unexpected root JSON files: {root_json}"


def test_benchmarking_mode_not_configured_without_option(
    pytester: pytest.Pytester,
) -> None:
@@ -14,6 +14,16 @@
from .execute_fill import OpMode


def format_gas_limit_subdir(
    gas_benchmark_value: int, gas_values_millions: list[int]
) -> str:
"""Return a stable, sortable gas-limit subdirectory name."""
gas_value_millions = gas_benchmark_value // 1_000_000
max_value = max(gas_values_millions) if gas_values_millions else 0
width = max(4, len(str(max_value)))
return f"gas_limit_{gas_value_millions:0{width}d}M"


def pytest_addoption(parser: pytest.Parser) -> None:
"""Add command line options for benchmark tests."""
benchmark_group = parser.getgroup(
@@ -28,6 +38,8 @@ def pytest_addoption(parser: pytest.Parser) -> None:
        help=(
            "Gas limits (in millions) for benchmark tests. "
            "Example: '100,500' runs tests with 100M and 500M gas. "
            "Benchmark outputs are grouped under gas_limit_XXXXM/ "
            "subdirectories. "
            f"Cannot be used with {OpcodeCountsConfig.flag}."
        ),
    )
13 changes: 11 additions & 2 deletions packages/testing/src/execution_testing/fixtures/collector.py
@@ -245,12 +245,21 @@ def _get_worker_id(self) -> str | None:
        self._worker_id_cached = True
        return self.worker_id

    def add_fixture(self, info: TestInfo, fixture: BaseFixture) -> Path:
    def add_fixture(
        self,
        info: TestInfo,
        fixture: BaseFixture,
        output_subdir: Path | None = None,
    ) -> Path:
        """Add fixture and immediately stream to partial JSONL file."""
        fixture_basename = self.get_fixture_basename(info)

        effective_output_dir = self.output_dir
        if output_subdir is not None and self.output_dir.name != "stdout":
            effective_output_dir = self.output_dir / output_subdir

        fixture_path = (
            self.output_dir
            effective_output_dir
            / fixture.output_base_dir_name()
            / fixture_basename.with_suffix(fixture.output_file_extension)
        )