-
Notifications
You must be signed in to change notification settings - Fork 10
fix: fix gt4py metrics extractor in the StencilTest benchmarking #1111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
egparedes
merged 8 commits into
C2SM:main
from
egparedes:fix/fix-bench-infra-for-new-gt4py
Mar 16, 2026
+46
−16
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
45ad4a8
Get the metrics key using hooks
egparedes 24e8e25
Merge branch 'main' into fix/fix-bench-infra-for-new-gt4py
egparedes 6c2ed4e
Address copilot issues and other fixes
egparedes 970d6bd
Small refactor and fixes for the case when gt4py metrics are not enabled
egparedes 8d454a3
Cleanups
egparedes 6b8bb86
Merge branch 'main' into fix/fix-bench-infra-for-new-gt4py
egparedes 5510ba9
More fixes
egparedes fe21507
Add reviewer's suggestion
egparedes File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,25 +8,25 @@ | |
|
|
||
| from __future__ import annotations | ||
|
|
||
| import contextlib | ||
| import dataclasses | ||
| import os | ||
| from collections.abc import Callable, Mapping, Sequence | ||
| from typing import Any, ClassVar | ||
| from collections.abc import Callable, Generator, Mapping, Sequence | ||
| from typing import Any, ClassVar, Final | ||
|
|
||
| import gt4py.next as gtx | ||
| import numpy as np | ||
| import pytest | ||
| from gt4py import eve | ||
| from gt4py.next import ( | ||
| config as gtx_config, | ||
| constructors, | ||
| named_collections as gtx_named_collections, | ||
| typing as gtx_typing, | ||
| ) | ||
|
|
||
| # TODO(havogt): import will disappear after FieldOperators support `.compile` | ||
| from gt4py.next.ffront.decorator import FieldOperator | ||
| from gt4py.next.instrumentation import metrics as gtx_metrics | ||
| from gt4py.next.instrumentation import hooks as gtx_hooks, metrics as gtx_metrics | ||
|
|
||
| from icon4py.model.common import model_backends, model_options | ||
| from icon4py.model.common.grid import base | ||
|
|
@@ -91,6 +91,8 @@ def test_and_benchmark( | |
| skip_stenciltest_verification = request.config.getoption( | ||
| "skip_stenciltest_verification" | ||
| ) # skip verification if `--skip-stenciltest-verification` CLI option is set | ||
| skip_stenciltest_benchmark = benchmark is None or not benchmark.enabled | ||
|
|
||
| if not skip_stenciltest_verification: | ||
| reference_outputs = self.reference( | ||
| _ConnectivityConceptFixer( | ||
|
|
@@ -107,7 +109,7 @@ def test_and_benchmark( | |
| input_data=_properly_allocated_input_data, reference_outputs=reference_outputs | ||
| ) | ||
|
|
||
| if benchmark is not None and benchmark.enabled: | ||
| if not skip_stenciltest_benchmark: | ||
| warmup_rounds = int(os.getenv("ICON4PY_STENCIL_TEST_WARMUP_ROUNDS", "1")) | ||
| iterations = int(os.getenv("ICON4PY_STENCIL_TEST_ITERATIONS", "10")) | ||
|
|
||
|
|
@@ -124,24 +126,52 @@ def test_and_benchmark( | |
| ) | ||
|
|
||
| # Collect GT4Py runtime metrics if enabled | ||
| if gtx_config.COLLECT_METRICS_LEVEL > 0: | ||
| if gtx_metrics.is_any_level_enabled(): | ||
| metrics_key = None | ||
| # Run the program one final time to get the metrics key | ||
| METRICS_KEY_EXTRACTOR: Final = "metrics_id_extractor" | ||
|
|
||
| @contextlib.contextmanager | ||
| def _get_metrics_id_program_callback( | ||
| program: gtx_typing.Program, | ||
| args: tuple[Any, ...], | ||
| offset_provider: gtx.common.OffsetProvider, | ||
| enable_jit: bool, | ||
| kwargs: dict[str, Any], | ||
| ) -> Generator[None, None, None]: | ||
| yield | ||
| # Collect the key after running the program to make sure it is set | ||
| nonlocal metrics_key | ||
| metrics_key = gtx_metrics.get_current_source_key() | ||
|
|
||
| gtx_hooks.program_call_context.register( | ||
| _get_metrics_id_program_callback, name=METRICS_KEY_EXTRACTOR | ||
| ) | ||
| _configured_program( | ||
| **_properly_allocated_input_data, offset_provider=grid.connectivities | ||
| ) | ||
| gtx_hooks.program_call_context.remove(METRICS_KEY_EXTRACTOR) | ||
| assert metrics_key is not None, "Metrics key could not be recovered during run." | ||
| assert metrics_key.startswith( | ||
| _configured_program.__name__ | ||
| ), f"Metrics key ({metrics_key}) does not start with the program name ({_configured_program.__name__})" | ||
|
|
||
| assert ( | ||
| len(_configured_program._compiled_programs.compiled_programs) == 1 | ||
| ), "Multiple compiled programs found, cannot extract metrics." | ||
| # Get compiled programs from the _configured_program passed to test | ||
| compiled_programs = _configured_program._compiled_programs.compiled_programs | ||
| # Get the pool key necessary to find the right metrics key. There should be only one compiled program in _configured_program | ||
| pool_key = next(iter(compiled_programs.keys())) | ||
| # Get the metrics key from the pool key to read the corresponding metrics | ||
| metrics_key = _configured_program._compiled_programs._metrics_key_from_pool_key( | ||
| pool_key | ||
| ) | ||
| metrics_data = gtx_metrics.sources | ||
| compute_samples = metrics_data[metrics_key].metrics["compute"].samples | ||
| # exclude warmup iterations, one extra iteration for calibrating pytest-benchmark and one for validation (if executed) | ||
| # exclude: | ||
| # - one for validation (if executed) | ||
| # - one extra warmup round for calibrating pytest-benchmark | ||
| # - warmup iterations | ||
| # - one last round to get the metrics key | ||
| initial_program_iterations_to_skip = warmup_rounds * iterations + ( | ||
| 1 if skip_stenciltest_verification else 2 | ||
| 2 if skip_stenciltest_verification else 3 | ||
| ) | ||
| assert ( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not really an issue, but remember that we run the benchmark CI pipeline with |
||
| len(compute_samples) > initial_program_iterations_to_skip | ||
| ), "Not enough samples collected to compute metrics." | ||
| benchmark.extra_info["gtx_metrics"] = compute_samples[ | ||
| initial_program_iterations_to_skip: | ||
| ] | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.