-
Notifications
You must be signed in to change notification settings - Fork 697
[Backend Test] Backend test reporting skeleton #12296
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
GregoryComer
merged 1 commit into
pytorch:main
from
GregoryComer:backend-test-reporting
Jul 15, 2025
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| # Test run context management. This is used to determine the test context for reporting | ||
| # purposes. | ||
| class TestContext: | ||
| def __init__(self, test_name: str, flow_name: str, params: dict | None): | ||
| self.test_name = test_name | ||
| self.flow_name = flow_name | ||
| self.params = params | ||
|
|
||
| def __enter__(self): | ||
| global _active_test_context | ||
| import sys | ||
|
|
||
| if _active_test_context is not None: | ||
| print(f"Active context: {_active_test_context.test_name}", file=sys.stderr) | ||
| assert _active_test_context is None | ||
| _active_test_context = self | ||
|
|
||
| def __exit__(self, exc_type, exc_value, traceback): | ||
| global _active_test_context | ||
| _active_test_context = None | ||
|
|
||
|
|
||
# The TestContext currently in effect, if any. Set and cleared by
# TestContext.__enter__/__exit__.
_active_test_context: TestContext | None = None


def get_active_test_context() -> TestContext | None:
    """Return the TestContext of the currently-running test, or None if none is active."""
    # Reading a module global needs no `global` declaration.
    return _active_test_context
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,163 @@ | ||
| from collections import Counter | ||
| from dataclasses import dataclass | ||
| from enum import IntEnum, nonmember | ||
|
|
||
|
|
||
| class TestResult(IntEnum): | ||
| """Represents the result of a test case run, indicating success or a specific failure reason.""" | ||
|
|
||
| SUCCESS = 0 | ||
| """ The test succeeded with the backend delegate part or all of the graph. """ | ||
|
|
||
| SUCCESS_UNDELEGATED = 1 | ||
| """ The test succeeded without the backend delegating anything. """ | ||
|
|
||
| EAGER_FAIL = 2 | ||
| """ The test failed due to the model failing to run in eager mode. """ | ||
|
|
||
| EXPORT_FAIL = 3 | ||
| """ The test failed due to the model failing to export. """ | ||
|
|
||
| LOWER_FAIL = 4 | ||
| """ The test failed due to a failure in partitioning or lowering. """ | ||
|
|
||
| PTE_LOAD_FAIL = 5 | ||
| """ The test failed due to the resulting PTE failing to load. """ | ||
|
|
||
| PTE_RUN_FAIL = 6 | ||
| """ The test failed due to the resulting PTE failing to run. """ | ||
|
|
||
| OUTPUT_MISMATCH_FAIL = 7 | ||
| """ The test failed due to a mismatch between runtime and reference outputs. """ | ||
|
|
||
| UNKNOWN_FAIL = 8 | ||
| """ The test failed in an unknown or unexpected manner. """ | ||
|
|
||
| @nonmember | ||
| def is_success(self): | ||
| return self in {TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED} | ||
|
|
||
| @nonmember | ||
| def is_non_backend_failure(self): | ||
| return self in {TestResult.EAGER_FAIL, TestResult.EAGER_FAIL} | ||
|
|
||
| @nonmember | ||
| def is_backend_failure(self): | ||
| return not self.is_success() and not self.is_non_backend_failure() | ||
|
|
||
| @nonmember | ||
| def display_name(self): | ||
| if self == TestResult.SUCCESS: | ||
| return "Success (Delegated)" | ||
| elif self == TestResult.SUCCESS_UNDELEGATED: | ||
| return "Success (Undelegated)" | ||
| elif self == TestResult.EAGER_FAIL: | ||
| return "Fail (Eager)" | ||
| elif self == TestResult.EXPORT_FAIL: | ||
| return "Fail (Export)" | ||
| elif self == TestResult.LOWER_FAIL: | ||
| return "Fail (Lowering)" | ||
| elif self == TestResult.PTE_LOAD_FAIL: | ||
| return "Fail (PTE Load)" | ||
| elif self == TestResult.PTE_RUN_FAIL: | ||
| return "Fail (PTE Run)" | ||
| elif self == TestResult.OUTPUT_MISMATCH_FAIL: | ||
| return "Fail (Output Mismatch)" | ||
| elif self == TestResult.UNKNOWN_FAIL: | ||
| return "Fail (Other)" | ||
| else: | ||
| raise ValueError(f"Invalid TestResult value: {self}.") | ||
|
|
||
|
|
||
@dataclass
class TestCaseSummary:
    """
    Contains summary results for the execution of a single test case.

    One instance is produced per test case run and aggregated into a
    RunSummary at the end of the session.
    """

    name: str
    """ The qualified name of the test, not including the flow suffix. """

    flow: str
    """ The backend-specific flow name. Corresponds to flows registered in backends/test/suite/__init__.py. """

    params: dict | None
    """ Test-specific parameters, such as dtype. """

    result: TestResult
    """ The top-level result, such as SUCCESS or LOWER_FAIL. """

    error: Exception | None
    """ The Python exception object, if any. """
|
|
||
|
|
||
class TestSessionState:
    """Mutable accumulator holding the per-case results gathered during one session."""

    # Appended to by log_test_summary() as test cases complete.
    test_case_summaries: list[TestCaseSummary]

    def __init__(self) -> None:
        # Every session starts with an empty result list.
        self.test_case_summaries = []
|
|
||
|
|
||
@dataclass
class RunSummary:
    """Aggregated results for a complete test run, built from a TestSessionState."""

    aggregated_results: dict[TestResult, int]  # Per-outcome counts, keyed by result.
    num_test_cases: int  # Total number of test cases recorded.
    test_case_summaries: list[TestCaseSummary]  # The raw per-case summaries.
    total_failed: int  # Cases that failed due to the backend.
    total_passed: int  # Cases that succeeded (delegated or not).
    total_skipped: int  # Cases that failed outside the backend (eager/export).

    @classmethod
    def from_session(cls, session: TestSessionState) -> "RunSummary":
        """Tally the session's per-case outcomes into a RunSummary."""
        # Count occurrences of each outcome, then order the dict by result value.
        outcome_counts = Counter(case.result for case in session.test_case_summaries)
        aggregated_results = dict(sorted(outcome_counts.items()))

        # Partition the totals: success -> passed, backend failure -> failed,
        # everything else (non-backend failures) -> skipped.
        total_passed = sum(
            count for outcome, count in aggregated_results.items() if outcome.is_success()
        )
        total_failed = sum(
            count
            for outcome, count in aggregated_results.items()
            if outcome.is_backend_failure()
        )
        total_skipped = sum(
            count
            for outcome, count in aggregated_results.items()
            if not outcome.is_success() and not outcome.is_backend_failure()
        )

        return cls(
            aggregated_results=aggregated_results,
            num_test_cases=len(session.test_case_summaries),
            test_case_summaries=session.test_case_summaries,
            total_failed=total_failed,
            total_passed=total_passed,
            total_skipped=total_skipped,
        )
|
|
||
|
|
||
# The session currently collecting results; None when no session is open.
_active_session: TestSessionState | None = None


def begin_test_session():
    """Open a new global test session.

    Asserts if a session is already in progress.
    """
    global _active_session

    assert _active_session is None, "A test session is already active."
    _active_session = TestSessionState()
|
|
||
|
|
||
def log_test_summary(summary: TestCaseSummary):
    """Record a completed test case's summary in the active session.

    Best-effort: when no session is open, the summary is silently dropped.
    """
    global _active_session

    if _active_session is None:
        return
    _active_session.test_case_summaries.append(summary)
|
|
||
|
|
||
def complete_test_session() -> RunSummary:
    """Close the active session and return its aggregated summary.

    Asserts if no session is active; the active-session slot is cleared
    before returning.
    """
    global _active_session

    assert _active_session is not None, "No test session is active."
    run_summary = RunSummary.from_session(_active_session)
    _active_session = None
    return run_summary
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.