diff --git a/backends/test/suite/context.py b/backends/test/suite/context.py index 5f12284ae21..16b22b89f87 100644 --- a/backends/test/suite/context.py +++ b/backends/test/suite/context.py @@ -1,8 +1,11 @@ # Test run context management. This is used to determine the test context for reporting # purposes. class TestContext: - def __init__(self, test_name: str, flow_name: str, params: dict | None): + def __init__( + self, test_name: str, test_base_name: str, flow_name: str, params: dict | None + ): self.test_name = test_name + self.test_base_name = test_base_name self.flow_name = flow_name self.params = params diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py index e155e3382c5..700baa435fc 100644 --- a/backends/test/suite/models/__init__.py +++ b/backends/test/suite/models/__init__.py @@ -42,19 +42,19 @@ def _create_test( dtype: torch.dtype, use_dynamic_shapes: bool, ): + dtype_name = str(dtype)[6:] # strip "torch." + test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}" + if use_dynamic_shapes: + test_name += "_dynamic_shape" + def wrapped_test(self): params = { "dtype": dtype, "use_dynamic_shapes": use_dynamic_shapes, } - with TestContext(test_name, flow.name, params): + with TestContext(test_name, test_func.__name__, flow.name, params): test_func(self, flow, dtype, use_dynamic_shapes) - dtype_name = str(dtype)[6:] # strip "torch." 
- test_name = f"{test_func.__name__}_{flow.name}_{dtype_name}" - if use_dynamic_shapes: - test_name += "_dynamic_shape" - wrapped_test._name = test_func.__name__ # type: ignore wrapped_test._flow = flow # type: ignore @@ -118,6 +118,7 @@ def run_model_test( inputs, flow, context.test_name, + context.test_base_name, context.params, dynamic_shapes=dynamic_shapes, ) diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py index ec335562b39..8f7fbb1bc03 100644 --- a/backends/test/suite/operators/__init__.py +++ b/backends/test/suite/operators/__init__.py @@ -6,6 +6,7 @@ # pyre-unsafe +import copy import os import unittest @@ -90,12 +91,13 @@ def _expand_test(cls, test_name: str): def _make_wrapped_test( test_func: Callable, test_name: str, + test_base_name: str, flow: TestFlow, params: dict | None = None, ): def wrapped_test(self): - with TestContext(test_name, flow.name, params): - test_kwargs = params or {} + with TestContext(test_name, test_base_name, flow.name, params): + test_kwargs = copy.copy(params) or {} test_kwargs["flow"] = flow test_func(self, **test_kwargs) @@ -114,19 +116,22 @@ def _create_test_for_backend( test_type = getattr(test_func, "test_type", TestType.STANDARD) if test_type == TestType.STANDARD: - wrapped_test = _make_wrapped_test(test_func, test_func.__name__, flow) test_name = f"{test_func.__name__}_{flow.name}" + wrapped_test = _make_wrapped_test( + test_func, test_name, test_func.__name__, flow + ) setattr(cls, test_name, wrapped_test) elif test_type == TestType.DTYPE: for dtype in DTYPES: + dtype_name = str(dtype)[6:] # strip "torch." + test_name = f"{test_func.__name__}_{dtype_name}_{flow.name}" wrapped_test = _make_wrapped_test( test_func, + test_name, test_func.__name__, flow, {"dtype": dtype}, ) - dtype_name = str(dtype)[6:] # strip "torch." 
- test_name = f"{test_func.__name__}_{dtype_name}_{flow.name}" setattr(cls, test_name, wrapped_test) else: raise NotImplementedError(f"Unknown test type {test_type}.") @@ -146,6 +151,7 @@ def _test_op( inputs, flow, context.test_name, + context.test_base_name, context.params, generate_random_test_inputs=generate_random_test_inputs, ) diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index ad32a8c74c9..06c8ea952db 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -1,6 +1,9 @@ +import csv from collections import Counter from dataclasses import dataclass from enum import IntEnum +from functools import reduce +from typing import TextIO class TestResult(IntEnum): @@ -76,12 +79,18 @@ class TestCaseSummary: Contains summary results for the execution of a single test case. """ - name: str - """ The qualified name of the test, not including the flow suffix. """ + backend: str + """ The name of the target backend. """ + + base_name: str + """ The base name of the test, not including flow or parameter suffixes. """ flow: str """ The backend-specific flow name. Corresponds to flows registered in backends/test/suite/__init__.py. """ + name: str + """ The full name of test, including flow and parameter suffixes. """ + params: dict | None """ Test-specific parameters, such as dtype. """ @@ -162,3 +171,43 @@ def complete_test_session() -> RunSummary: _active_session = None return summary + + +def generate_csv_report(summary: RunSummary, output: TextIO): + """Write a run summary report to a file in CSV format.""" + + field_names = [ + "Test ID", + "Test Case", + "Backend", + "Flow", + "Result", + ] + + # Tests can have custom parameters. We'll want to report them here, so we need + # a list of all unique parameter names. 
+ param_names = reduce( + lambda a, b: a.union(b), + ( + set(s.params.keys()) + for s in summary.test_case_summaries + if s.params is not None + ), + set(), + ) + field_names += (s.capitalize() for s in param_names) + + writer = csv.DictWriter(output, field_names) + writer.writeheader() + + for record in summary.test_case_summaries: + row = { + "Test ID": record.name, + "Test Case": record.base_name, + "Backend": record.backend, + "Flow": record.flow, + "Result": record.result.display_name(), + } + if record.params is not None: + row.update({k.capitalize(): v for k, v in record.params.items()}) + writer.writerow(row) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index dd6e3586628..59c4c4a33a4 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -13,6 +13,7 @@ from executorch.backends.test.suite.reporting import ( begin_test_session, complete_test_session, + generate_csv_report, RunSummary, TestCaseSummary, TestResult, @@ -31,6 +32,7 @@ def run_test( # noqa: C901 inputs: Any, flow: TestFlow, test_name: str, + test_base_name: str, params: dict | None, dynamic_shapes: Any | None = None, generate_random_test_inputs: bool = True, @@ -45,8 +47,10 @@ def build_result( result: TestResult, error: Exception | None = None ) -> TestCaseSummary: return TestCaseSummary( - name=test_name, + backend=flow.backend, + base_name=test_base_name, flow=flow.name, + name=test_name, params=params, result=result, error=error, @@ -171,6 +175,12 @@ def parse_args(): parser.add_argument( "-f", "--filter", nargs="?", help="A regular expression filter for test names." 
) + parser.add_argument( + "-r", + "--report", + nargs="?", + help="A file to write the test report to, in CSV format.", + ) return parser.parse_args() @@ -199,6 +209,11 @@ def runner_main(): summary = complete_test_session() print_summary(summary) + if args.report is not None: + with open(args.report, "w") as f: + print(f"Writing CSV report to {args.report}.") + generate_csv_report(summary, f) + if __name__ == "__main__": runner_main() diff --git a/backends/test/suite/tests/README.md b/backends/test/suite/tests/README.md new file mode 100644 index 00000000000..09117e1cd31 --- /dev/null +++ b/backends/test/suite/tests/README.md @@ -0,0 +1,3 @@ +# Tests + +This directory contains meta-tests for the backend test suite. As the test suite contains a non-negligible amount of logic, these tests are useful to ensure that the test suite itself is working correctly. diff --git a/backends/test/suite/tests/__init__.py b/backends/test/suite/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py new file mode 100644 index 00000000000..5adda651082 --- /dev/null +++ b/backends/test/suite/tests/test_reporting.py @@ -0,0 +1,106 @@ +import unittest + +from csv import DictReader +from io import StringIO + +import torch + +from ..reporting import ( + generate_csv_report, + RunSummary, + TestCaseSummary, + TestResult, + TestSessionState, +) + +# Test data for simulated test results. 
+TEST_CASE_SUMMARIES = [ + TestCaseSummary( + backend="backend1", + base_name="test1", + flow="flow1", + name="test1_backend1_flow1", + params=None, + result=TestResult.SUCCESS, + error=None, + ), + TestCaseSummary( + backend="backend2", + base_name="test1", + flow="flow1", + name="test1_backend2_flow1", + params=None, + result=TestResult.LOWER_FAIL, + error=None, + ), + TestCaseSummary( + backend="backend1", + base_name="test2", + flow="flow1", + name="test2_backend1_flow1", + params={"dtype": torch.float32}, + result=TestResult.SUCCESS_UNDELEGATED, + error=None, + ), + TestCaseSummary( + backend="backend2", + base_name="test2", + flow="flow1", + name="test2_backend2_flow1", + params={"use_dynamic_shapes": True}, + result=TestResult.EXPORT_FAIL, + error=None, + ), +] + + +class Reporting(unittest.TestCase): + def test_csv_report_simple(self): + # Verify the format of a simple CSV run report. + session_state = TestSessionState() + session_state.test_case_summaries.extend(TEST_CASE_SUMMARIES) + run_summary = RunSummary.from_session(session_state) + + strio = StringIO() + generate_csv_report(run_summary, strio) + + # Attempt to deserialize and validate the CSV report. 
+ report = DictReader(StringIO(strio.getvalue())) + records = list(report) + self.assertEqual(len(records), 4) + + # Validate first record: test1, backend1, SUCCESS + self.assertEqual(records[0]["Test ID"], "test1_backend1_flow1") + self.assertEqual(records[0]["Test Case"], "test1") + self.assertEqual(records[0]["Backend"], "backend1") + self.assertEqual(records[0]["Flow"], "flow1") + self.assertEqual(records[0]["Result"], "Success (Delegated)") + self.assertEqual(records[0]["Dtype"], "") + self.assertEqual(records[0]["Use_dynamic_shapes"], "") + + # Validate second record: test1, backend2, LOWER_FAIL + self.assertEqual(records[1]["Test ID"], "test1_backend2_flow1") + self.assertEqual(records[1]["Test Case"], "test1") + self.assertEqual(records[1]["Backend"], "backend2") + self.assertEqual(records[1]["Flow"], "flow1") + self.assertEqual(records[1]["Result"], "Fail (Lowering)") + self.assertEqual(records[1]["Dtype"], "") + self.assertEqual(records[1]["Use_dynamic_shapes"], "") + + # Validate third record: test2, backend1, SUCCESS_UNDELEGATED with dtype param + self.assertEqual(records[2]["Test ID"], "test2_backend1_flow1") + self.assertEqual(records[2]["Test Case"], "test2") + self.assertEqual(records[2]["Backend"], "backend1") + self.assertEqual(records[2]["Flow"], "flow1") + self.assertEqual(records[2]["Result"], "Success (Undelegated)") + self.assertEqual(records[2]["Dtype"], str(torch.float32)) + self.assertEqual(records[2]["Use_dynamic_shapes"], "") + + # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param + self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1") + self.assertEqual(records[3]["Test Case"], "test2") + self.assertEqual(records[3]["Backend"], "backend2") + self.assertEqual(records[3]["Flow"], "flow1") + self.assertEqual(records[3]["Result"], "Fail (Export)") + self.assertEqual(records[3]["Dtype"], "") + self.assertEqual(records[3]["Use_dynamic_shapes"], "True")