diff --git a/backends/qualcomm/tests/tester.py b/backends/qualcomm/tests/tester.py index 58dda07ef46..fb34087ac90 100644 --- a/backends/qualcomm/tests/tester.py +++ b/backends/qualcomm/tests/tester.py @@ -52,7 +52,9 @@ def __init__( default_partitioner_cls=QnnPartitioner, ) - def run(self, artifact: ExportedProgram, inputs=None) -> None: + def run( + self, artifact: ExportedProgram, inputs=None, generate_etrecord: bool = False + ) -> None: ep = QnnPassManager().transform_for_export_pipeline(artifact) transform_passes = QnnPassManager().get_to_edge_transform_passes(ep) @@ -61,6 +63,7 @@ def run(self, artifact: ExportedProgram, inputs=None) -> None: transform_passes=transform_passes, partitioner=self.partitioners, compile_config=self.edge_compile_conf, + generate_etrecord=generate_etrecord, ) diff --git a/backends/test/harness/stages/to_edge_transform_and_lower.py b/backends/test/harness/stages/to_edge_transform_and_lower.py index 6c5aa4b541b..0949b633c5d 100644 --- a/backends/test/harness/stages/to_edge_transform_and_lower.py +++ b/backends/test/harness/stages/to_edge_transform_and_lower.py @@ -7,6 +7,7 @@ to_edge_transform_and_lower, ) from executorch.exir.backend.partitioner import Partitioner + from torch.export import ExportedProgram @@ -24,11 +25,14 @@ def __init__( def stage_type(self) -> StageType: return StageType.TO_EDGE_TRANSFORM_AND_LOWER - def run(self, artifact: ExportedProgram, inputs=None) -> None: + def run( + self, artifact: ExportedProgram, inputs=None, generate_etrecord: bool = False + ) -> None: self.edge_dialect_program = to_edge_transform_and_lower( artifact, compile_config=self.edge_compile_conf, partitioner=self.partitioners, + generate_etrecord=generate_etrecord, ) @property diff --git a/backends/test/harness/tester.py b/backends/test/harness/tester.py index 2782fc7bb29..7e5b558aff0 100644 --- a/backends/test/harness/tester.py +++ b/backends/test/harness/tester.py @@ -183,10 +183,10 @@ def _post(self, stage): assert stage_type in self.stages 
self.stages[stage_type] = stage - def _run_stage(self, stage_instance, inputs=None): + def _run_stage(self, stage_instance, inputs=None, *args, **kwargs): assert isinstance(stage_instance, Stage) prev_stage_artifact = self._pre(stage_instance) - stage_instance.run(prev_stage_artifact, inputs=inputs) + stage_instance.run(prev_stage_artifact, inputs=inputs, *args, **kwargs) # noqa self._post(stage_instance) return self @@ -213,11 +213,14 @@ def to_edge(self, to_edge_stage: Optional[ToEdge] = None): return res def to_edge_transform_and_lower( - self, to_edge_and_transform_stage: Optional[ToEdgeTransformAndLower] = None + self, + to_edge_and_transform_stage: Optional[ToEdgeTransformAndLower] = None, + generate_etrecord: bool = False, ): return self._run_stage( to_edge_and_transform_stage - or self._get_default_stage(StageType.TO_EDGE_TRANSFORM_AND_LOWER) + or self._get_default_stage(StageType.TO_EDGE_TRANSFORM_AND_LOWER), + generate_etrecord=generate_etrecord, ) def run_passes(self, run_passes_stage: Optional[RunPasses] = None): diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index e054bb1685b..22affcaee84 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -1,12 +1,22 @@ import csv + from collections import Counter from dataclasses import dataclass from datetime import timedelta from enum import IntEnum from functools import reduce -from typing import TextIO +from typing import Any, TextIO from executorch.backends.test.harness.error_statistics import ErrorStatistics +from torch.export import ExportedProgram + + +# Operators that are excluded from the counts returned by count_ops. These are used to +# exclude operations that are not logically relevant or delegatable to backends. 
+OP_COUNT_IGNORED_OPS = { + "executorch_call_delegate", + "getitem", +} class TestResult(IntEnum): @@ -115,6 +125,12 @@ class TestCaseSummary: lower_time: timedelta | None = None """ The total runtime of the to_edge_transform_and_lower stage, or none, if the test did not run the quantize stage. """ + delegated_op_counts: Counter | None = None + """ The number of delegated occurrences of each operator in the graph. """ + + undelegated_op_counts: Counter | None = None + """ The number of undelegated occurrences of each operator in the graph. """ + class TestSessionState: test_case_summaries: list[TestCaseSummary] @@ -164,6 +180,40 @@ def from_session(cls, session: TestSessionState) -> "RunSummary": _active_session: TestSessionState | None = None +def _get_target_name(target: Any) -> str: + """Retrieve a string representation of a node target.""" + if isinstance(target, str): + return target + elif hasattr(target, "name"): + return target.name() # Op overloads have this + elif hasattr(target, "__name__"): + return target.__name__ # Some builtins have this + else: + return str(target) + + +def _count_ops(program: ExportedProgram) -> Counter: + op_names = ( + _get_target_name(n.target) + for n in program.graph.nodes + if n.op == "call_function" + ) + + return Counter(op for op in op_names if op not in OP_COUNT_IGNORED_OPS) + + +def count_ops(program: dict[str, ExportedProgram] | ExportedProgram) -> Counter: + if isinstance(program, ExportedProgram): + return _count_ops(program) + else: + # Sum op counts for all methods in the program. + return reduce( + lambda a, b: a + b, + (_count_ops(p) for p in program.values()), + Counter(), + ) + + def begin_test_session(): global _active_session @@ -188,6 +238,24 @@ def complete_test_session() -> RunSummary: return summary +def _sum_op_counts(counter: Counter | None) -> int | None: + """ + A utility function to count the total number of nodes in an op count dict. 
+ """ + return sum(counter.values()) if counter is not None else None + + +def _serialize_op_counts(counter: Counter | None) -> str: + """ + A utility function to serialize op counts to a string, for the purpose of including + in the test report. + """ + if counter is not None: + return str(dict(sorted(counter.items()))) + else: + return "" + + def generate_csv_report(summary: RunSummary, output: TextIO): """Write a run summary report to a file in CSV format.""" @@ -228,6 +296,14 @@ def generate_csv_report(summary: RunSummary, output: TextIO): f"Output {i} SQNR", ] ) + field_names.extend( + [ + "Delegated Nodes", + "Undelegated Nodes", + "Delegated Ops", + "Undelegated Ops", + ] + ) writer = csv.DictWriter(output, field_names) writer.writeheader() @@ -256,4 +332,9 @@ def generate_csv_report(summary: RunSummary, output: TextIO): row[f"Output {output_idx} Error L2"] = error_stats.error_l2_norm row[f"Output {output_idx} SQNR"] = error_stats.sqnr + row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts) + row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts) + row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts) + row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts) + writer.writerow(row) diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 6ce9c788432..c57483455a3 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -16,11 +16,13 @@ from executorch.backends.test.suite.reporting import ( begin_test_session, complete_test_session, + count_ops, generate_csv_report, RunSummary, TestCaseSummary, TestResult, ) +from executorch.exir import EdgeProgramManager # A list of all runnable test suites and the corresponding python package. 
@@ -98,7 +100,7 @@ def build_result( lower_start_time = time.perf_counter() try: - tester.to_edge_transform_and_lower() + tester.to_edge_transform_and_lower(generate_etrecord=True) elapsed = time.perf_counter() - lower_start_time extra_stats["lower_time"] = timedelta(seconds=elapsed) except Exception as e: @@ -106,6 +108,17 @@ def build_result( extra_stats["lower_time"] = timedelta(seconds=elapsed) return build_result(TestResult.LOWER_FAIL, e) + # Compute delegation statistics. Use the ETRecord to access the edge dialect graph between + # to_edge and delegation. Note that ETRecord only stores the edge dialect graph for a single + # method currently and assumes it is called "forward". + edge_manager: EdgeProgramManager = tester.get_artifact() + edge_op_counts = count_ops({"forward": edge_manager._etrecord.edge_dialect_program}) + undelegated_op_counts = count_ops(edge_manager._edge_programs) + delegated_op_counts = edge_op_counts - undelegated_op_counts + + extra_stats["delegated_op_counts"] = delegated_op_counts + extra_stats["undelegated_op_counts"] = undelegated_op_counts + is_delegated = any( n.target == torch._higher_order_ops.executorch_call_delegate for n in tester.stages[tester.cur].graph_module.graph.nodes diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py index 5adda651082..3b711e45949 100644 --- a/backends/test/suite/tests/test_reporting.py +++ b/backends/test/suite/tests/test_reporting.py @@ -5,7 +5,10 @@ import torch +from executorch.exir import to_edge + from ..reporting import ( + count_ops, generate_csv_report, RunSummary, TestCaseSummary, @@ -23,6 +26,7 @@ params=None, result=TestResult.SUCCESS, error=None, + tensor_error_statistics=[], ), TestCaseSummary( backend="backend2", @@ -32,6 +36,7 @@ params=None, result=TestResult.LOWER_FAIL, error=None, + tensor_error_statistics=[], ), TestCaseSummary( backend="backend1", @@ -41,6 +46,7 @@ params={"dtype": torch.float32}, 
result=TestResult.SUCCESS_UNDELEGATED, error=None, + tensor_error_statistics=[], ), TestCaseSummary( backend="backend2", @@ -50,6 +56,7 @@ params={"use_dynamic_shapes": True}, result=TestResult.EXPORT_FAIL, error=None, + tensor_error_statistics=[], ), ] @@ -104,3 +111,32 @@ def test_csv_report_simple(self): self.assertEqual(records[3]["Result"], "Fail (Export)") self.assertEqual(records[3]["Dtype"], "") self.assertEqual(records[3]["Use_dynamic_shapes"], "True") + + def test_count_ops(self): + """ + Verify that the count_ops function correctly counts operator occurrences in the edge graph. + """ + + class Model1(torch.nn.Module): + def forward(self, x, y): + return x + y + + class Model2(torch.nn.Module): + def forward(self, x, y): + return x + y * y + + args = (torch.randn(2), torch.randn(2)) + ep1 = torch.export.export(Model1(), args) + ep2 = torch.export.export(Model2(), args) + + ep = to_edge({"forward1": ep1, "forward2": ep2}) + + op_counts = count_ops(ep._edge_programs) + + self.assertEqual( + op_counts, + { + "aten::add.Tensor": 2, + "aten::mul.Tensor": 1, + }, + ) diff --git a/pytest.ini b/pytest.ini index da56ddbd8d5..aae87f242a7 100644 --- a/pytest.ini +++ b/pytest.ini @@ -48,6 +48,8 @@ addopts = # is stable and signal to noise ratio is good (no irrelevant failures). # See https://github.com/pytorch/executorch/discussions/11140 --ignore=backends/test + backends/test/harness/tests + backends/test/suite/tests # backends/xnnpack backends/xnnpack/test/ops --ignore=backends/xnnpack/test/ops/test_bmm.py