[Backend Tester] Report delegation statistics

GregoryComer · GregoryComer · commit 6eba97f6007a · 2025-08-08T16:15:23.000-07:00
ghstack-source-id: 0604da3 ghstack-comment-id: 3115647824 Pull-Request: #12846
diff --git a/backends/qualcomm/tests/tester.py b/backends/qualcomm/tests/tester.py
@@ -52,7 +52,9 @@ def __init__(
             default_partitioner_cls=QnnPartitioner,
         )
 
-    def run(self, artifact: ExportedProgram, inputs=None) -> None:
+    def run(
+        self, artifact: ExportedProgram, inputs=None, generate_etrecord: bool = False
+    ) -> None:
         ep = QnnPassManager().transform_for_export_pipeline(artifact)
         transform_passes = QnnPassManager().get_to_edge_transform_passes(ep)
 
@@ -61,6 +63,7 @@ def run(self, artifact: ExportedProgram, inputs=None) -> None:
             transform_passes=transform_passes,
             partitioner=self.partitioners,
             compile_config=self.edge_compile_conf,
+            generate_etrecord=generate_etrecord,
         )
 
 
diff --git a/backends/test/harness/stages/to_edge_transform_and_lower.py b/backends/test/harness/stages/to_edge_transform_and_lower.py
@@ -7,6 +7,8 @@
     to_edge_transform_and_lower,
 )
 from executorch.exir.backend.partitioner import Partitioner
+
+from sympy.ntheory import generate
 from torch.export import ExportedProgram
 
 
@@ -24,11 +26,14 @@ def __init__(
     def stage_type(self) -> StageType:
         return StageType.TO_EDGE_TRANSFORM_AND_LOWER
 
-    def run(self, artifact: ExportedProgram, inputs=None) -> None:
+    def run(
+        self, artifact: ExportedProgram, inputs=None, generate_etrecord: bool = False
+    ) -> None:
         self.edge_dialect_program = to_edge_transform_and_lower(
             artifact,
             compile_config=self.edge_compile_conf,
             partitioner=self.partitioners,
+            generate_etrecord=generate_etrecord,
         )
 
     @property
diff --git a/backends/test/harness/tester.py b/backends/test/harness/tester.py
@@ -183,10 +183,10 @@ def _post(self, stage):
         assert stage_type in self.stages
         self.stages[stage_type] = stage
 
-    def _run_stage(self, stage_instance, inputs=None):
+    def _run_stage(self, stage_instance, inputs=None, *args, **kwargs):
         assert isinstance(stage_instance, Stage)
         prev_stage_artifact = self._pre(stage_instance)
-        stage_instance.run(prev_stage_artifact, inputs=inputs)
+        stage_instance.run(prev_stage_artifact, inputs=inputs, *args, **kwargs)
         self._post(stage_instance)
         return self
 
@@ -213,11 +213,14 @@ def to_edge(self, to_edge_stage: Optional[ToEdge] = None):
         return res
 
     def to_edge_transform_and_lower(
-        self, to_edge_and_transform_stage: Optional[ToEdgeTransformAndLower] = None
+        self,
+        to_edge_and_transform_stage: Optional[ToEdgeTransformAndLower] = None,
+        generate_etrecord: bool = False,
     ):
         return self._run_stage(
             to_edge_and_transform_stage
-            or self._get_default_stage(StageType.TO_EDGE_TRANSFORM_AND_LOWER)
+            or self._get_default_stage(StageType.TO_EDGE_TRANSFORM_AND_LOWER),
+            generate_etrecord=generate_etrecord,
         )
 
     def run_passes(self, run_passes_stage: Optional[RunPasses] = None):
diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py
@@ -1,12 +1,22 @@
 import csv
+
 from collections import Counter
 from dataclasses import dataclass
 from datetime import timedelta
 from enum import IntEnum
 from functools import reduce
-from typing import TextIO
+from typing import Any, TextIO
 
 from executorch.backends.test.harness.error_statistics import ErrorStatistics
+from torch.export import ExportedProgram
+
+
+# Operators that are excluded from the counts returned by count_ops. These are used to
+# exclude operations that are not logically relevant or delegatable to backends.
+OP_COUNT_IGNORED_OPS = {
+    "executorch_call_delegate",
+    "getitem",
+}
 
 
 class TestResult(IntEnum):
@@ -115,6 +125,12 @@ class TestCaseSummary:
     lower_time: timedelta | None = None
     """ The total runtime of the to_edge_transform_and_lower stage, or none, if the test did not run the quantize stage. """
 
+    delegated_op_counts: Counter | None = None
+    """ The number of delegated occurrences of each operator in the graph. """
+
+    undelegated_op_counts: Counter | None = None
+    """ The number of undelegated occurrences of each operator in the graph. """
+
 
 class TestSessionState:
     test_case_summaries: list[TestCaseSummary]
@@ -164,6 +180,40 @@ def from_session(cls, session: TestSessionState) -> "RunSummary":
 _active_session: TestSessionState | None = None
 
 
+def _get_target_name(target: Any) -> str:
+    """Retrieve a string representation of a node target."""
+    if isinstance(target, str):
+        return target
+    elif hasattr(target, "name"):
+        return target.name()  # Op overloads have this
+    elif hasattr(target, "__name__"):
+        return target.__name__  # Some builtins have this
+    else:
+        return str(target)
+
+
+def _count_ops(program: ExportedProgram) -> Counter:
+    op_names = (
+        _get_target_name(n.target)
+        for n in program.graph.nodes
+        if n.op == "call_function"
+    )
+
+    return Counter(op for op in op_names if op not in OP_COUNT_IGNORED_OPS)
+
+
+def count_ops(program: dict[str, ExportedProgram] | ExportedProgram) -> Counter:
+    if isinstance(program, ExportedProgram):
+        return _count_ops(program)
+    else:
+        # Sum op counts for all methods in the program.
+        return reduce(
+            lambda a, b: a + b,
+            (_count_ops(p) for p in program.values()),
+            Counter(),
+        )
+
+
 def begin_test_session():
     global _active_session
 
@@ -188,6 +238,24 @@ def complete_test_session() -> RunSummary:
     return summary
 
 
+def _sum_op_counts(counter: Counter | None) -> int | None:
+    """
+    A utility function to count the total number of nodes in an op count dict.
+    """
+    return sum(counter.values()) if counter is not None else None
+
+
+def _serialize_op_counts(counter: Counter | None) -> str:
+    """
+    A utility function to serialize op counts to a string, for the purpose of including
+    in the test report.
+    """
+    if counter is not None:
+        return str(dict(sorted(counter.items())))
+    else:
+        return ""
+
+
 def generate_csv_report(summary: RunSummary, output: TextIO):
     """Write a run summary report to a file in CSV format."""
 
@@ -228,6 +296,14 @@ def generate_csv_report(summary: RunSummary, output: TextIO):
                 f"Output {i} SQNR",
             ]
         )
+    field_names.extend(
+        [
+            "Delegated Nodes",
+            "Undelegated Nodes",
+            "Delegated Ops",
+            "Undelegated Ops",
+        ]
+    )
 
     writer = csv.DictWriter(output, field_names)
     writer.writeheader()
@@ -256,4 +332,9 @@ def generate_csv_report(summary: RunSummary, output: TextIO):
             row[f"Output {output_idx} Error L2"] = error_stats.error_l2_norm
             row[f"Output {output_idx} SQNR"] = error_stats.sqnr
 
+        row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts)
+        row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts)
+        row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts)
+        row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts)
+
         writer.writerow(row)
diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py
@@ -16,11 +16,13 @@
 from executorch.backends.test.suite.reporting import (
     begin_test_session,
     complete_test_session,
+    count_ops,
     generate_csv_report,
     RunSummary,
     TestCaseSummary,
     TestResult,
 )
+from executorch.exir import EdgeProgramManager
 
 
 # A list of all runnable test suites and the corresponding python package.
@@ -98,14 +100,25 @@ def build_result(
 
     lower_start_time = time.perf_counter()
     try:
-        tester.to_edge_transform_and_lower()
+        tester.to_edge_transform_and_lower(generate_etrecord=True)
         elapsed = time.perf_counter() - lower_start_time
         extra_stats["lower_time"] = timedelta(seconds=elapsed)
     except Exception as e:
         elapsed = time.perf_counter() - lower_start_time
         extra_stats["lower_time"] = timedelta(seconds=elapsed)
         return build_result(TestResult.LOWER_FAIL, e)
 
+    # Compute delegation statistics. Use the ETRecord to access the edge dialect graph between
+    # to_edge and delegation. Note that ETRecord only stores the edge dialect graph for a single
+    # method currently and assumes it is called "forward".
+    edge_manager: EdgeProgramManager = tester.get_artifact()
+    edge_op_counts = count_ops({"forward": edge_manager._etrecord.edge_dialect_program})
+    undelegated_op_counts = count_ops(edge_manager._edge_programs)
+    delegated_op_counts = edge_op_counts - undelegated_op_counts
+
+    extra_stats["delegated_op_counts"] = delegated_op_counts
+    extra_stats["undelegated_op_counts"] = undelegated_op_counts
+
     is_delegated = any(
         n.target == torch._higher_order_ops.executorch_call_delegate
         for n in tester.stages[tester.cur].graph_module.graph.nodes
@@ -127,7 +140,7 @@ def build_result(
         try:
             tester.run_method_and_compare_outputs(
                 inputs=None if generate_random_test_inputs else inputs,
-                statistics_callback=lambda stats: error_statistics.append(stats)
+                statistics_callback=lambda stats: error_statistics.append(stats),
             )
         except AssertionError as e:
             return build_result(TestResult.OUTPUT_MISMATCH_FAIL, e)
diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py
@@ -5,7 +5,10 @@
 
 import torch
 
+from executorch.exir import to_edge
+
 from ..reporting import (
+    count_ops,
     generate_csv_report,
     RunSummary,
     TestCaseSummary,
@@ -23,6 +26,7 @@
         params=None,
         result=TestResult.SUCCESS,
         error=None,
+        tensor_error_statistics=[],
     ),
     TestCaseSummary(
         backend="backend2",
@@ -32,6 +36,7 @@
         params=None,
         result=TestResult.LOWER_FAIL,
         error=None,
+        tensor_error_statistics=[],
     ),
     TestCaseSummary(
         backend="backend1",
@@ -41,6 +46,7 @@
         params={"dtype": torch.float32},
         result=TestResult.SUCCESS_UNDELEGATED,
         error=None,
+        tensor_error_statistics=[],
     ),
     TestCaseSummary(
         backend="backend2",
@@ -50,6 +56,7 @@
         params={"use_dynamic_shapes": True},
         result=TestResult.EXPORT_FAIL,
         error=None,
+        tensor_error_statistics=[],
     ),
 ]
 
@@ -104,3 +111,32 @@ def test_csv_report_simple(self):
         self.assertEqual(records[3]["Result"], "Fail (Export)")
         self.assertEqual(records[3]["Dtype"], "")
         self.assertEqual(records[3]["Use_dynamic_shapes"], "True")
+
+    def test_count_ops(self):
+        """
+        Verify that the count_ops function correctly counts operator occurrences in the edge graph.
+        """
+
+        class Model1(torch.nn.Module):
+            def forward(self, x, y):
+                return x + y
+
+        class Model2(torch.nn.Module):
+            def forward(self, x, y):
+                return x + y * y
+
+        args = (torch.randn(2), torch.randn(2))
+        ep1 = torch.export.export(Model1(), args)
+        ep2 = torch.export.export(Model2(), args)
+
+        ep = to_edge({"forward1": ep1, "forward2": ep2})
+
+        op_counts = count_ops(ep._edge_programs)
+
+        self.assertEqual(
+            op_counts,
+            {
+                "aten::add.Tensor": 2,
+                "aten::mul.Tensor": 1,
+            },
+        )
diff --git a/pytest.ini b/pytest.ini
@@ -48,6 +48,8 @@ addopts =
     # is stable and signal to noise ratio is good (no irrelevant failures).
     # See https://github.com/pytorch/executorch/discussions/11140
     --ignore=backends/test
+    backends/test/harness/tests
+    backends/test/suite/tests
     # backends/xnnpack
     backends/xnnpack/test/ops
     --ignore=backends/xnnpack/test/ops/test_bmm.py

Original file line number	Diff line number	Diff line change
`@@ -52,7 +52,9 @@ def __init__(`
`52`	`52`	`default_partitioner_cls=QnnPartitioner,`
`53`	`53`	`)`
`54`	`54`
`55`		`- def run(self, artifact: ExportedProgram, inputs=None) -> None:`
	`55`	`+ def run(`
	`56`	`+ self, artifact: ExportedProgram, inputs=None, generate_etrecord: bool = False`
	`57`	`+ ) -> None:`
`56`	`58`	`ep = QnnPassManager().transform_for_export_pipeline(artifact)`
`57`	`59`	`transform_passes = QnnPassManager().get_to_edge_transform_passes(ep)`
`58`	`60`
`@@ -61,6 +63,7 @@ def run(self, artifact: ExportedProgram, inputs=None) -> None:`
`61`	`63`	`transform_passes=transform_passes,`
`62`	`64`	`partitioner=self.partitioners,`
`63`	`65`	`compile_config=self.edge_compile_conf,`
	`66`	`+ generate_etrecord=generate_etrecord,`
`64`	`67`	`)`
`65`	`68`
`66`	`69`