[Backend Tester] Report delegation statistics

GregoryComer · GregoryComer · commit dd62a070cdc7 · 2025-07-30T23:26:10.000-07:00
ghstack-source-id: cc7e564 ghstack-comment-id: 3115647824 Pull-Request: pytorch#12846
diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py
@@ -1,12 +1,22 @@
 import csv
+
 from collections import Counter
 from dataclasses import dataclass
 from datetime import timedelta
 from enum import IntEnum
 from functools import reduce
-from typing import TextIO
+from typing import Any, TextIO
 
 from executorch.backends.test.harness.error_statistics import ErrorStatistics
+from torch.export import ExportedProgram
+
+
+# Operators that are excluded from the counts returned by count_ops. These are used to
+# exclude operations that are not logically relevant or delegatable to backends.
+OP_COUNT_IGNORED_OPS = {
+    "executorch_call_delegate",  # presumably the delegate-call node itself - confirm
+    "getitem",  # presumably output-unpacking glue, not a real operator - confirm
+}
 
 
 class TestResult(IntEnum):
@@ -115,6 +125,12 @@ class TestCaseSummary:
     lower_time: timedelta | None = None
     """ The total runtime of the to_edge_transform_and_lower stage, or none, if the test did not run the quantize stage. """
 
+    delegated_op_counts: Counter | None = None
+    """ The number of delegated occurrences of each operator in the graph. """
+
+    undelegated_op_counts: Counter | None = None
+    """ The number of undelegated occurrences of each operator in the graph. """
+
 
 class TestSessionState:
     test_case_summaries: list[TestCaseSummary]
@@ -164,6 +180,40 @@ def from_session(cls, session: TestSessionState) -> "RunSummary":
 _active_session: TestSessionState | None = None
 
 
+def _get_target_name(target: Any) -> str:
+    """Retrieve a string representation of a node target (str, op, builtin, or other)."""
+    if isinstance(target, str):
+        return target
+    name = getattr(target, "name", None)
+    if callable(name):
+        return name()  # Op overloads expose name() as a method
+    if hasattr(target, "__name__"):
+        return target.__name__  # Some builtins have this
+    return str(target)  # Also reached when `name` is a plain, non-callable attribute
+
+
+def _count_ops(program: ExportedProgram) -> Counter:
+    """Tally call_function targets in the program graph, skipping OP_COUNT_IGNORED_OPS."""
+    targets = [n.target for n in program.graph.nodes if n.op == "call_function"]
+    counts: Counter = Counter()
+    for name in map(_get_target_name, targets):
+        if name not in OP_COUNT_IGNORED_OPS:
+            counts[name] += 1
+    return counts
+
+
+def count_ops(program: dict[str, ExportedProgram] | ExportedProgram) -> Counter:
+    """Count operators in one program, or summed over every method of a program dict."""
+    if isinstance(program, ExportedProgram):
+        return _count_ops(program)
+
+    # Accumulate in place rather than building a new Counter per addition.
+    totals: Counter = Counter()
+    for method_program in program.values():
+        totals.update(_count_ops(method_program))
+    return totals
+
+
 def begin_test_session():
     global _active_session
 
@@ -188,6 +238,24 @@ def complete_test_session() -> RunSummary:
     return summary
 
 
+def _sum_op_counts(counter: Counter | None) -> int | None:
+    """Total node count across all ops in a count dict; None passes through as None."""
+    if counter is None:
+        return None
+    return sum(counter.values())
+
+
+def _serialize_op_counts(counter: Counter | None) -> str:
+    """
+    Render op counts as a dict-style string with keys in sorted order, for the test
+    report. A missing counter (None) is rendered as an empty string.
+    """
+    if counter is None:
+        return ""
+    ordered = {name: counter[name] for name in sorted(counter)}
+    return str(ordered)
+
+
 def generate_csv_report(summary: RunSummary, output: TextIO):
     """Write a run summary report to a file in CSV format."""
 
@@ -228,6 +296,14 @@ def generate_csv_report(summary: RunSummary, output: TextIO):
                 f"Output {i} SQNR",
             ]
         )
+    field_names.extend(
+        [
+            "Delegated Nodes",
+            "Undelegated Nodes",
+            "Delegated Ops",
+            "Undelegated Ops",
+        ]
+    )
 
     writer = csv.DictWriter(output, field_names)
     writer.writeheader()
@@ -256,4 +332,9 @@ def generate_csv_report(summary: RunSummary, output: TextIO):
             row[f"Output {output_idx} Error L2"] = error_stats.error_l2_norm
             row[f"Output {output_idx} SQNR"] = error_stats.sqnr
 
+        row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts)
+        row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts)
+        row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts)
+        row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts)
+
         writer.writerow(row)
diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py
@@ -16,11 +16,13 @@
 from executorch.backends.test.suite.reporting import (
     begin_test_session,
     complete_test_session,
+    count_ops,
     generate_csv_report,
     RunSummary,
     TestCaseSummary,
     TestResult,
 )
+from executorch.exir import EdgeProgramManager
 
 
 # A list of all runnable test suites and the corresponding python package.
@@ -106,6 +108,14 @@ def build_result(
         extra_stats["lower_time"] = timedelta(seconds=elapsed)
         return build_result(TestResult.LOWER_FAIL, e)
 
+    edge_manager: EdgeProgramManager = tester.get_artifact()
+    edge_op_counts = count_ops(edge_manager.original_edge_programs)
+    undelegated_op_counts = count_ops(edge_manager._edge_programs)
+    delegated_op_counts = edge_op_counts - undelegated_op_counts
+
+    extra_stats["delegated_op_counts"] = delegated_op_counts
+    extra_stats["undelegated_op_counts"] = undelegated_op_counts
+
     is_delegated = any(
         n.target == torch._higher_order_ops.executorch_call_delegate
         for n in tester.stages[tester.cur].graph_module.graph.nodes
diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py
@@ -5,7 +5,10 @@
 
 import torch
 
+from executorch.exir import to_edge
+
 from ..reporting import (
+    count_ops,
     generate_csv_report,
     RunSummary,
     TestCaseSummary,
@@ -23,6 +26,7 @@
         params=None,
         result=TestResult.SUCCESS,
         error=None,
+        tensor_error_statistics=[],
     ),
     TestCaseSummary(
         backend="backend2",
@@ -32,6 +36,7 @@
         params=None,
         result=TestResult.LOWER_FAIL,
         error=None,
+        tensor_error_statistics=[],
     ),
     TestCaseSummary(
         backend="backend1",
@@ -41,6 +46,7 @@
         params={"dtype": torch.float32},
         result=TestResult.SUCCESS_UNDELEGATED,
         error=None,
+        tensor_error_statistics=[],
     ),
     TestCaseSummary(
         backend="backend2",
@@ -50,6 +56,7 @@
         params={"use_dynamic_shapes": True},
         result=TestResult.EXPORT_FAIL,
         error=None,
+        tensor_error_statistics=[],
     ),
 ]
 
@@ -104,3 +111,32 @@ def test_csv_report_simple(self):
         self.assertEqual(records[3]["Result"], "Fail (Export)")
         self.assertEqual(records[3]["Dtype"], "")
         self.assertEqual(records[3]["Use_dynamic_shapes"], "True")
+
+    def test_count_ops(self):
+        """
+        Verify that the count_ops function correctly counts operator occurrences in the edge graph.
+        """
+
+        class AddModel(torch.nn.Module):
+            def forward(self, x, y):
+                return x + y
+
+        class AddMulModel(torch.nn.Module):
+            def forward(self, x, y):
+                return x + y * y
+
+        example_inputs = (torch.randn(2), torch.randn(2))
+        add_program = torch.export.export(AddModel(), example_inputs)
+        add_mul_program = torch.export.export(AddMulModel(), example_inputs)
+
+        # Both methods live in one edge manager, so counts are summed across methods.
+        edge_manager = to_edge({"forward1": add_program, "forward2": add_mul_program})
+
+        # One add per method plus the single mul in forward2.
+        expected = {
+            "aten::add.Tensor": 2,
+            "aten::mul.Tensor": 1,
+        }
+        actual = count_ops(edge_manager._edge_programs)
+
+        self.assertEqual(actual, expected)
diff --git a/exir/program/_program.py b/exir/program/_program.py
@@ -1179,6 +1179,7 @@ def _gen_edge_manager_for_partitioners(
         config,
         list(set().union(*ops_set_to_not_decompose_by_program.values())),
     )
+
     return edge_manager
 
 
@@ -1410,6 +1411,8 @@ class EdgeProgramManager:
     Manages the second link in the lowering chain of ATen -> Edge -> ExecuTorch.
     """
 
+    original_edge_programs: dict[str, ExportedProgram] | None = None
+
     def __init__(
         self,
         edge_programs: Union[ExportedProgram, Dict[str, ExportedProgram]],
@@ -1558,12 +1561,17 @@ def to_backend(
 
         new_edge_programs = to_backend(method_to_programs_and_partitioners)
         config = EdgeCompileConfig(_check_ir_validity=False)
-        return EdgeProgramManager(
+        new_edge_manager = EdgeProgramManager(
             new_edge_programs,
             copy.deepcopy(self._config_methods),
             config,
         )
 
+        # Placeholder - not for land
+        new_edge_manager.original_edge_programs = copy.deepcopy(self._edge_programs)
+
+        return new_edge_manager
+
     @et_logger("to_executorch")
     def to_executorch(
         self,
diff --git a/pytest.ini b/pytest.ini
@@ -48,6 +48,8 @@ addopts =
     # is stable and signal to noise ratio is good (no irrelevant failures).
     # See https://github.com/pytorch/executorch/discussions/11140
     --ignore=backends/test
+    backends/test/harness/tests
+    backends/test/suite/tests
     # backends/xnnpack
     backends/xnnpack/test/ops
     --ignore=backends/xnnpack/test/ops/test_bmm.py