[Cadence] move ETDump to OSS (pytorch#14616)

pytorchbot · Zonglin Peng · web-flow · commit b5308f526817 · 2025-09-29T09:46:03.000-04:00
This PR was created by the merge bot to help merge the original PR into the main branch.
ghstack PR number: pytorch#14556 by @zonglinpeng
^ Please use this as the source of truth for the PR details, comments, and reviews
ghstack PR base: https://github.com/pytorch/executorch/tree/gh/zonglinpeng/4/base
ghstack PR head: https://github.com/pytorch/executorch/tree/gh/zonglinpeng/4/head
Merge bot PR base: https://github.com/pytorch/executorch/tree/main
Merge bot PR head: https://github.com/pytorch/executorch/tree/gh/zonglinpeng/4/orig
@diff-train-skip-merge
Co-authored-by: Zonglin Peng <zonglinpeng@fb.com>
diff --git a/backends/cadence/runtime/TARGETS b/backends/cadence/runtime/TARGETS
@@ -21,6 +21,7 @@ runtime.python_library(
         "//executorch/devtools/bundled_program/serialize:lib",
         "//executorch/devtools:lib",
         "//executorch/exir:lib",
+        ":etdump",
     ],
 )
 
diff --git a/backends/cadence/runtime/etdump.py b/backends/cadence/runtime/etdump.py
@@ -0,0 +1,173 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+import logging
+import os
+from typing import cast, Optional, Tuple
+
+import torch
+from executorch.devtools import Inspector
+from executorch.devtools.inspector import Event, EventBlock, PerfData
+from executorch.devtools.inspector._inspector_utils import TimeScale
+from tabulate import tabulate
+
+
+class CadenceETDump:
+    def __init__(self, output_dir: str) -> None:
+        """
+        Locate the ETDump artifacts under ``output_dir`` and open an Inspector on them.
+
+        Args:
+            output_dir: Directory expected to contain ``etdump.etdp`` and,
+                optionally, ``etrecord.bin`` and ``debug_output.bin``.
+
+        Raises:
+            RuntimeError: If ``etdump.etdp`` does not exist in ``output_dir``.
+        """
+        etrecord_file = os.path.join(output_dir, "etrecord.bin")
+        debug_buffer_file = os.path.join(output_dir, "debug_output.bin")
+
+        self.tensor_dump_dir: str = os.path.join(output_dir, "tensors")
+        self.etdump_path: str = os.path.join(output_dir, "etdump.etdp")
+        if not os.path.exists(self.etdump_path):
+            raise RuntimeError(f"{self.etdump_path} does not exist")
+
+        # The remaining artifacts are optional: a missing file is stored as None
+        # and only produces a warning.
+        self.etrecord_path: Optional[str] = None
+        if os.path.exists(etrecord_file):
+            self.etrecord_path = etrecord_file
+        else:
+            logging.warning(
+                "ETRecord not found, intermediate tensors will not be dumped"
+            )
+
+        self.debug_buffer_path: Optional[str] = None
+        if os.path.exists(debug_buffer_file):
+            self.debug_buffer_path = debug_buffer_file
+        else:
+            logging.warning(
+                "Debug buffer not found, intermediate tensors will not be dumped"
+            )
+
+        # Both time scales are CYCLES, so the Inspector is asked to keep the
+        # recorded values in cycles.
+        self.et_inspector: Inspector = Inspector(
+            etdump_path=self.etdump_path,
+            etrecord=self.etrecord_path,
+            debug_buffer_path=self.debug_buffer_path,
+            source_time_scale=TimeScale.CYCLES,
+            target_time_scale=TimeScale.CYCLES,
+        )
+
+    def get_outputs(self, log_to_stdout: bool = False) -> Tuple[torch.Tensor]:
+        """
+        Return the run output of the first event block named "Execute".
+
+        Args:
+            log_to_stdout: Currently unused by this method.
+
+        Raises:
+            IndexError: If the inspector has no "Execute" event block.
+        """
+        execute_outputs = []
+        for block in self.et_inspector.event_blocks:
+            if block.name == "Execute":
+                execute_outputs.append(block.run_output)
+        logging.debug(f"[CadenceETDump] output: {execute_outputs}")
+        first_output = execute_outputs[0]
+        return first_output
+
+    def get_execute_event_block(self) -> EventBlock:
+        """
+        Return the first event block named "Execute".
+
+        Raises:
+            IndexError: If the inspector has no "Execute" event block.
+        """
+        matching_blocks = []
+        for candidate in self.et_inspector.event_blocks:
+            if candidate.name == "Execute":
+                matching_blocks.append(candidate)
+        return matching_blocks[0]
+
+    def should_include_event(self, event: Event) -> bool:
+        """
+        Tell whether ``event`` is kept when collecting per-operator events.
+
+        An event is dropped when its name is "OPERATOR_CALL" or
+        "Method::execute" (duplicate events), or when its name starts with
+        "DELEGATE_ZION" (custom multi-zion events). Everything else is kept.
+        """
+        name = event.name
+        return name not in (
+            "OPERATOR_CALL",
+            "Method::execute",
+        ) and not name.startswith("DELEGATE_ZION")
+
+    def print_summary(
+        self,
+        bundled_prog_size: Optional[int] = None,
+        external_link: Optional[str] = None,
+    ) -> None:
+        """
+        Print performance summary with optional program size and external link.
+
+        The summary is best-effort: if the "Execute" block has no
+        'Method::execute' event with perf data, a warning is logged and no
+        table is printed instead of raising.
+
+        Args:
+            bundled_prog_size: Size of the bundled program in bytes (optional)
+            external_link: External analytics/monitoring link (optional, e.g., Scuba link for Meta internal use)
+
+        Raises:
+            IndexError: If the inspector has no "Execute" event block
+                (raised by ``get_execute_event_block``).
+        """
+        block = self.get_execute_event_block()
+
+        # Events without perf data contribute no cycles; skipping them avoids
+        # an AttributeError on `None.avg`.
+        op_cycles = sum(
+            cast(PerfData, e.perf_data).avg
+            for e in block.events
+            if self.should_include_event(e) and e.perf_data is not None
+        )
+
+        overall_events = [ev for ev in block.events if ev.name == "Method::execute"]
+        if len(overall_events) != 1:
+            logging.warning(
+                f"Expected one 'Method::execute' event, found {len(overall_events)}"
+            )
+        # With several events the first one is used; with none (or no perf
+        # data on it) there is no total to report against.
+        if not overall_events or overall_events[0].perf_data is None:
+            logging.warning(
+                "No 'Method::execute' perf data available, skipping performance summary"
+            )
+            return
+
+        total_cycles = cast(PerfData, overall_events[0].perf_data).avg
+        other_cycles = total_cycles - op_cycles
+        # Guard the ratio: a zero total would otherwise raise ZeroDivisionError.
+        framework_tax = (
+            "{:.2%}".format(other_cycles / total_cycles) if total_cycles else "N/A"
+        )
+
+        # Build table data and headers dynamically based on what's provided
+        table_data = [
+            "{:,.0f}".format(total_cycles),
+            "{:,.0f}".format(op_cycles),
+            "{:,.0f}".format(other_cycles),
+            framework_tax,
+        ]
+        headers = [
+            "Total Cycles",
+            "Cycles in Ops",
+            "Other Cycles",
+            "Framework Tax (%)",
+        ]
+
+        # Add optional fields if provided
+        if bundled_prog_size is not None:
+            table_data.append("{:,.0f}".format(bundled_prog_size))
+            headers.append("Bundled Program Size (bytes)")
+
+        if external_link is not None:
+            table_data.append(external_link)
+            headers.append("External Link")
+
+        logging.info(
+            "Performance Summary:\n%s",
+            tabulate(
+                [table_data],
+                headers=headers,
+                tablefmt="outline",
+            ),
+        )
+
+    def print_event_block(self) -> None:
+        """
+        Log a header and print the inspector's tabular event data.
+
+        The table is printed only when the root logger's own level is INFO or
+        lower, so it is skipped when the root logger is set above INFO.
+        """
+        logging.info("Profiled events:")
+        root_level = logging.getLogger().level
+        if root_level > logging.INFO:
+            return
+        self.et_inspector.print_data_tabular()
+
+    def dump_intermediate_tensors(self) -> None:
+        """
+        Write each event's debug data to ``<tensor_dump_dir>/<instruction id>.txt``.
+
+        Only the first "Execute" event block is used. "OPERATOR_CALL" events
+        and events without debug data are skipped. Nothing is written when no
+        ETRecord was found at construction time or when there is no "Execute"
+        block; both cases are logged instead of raising.
+        """
+        if self.etrecord_path is None:
+            logging.info("[CadenceETDump] Intermediate tensors not available")
+            return
+
+        logging.info(
+            f"[CadenceETDump] Dumping intermediate tensors to {self.tensor_dump_dir}"
+        )
+        os.makedirs(self.tensor_dump_dir, exist_ok=True)
+        exec_blocks = [
+            eb for eb in self.et_inspector.event_blocks if eb.name == "Execute"
+        ]
+        if not exec_blocks:
+            # Previously this fell through to exec_blocks[0] and raised a bare
+            # IndexError with no hint about the cause.
+            logging.warning(
+                '[CadenceETDump] No "Execute" block found, no intermediate tensors dumped'
+            )
+            return
+        if len(exec_blocks) > 1:
+            logging.warning(
+                f'Found {len(exec_blocks)} "Execute" blocks, using the first one and ignoring the rest.'
+            )
+        block = exec_blocks[0]
+
+        # OPERATOR_CALL events are duplicates that contain framework tax data. We don't need them
+        op_events = [e for e in block.events if e.name != "OPERATOR_CALL"]
+
+        # Print tensors in full while dumping, and always switch back to the
+        # default profile afterwards, even if a write fails midway.
+        torch.set_printoptions(profile="full")
+        try:
+            for event in op_events:
+                instr_id = event._instruction_id
+                if not event.debug_data:
+                    logging.debug(
+                        f"Missing intermediate tensor data for {event.name} ({instr_id=})"
+                    )
+                    continue
+
+                dump_path = os.path.join(self.tensor_dump_dir, f"{instr_id}.txt")
+                with open(dump_path, "w") as f:
+                    for dd in event.debug_data:
+                        f.write(f"{dd}\n\n")
+        finally:
+            torch.set_printoptions(profile="default")
diff --git a/backends/cadence/runtime/runtime.py b/backends/cadence/runtime/runtime.py
@@ -9,18 +9,17 @@
 
 import logging
 import numbers
-import os
 import tempfile
-from typing import Any, Optional, Sequence, Tuple, Union
+from typing import Any, Optional, Sequence, Union
 
 import executorch.exir.schema as et_schema
 
 import numpy as np
 import torch
 
 from executorch.backends.cadence.runtime import utils
+from executorch.backends.cadence.runtime.etdump import CadenceETDump
 from executorch.backends.cadence.runtime.executor import Executor
-from executorch.devtools import Inspector
 from executorch.exir import ExecutorchProgramManager
 from executorch.exir._serialize._program import deserialize_pte_binary
 from executorch.exir.schema import DataLocation
@@ -30,90 +29,6 @@
 from torch.utils._pytree import TreeSpec
 
 
-class CadenceETDump:
-    def __init__(self, output_dir: str) -> None:
-        self.tensor_dump_dir: str = os.path.join(output_dir, "tensors")
-        self.etdump_path: str = os.path.join(output_dir, "etdump.etdp")
-        self.etrecord_path: Optional[str] = os.path.join(output_dir, "etrecord.bin")
-        self.debug_buffer_path: Optional[str] = os.path.join(
-            output_dir, "debug_output.bin"
-        )
-
-        if not os.path.exists(self.etdump_path):
-            raise RuntimeError(f"{self.etdump_path} does not exist")
-        # pyre-ignore[6]: os.path.exists expects str, but got Optional[str]
-        if not os.path.exists(self.etrecord_path):
-            logging.warning(
-                "ETRecord not found, intermediate tensors will not be dumped"
-            )
-            self.etrecord_path = None
-        # pyre-ignore[6]: os.path.exists expects str, but got Optional[str]
-        if not os.path.exists(self.debug_buffer_path):
-            logging.warning(
-                "Debug buffer not found, intermediate tensors will not be dumped"
-            )
-            self.debug_buffer_path = None
-
-        self.et_inspector: Inspector = Inspector(
-            etdump_path=self.etdump_path,
-            debug_buffer_path=self.debug_buffer_path,
-            etrecord=self.etrecord_path,
-        )
-
-    def get_outputs(self, log_to_stdout: bool = False) -> Tuple[torch.Tensor]:
-        output = [
-            event_block.run_output
-            for event_block in self.et_inspector.event_blocks
-            if event_block.name == "Execute"
-        ]
-        logging.debug(f"[ETdump] output: {output}")
-        return output[0]
-
-    def print_event_block(self) -> None:
-        logging.debug("[ETdump] data tabular:")
-        if logging.getLogger().level <= logging.DEBUG:
-            self.et_inspector.print_data_tabular()
-
-    def print_event_data(self) -> None:
-        logging.debug("[ETdump] event data ")
-        for event_block in self.et_inspector.event_blocks:
-            for event in event_block.events:
-                logging.debug(event)
-
-    def dump_intermediate_tensors(self) -> None:
-        if self.etrecord_path is None:
-            logging.info("[ETdump] Intermediate tensors not available")
-            return
-
-        logging.info(f"[ETdump] Dumping intermediate tensors to {self.tensor_dump_dir}")
-        os.makedirs(self.tensor_dump_dir, exist_ok=True)
-        exec_blocks = [
-            eb for eb in self.et_inspector.event_blocks if eb.name == "Execute"
-        ]
-        if len(exec_blocks) > 1:
-            logging.warning(
-                f'Found {len(exec_blocks)} "Execute" blocks, using the first one and ignoring the rest.'
-            )
-        block = exec_blocks[0]
-
-        # OPERATOR_CALL events are duplicates that contain framework tax data. We don't need them
-        op_events = [e for e in block.events if e.name != "OPERATOR_CALL"]
-        torch.set_printoptions(profile="full")
-
-        for event in op_events:
-            instr_id = event._instruction_id
-            if not event.debug_data:
-                logging.debug(
-                    f"Missing intermediate tensor data for {event.name} ({instr_id=})"
-                )
-                continue
-
-            with open(f"{self.tensor_dump_dir}/{instr_id}.txt", "w") as f:
-                for dd in event.debug_data:
-                    f.write(f"{str(dd)}\n\n")
-        torch.set_printoptions(profile="default")
-
-
 def get_op_names(program: et_schema.Program, execution_plan_id: int = 0) -> set[str]:
     """
     Get the list of operators from a Program
@@ -162,6 +77,9 @@ def run(
     etdump = CadenceETDump(output_dir=working_dir)
     outputs = etdump.get_outputs()
 
+    # Print performance summary
+    etdump.print_summary()
+
     assert isinstance(out_spec, TreeSpec)
     outputs = torch.utils._pytree.tree_unflatten(outputs, out_spec)
 
diff --git a/backends/cadence/runtime/targets.bzl b/backends/cadence/runtime/targets.bzl
@@ -13,3 +13,17 @@ def define_common_targets():
             "//executorch/runtime/platform:platform",
         ],
     )
+
+    runtime.python_library(
+        name = "etdump",
+        srcs = ["etdump.py"],
+        visibility = [
+            "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS"
+        ],
+        deps = [
+            "fbcode//executorch/devtools:lib",
+            "fbcode//executorch/devtools/inspector:inspector_utils",
+            "fbsource//third-party/pypi/tabulate:tabulate",
+        ],
+    )

Original file line number	Diff line number	Diff line change
`@@ -21,6 +21,7 @@ runtime.python_library(`
`21`	`21`	`"//executorch/devtools/bundled_program/serialize:lib",`
`22`	`22`	`"//executorch/devtools:lib",`
`23`	`23`	`"//executorch/exir:lib",`
	`24`	`+ ":etdump",`
`24`	`25`	`],`
`25`	`26`	`)`
`26`	`27`