1 change: 1 addition & 0 deletions backends/cadence/runtime/TARGETS
@@ -21,6 +21,7 @@ runtime.python_library(
"//executorch/devtools/bundled_program/serialize:lib",
"//executorch/devtools:lib",
"//executorch/exir:lib",
":etdump",
],
)

173 changes: 173 additions & 0 deletions backends/cadence/runtime/etdump.py
@@ -0,0 +1,173 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

import logging
import os
from typing import cast, Optional, Tuple

import torch
from executorch.devtools import Inspector
from executorch.devtools.inspector import Event, EventBlock, PerfData
from executorch.devtools.inspector._inspector_utils import TimeScale
from tabulate import tabulate


class CadenceETDump:
    def __init__(self, output_dir: str) -> None:
        self.tensor_dump_dir: str = os.path.join(output_dir, "tensors")
        self.etdump_path: str = os.path.join(output_dir, "etdump.etdp")
        self.etrecord_path: Optional[str] = os.path.join(output_dir, "etrecord.bin")
        self.debug_buffer_path: Optional[str] = os.path.join(
            output_dir, "debug_output.bin"
        )

        if not os.path.exists(self.etdump_path):
            raise RuntimeError(f"{self.etdump_path} does not exist")
        # pyre-ignore[6]: os.path.exists expects str, but got Optional[str]
        if not os.path.exists(self.etrecord_path):
            logging.warning(
                "ETRecord not found, intermediate tensors will not be dumped"
            )
            self.etrecord_path = None
        # pyre-ignore[6]: os.path.exists expects str, but got Optional[str]
        if not os.path.exists(self.debug_buffer_path):
            logging.warning(
                "Debug buffer not found, intermediate tensors will not be dumped"
            )
            self.debug_buffer_path = None

        self.et_inspector: Inspector = Inspector(
            etdump_path=self.etdump_path,
            debug_buffer_path=self.debug_buffer_path,
            etrecord=self.etrecord_path,
            source_time_scale=TimeScale.CYCLES,
            target_time_scale=TimeScale.CYCLES,
        )

    def get_outputs(self, log_to_stdout: bool = False) -> Tuple[torch.Tensor]:
        output = [
            event_block.run_output
            for event_block in self.et_inspector.event_blocks
            if event_block.name == "Execute"
        ]
        logging.debug(f"[CadenceETDump] output: {output}")
        return output[0]

    def get_execute_event_block(self) -> EventBlock:
        exec_blocks = [
            eb for eb in self.et_inspector.event_blocks if eb.name == "Execute"
        ]
        return exec_blocks[0]

    def should_include_event(self, event: Event) -> bool:
        # exclude duplicate events
        if event.name in ("OPERATOR_CALL", "Method::execute"):
            return False

        # exclude custom multi-zion events
        if event.name.startswith("DELEGATE_ZION"):
            return False

        return True

    def print_summary(
        self,
        bundled_prog_size: Optional[int] = None,
        external_link: Optional[str] = None,
    ) -> None:
        """
        Print a performance summary with optional program size and external link.

        Args:
            bundled_prog_size: Size of the bundled program in bytes (optional)
            external_link: External analytics/monitoring link (optional, e.g., Scuba link for Meta internal use)
        """
        block = self.get_execute_event_block()
        op_events = [e for e in block.events if self.should_include_event(e)]
        op_time_sum = sum([cast(PerfData, e.perf_data).avg for e in op_events])

        overall_event = [ev for ev in block.events if ev.name == "Method::execute"]
        if len(overall_event) != 1:
            logging.warning(
                f"Expected one 'Method::execute' event, found {len(overall_event)}"
            )

        total_cycles = cast(PerfData, overall_event[0].perf_data).avg
        op_cycles = op_time_sum

        # Build table data and headers dynamically based on what's provided
        table_data = [
            "{:,.0f}".format(total_cycles),
            "{:,.0f}".format(op_cycles),
            "{:,.0f}".format(total_cycles - op_cycles),
            "{:.2%}".format((total_cycles - op_cycles) / total_cycles),
        ]
        headers = [
            "Total Cycles",
            "Cycles in Ops",
            "Other Cycles",
            "Framework Tax (%)",
        ]

        # Add optional fields if provided
        if bundled_prog_size is not None:
            table_data.append("{:,.0f}".format(bundled_prog_size))
            headers.append("Bundled Program Size (bytes)")

        if external_link is not None:
            table_data.append(external_link)
            headers.append("External Link")

        logging.info(
            "Performance Summary:\n%s",
            tabulate(
                [table_data],
                headers=headers,
                tablefmt="outline",
            ),
        )

    def print_event_block(self) -> None:
        logging.info("Profiled events:")
        if logging.getLogger().level <= logging.INFO:
            self.et_inspector.print_data_tabular()

    def dump_intermediate_tensors(self) -> None:
        if self.etrecord_path is None:
            logging.info("[CadenceETDump] Intermediate tensors not available")
            return

        logging.info(
            f"[CadenceETDump] Dumping intermediate tensors to {self.tensor_dump_dir}"
        )
        os.makedirs(self.tensor_dump_dir, exist_ok=True)
        exec_blocks = [
            eb for eb in self.et_inspector.event_blocks if eb.name == "Execute"
        ]
        if len(exec_blocks) > 1:
            logging.warning(
                f'Found {len(exec_blocks)} "Execute" blocks, using the first one and ignoring the rest.'
            )
        block = exec_blocks[0]

        # OPERATOR_CALL events are duplicates that contain framework tax data.
        # We don't need them.
        op_events = [e for e in block.events if e.name != "OPERATOR_CALL"]
        torch.set_printoptions(profile="full")

        for event in op_events:
            instr_id = event._instruction_id
            if not event.debug_data:
                logging.debug(
                    f"Missing intermediate tensor data for {event.name} ({instr_id=})"
                )
                continue

            with open(f"{self.tensor_dump_dir}/{instr_id}.txt", "w") as f:
                for dd in event.debug_data:
                    f.write(f"{str(dd)}\n\n")
        torch.set_printoptions(profile="default")
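
For reviewers, a minimal usage sketch of the new class (not part of the diff): the directory and argument values below are hypothetical, and the sketch assumes a prior run already wrote etdump.etdp — and optionally etrecord.bin and debug_output.bin — into output_dir:

from executorch.backends.cadence.runtime.etdump import CadenceETDump

# Hypothetical output directory populated by a previous profiled run.
etdump = CadenceETDump(output_dir="/tmp/cadence_run")

# Cycle summary table; both optional columns are exercised here.
etdump.print_summary(
    bundled_prog_size=2048,  # illustrative value
    external_link="https://example.com/run/123",  # illustrative link
)

# Per-event tabular data (printed at INFO/DEBUG verbosity).
etdump.print_event_block()

# Writes one tensors/<instruction_id>.txt file per op if ETRecord was found;
# otherwise logs that intermediate tensors are unavailable.
etdump.dump_intermediate_tensors()

# Program outputs recorded in the "Execute" event block.
outputs = etdump.get_outputs()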
92 changes: 5 additions & 87 deletions backends/cadence/runtime/runtime.py
@@ -9,18 +9,17 @@

import logging
import numbers
import os
import tempfile
from typing import Any, Optional, Sequence, Tuple, Union
from typing import Any, Optional, Sequence, Union

import executorch.exir.schema as et_schema

import numpy as np
import torch

from executorch.backends.cadence.runtime import utils
from executorch.backends.cadence.runtime.etdump import CadenceETDump
from executorch.backends.cadence.runtime.executor import Executor
from executorch.devtools import Inspector
from executorch.exir import ExecutorchProgramManager
from executorch.exir._serialize._program import deserialize_pte_binary
from executorch.exir.schema import DataLocation
@@ -30,90 +29,6 @@
from torch.utils._pytree import TreeSpec


class CadenceETDump:
    def __init__(self, output_dir: str) -> None:
        self.tensor_dump_dir: str = os.path.join(output_dir, "tensors")
        self.etdump_path: str = os.path.join(output_dir, "etdump.etdp")
        self.etrecord_path: Optional[str] = os.path.join(output_dir, "etrecord.bin")
        self.debug_buffer_path: Optional[str] = os.path.join(
            output_dir, "debug_output.bin"
        )

        if not os.path.exists(self.etdump_path):
            raise RuntimeError(f"{self.etdump_path} does not exist")
        # pyre-ignore[6]: os.path.exists expects str, but got Optional[str]
        if not os.path.exists(self.etrecord_path):
            logging.warning(
                "ETRecord not found, intermediate tensors will not be dumped"
            )
            self.etrecord_path = None
        # pyre-ignore[6]: os.path.exists expects str, but got Optional[str]
        if not os.path.exists(self.debug_buffer_path):
            logging.warning(
                "Debug buffer not found, intermediate tensors will not be dumped"
            )
            self.debug_buffer_path = None

        self.et_inspector: Inspector = Inspector(
            etdump_path=self.etdump_path,
            debug_buffer_path=self.debug_buffer_path,
            etrecord=self.etrecord_path,
        )

    def get_outputs(self, log_to_stdout: bool = False) -> Tuple[torch.Tensor]:
        output = [
            event_block.run_output
            for event_block in self.et_inspector.event_blocks
            if event_block.name == "Execute"
        ]
        logging.debug(f"[ETdump] output: {output}")
        return output[0]

    def print_event_block(self) -> None:
        logging.debug("[ETdump] data tabular:")
        if logging.getLogger().level <= logging.DEBUG:
            self.et_inspector.print_data_tabular()

    def print_event_data(self) -> None:
        logging.debug("[ETdump] event data ")
        for event_block in self.et_inspector.event_blocks:
            for event in event_block.events:
                logging.debug(event)

    def dump_intermediate_tensors(self) -> None:
        if self.etrecord_path is None:
            logging.info("[ETdump] Intermediate tensors not available")
            return

        logging.info(f"[ETdump] Dumping intermediate tensors to {self.tensor_dump_dir}")
        os.makedirs(self.tensor_dump_dir, exist_ok=True)
        exec_blocks = [
            eb for eb in self.et_inspector.event_blocks if eb.name == "Execute"
        ]
        if len(exec_blocks) > 1:
            logging.warning(
                f'Found {len(exec_blocks)} "Execute" blocks, using the first one and ignoring the rest.'
            )
        block = exec_blocks[0]

        # OPERATOR_CALL events are duplicates that contain framework tax data. We don't need them
        op_events = [e for e in block.events if e.name != "OPERATOR_CALL"]
        torch.set_printoptions(profile="full")

        for event in op_events:
            instr_id = event._instruction_id
            if not event.debug_data:
                logging.debug(
                    f"Missing intermediate tensor data for {event.name} ({instr_id=})"
                )
                continue

            with open(f"{self.tensor_dump_dir}/{instr_id}.txt", "w") as f:
                for dd in event.debug_data:
                    f.write(f"{str(dd)}\n\n")
        torch.set_printoptions(profile="default")


def get_op_names(program: et_schema.Program, execution_plan_id: int = 0) -> set[str]:
"""
Get the list of operators from a Program
@@ -162,6 +77,9 @@ def run(
    etdump = CadenceETDump(output_dir=working_dir)
    outputs = etdump.get_outputs()

    # Print performance summary
    etdump.print_summary()

    assert isinstance(out_spec, TreeSpec)
    outputs = torch.utils._pytree.tree_unflatten(outputs, out_spec)

14 changes: 14 additions & 0 deletions backends/cadence/runtime/targets.bzl
@@ -13,3 +13,17 @@ def define_common_targets():
"//executorch/runtime/platform:platform",
],
)

runtime.python_library(
name = "etdump",
srcs = ["etdump.py"],
visibility = [
"//executorch/backends/cadence/...",
"@EXECUTORCH_CLIENTS"
],
deps = [
"fbcode//executorch/devtools:lib",
"fbcode//executorch/devtools/inspector:inspector_utils",
"fbsource//third-party/pypi/tabulate:tabulate",
],
)