
Commit c96beeb

Author: Zonglin Peng
[Cadence] move ETDump to OSS
This diff moves the ETDump functionality to OSS by creating a new Python library, `etdump`, and exposing it to the Cadence backend. The new library is built in both `fbcode` and `xplat` and is included in the Cadence runtime.

**Changes**

* **fbcode/executorch/backends/cadence/runtime (targets.bzl)**: Add a new Python library, `etdump`, that bundles `etdump.py` and declares its visibility and dependencies.
* **fbcode/executorch/backends/cadence/runtime (etdump.py)**: A new file containing the `CadenceETDump` class, which handles the ETDump functionality.
* **fbcode/executorch/backends/cadence/runtime (runtime.py)** and **xplat/executorch/backends/cadence/runtime (runtime.py)**: Update the runtime to use the `CadenceETDump` class and drop the imports it no longer needs.
* **xplat/executorch/backends/cadence/runtime (targets.bzl)**: Add the same `etdump` Python library to the `xplat` build.

Differential Revision: [D83188440](https://our.internmc.facebook.com/intern/diff/D83188440/)

[ghstack-poisoned]
1 parent 685e795 commit c96beeb
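
As a rough usage sketch of what the move enables (the directory path and call sequence below are illustrative assumptions, not taken from this diff), the extracted library can be driven on its own once a Cadence run has produced an ETDump in its working directory:

```python
# Hedged sketch: "/tmp/cadence_run" is a hypothetical directory that already
# holds etdump.etdp (and optionally etrecord.bin and debug_output.bin).
from executorch.backends.cadence.runtime.etdump import CadenceETDump

dump = CadenceETDump(output_dir="/tmp/cadence_run")
outputs = dump.get_outputs()       # run outputs from the "Execute" event block
dump.print_summary()               # logs the cycle-count / framework-tax table
dump.dump_intermediate_tensors()   # skipped, with a log message, if ETRecord is absent
```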

File tree: 4 files changed, +193 -87 lines


backends/cadence/runtime/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ runtime.python_library(
         "//executorch/devtools/bundled_program/serialize:lib",
         "//executorch/devtools:lib",
         "//executorch/exir:lib",
+        ":etdump",
     ],
 )

backends/cadence/runtime/etdump.py

Lines changed: 173 additions & 0 deletions
@@ -0,0 +1,173 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+import logging
+import os
+from typing import cast, Optional, Tuple
+
+import torch
+from executorch.devtools import Inspector
+from executorch.devtools.inspector import Event, EventBlock, PerfData
+from executorch.devtools.inspector._inspector_utils import TimeScale
+from tabulate import tabulate
+
+
+class CadenceETDump:
+    def __init__(self, output_dir: str) -> None:
+        self.tensor_dump_dir: str = os.path.join(output_dir, "tensors")
+        self.etdump_path: str = os.path.join(output_dir, "etdump.etdp")
+        self.etrecord_path: Optional[str] = os.path.join(output_dir, "etrecord.bin")
+        self.debug_buffer_path: Optional[str] = os.path.join(
+            output_dir, "debug_output.bin"
+        )
+
+        if not os.path.exists(self.etdump_path):
+            raise RuntimeError(f"{self.etdump_path} does not exist")
+        # pyre-ignore[6]: os.path.exists expects str, but got Optional[str]
+        if not os.path.exists(self.etrecord_path):
+            logging.warning(
+                "ETRecord not found, intermediate tensors will not be dumped"
+            )
+            self.etrecord_path = None
+        # pyre-ignore[6]: os.path.exists expects str, but got Optional[str]
+        if not os.path.exists(self.debug_buffer_path):
+            logging.warning(
+                "Debug buffer not found, intermediate tensors will not be dumped"
+            )
+            self.debug_buffer_path = None
+
+        self.et_inspector: Inspector = Inspector(
+            etdump_path=self.etdump_path,
+            debug_buffer_path=self.debug_buffer_path,
+            etrecord=self.etrecord_path,
+            source_time_scale=TimeScale.CYCLES,
+            target_time_scale=TimeScale.CYCLES,
+        )
+
+    def get_outputs(self, log_to_stdout: bool = False) -> Tuple[torch.Tensor]:
+        output = [
+            event_block.run_output
+            for event_block in self.et_inspector.event_blocks
+            if event_block.name == "Execute"
+        ]
+        logging.debug(f"[CadenceETDump] output: {output}")
+        return output[0]
+
+    def get_execute_event_block(self) -> EventBlock:
+        exec_blocks = [
+            eb for eb in self.et_inspector.event_blocks if eb.name == "Execute"
+        ]
+        return exec_blocks[0]
+
+    def should_include_event(self, event: Event) -> bool:
+        # exclude duplicate events
+        if event.name in ("OPERATOR_CALL", "Method::execute"):
+            return False
+
+        # exclude custom multi-zion events
+        if event.name.startswith("DELEGATE_ZION"):
+            return False
+
+        return True
+
+    def print_summary(
+        self,
+        bundled_prog_size: Optional[int] = None,
+        external_link: Optional[str] = None,
+    ) -> None:
+        """
+        Print performance summary with optional program size and external link.
+
+        Args:
+            bundled_prog_size: Size of the bundled program in bytes (optional)
+            external_link: External analytics/monitoring link (optional, e.g., Scuba link for Meta internal use)
+        """
+        block = self.get_execute_event_block()
+        op_events = [e for e in block.events if self.should_include_event(e)]
+        op_time_sum = sum([cast(PerfData, e.perf_data).avg for e in op_events])
+
+        overall_event = [ev for ev in block.events if ev.name == "Method::execute"]
+        if not len(overall_event) == 1:
+            logging.warning(
+                f"Expected one 'Method::execute' event, found {len(overall_event)}"
+            )
+
+        total_cycles = cast(PerfData, overall_event[0].perf_data).avg
+        op_cycles = op_time_sum
+
+        # Build table data and headers dynamically based on what's provided
+        table_data = [
+            "{:,.0f}".format(total_cycles),
+            "{:,.0f}".format(op_cycles),
+            "{:,.0f}".format(total_cycles - op_cycles),
+            "{:.2%}".format((total_cycles - op_cycles) / total_cycles),
+        ]
+        headers = [
+            "Total Cycles",
+            "Cycles in Ops",
+            "Other Cycles",
+            "Framework Tax (%)",
+        ]
+
+        # Add optional fields if provided
+        if bundled_prog_size is not None:
+            table_data.append("{:,.0f}".format(bundled_prog_size))
+            headers.append("Bundled Program Size (bytes)")
+
+        if external_link is not None:
+            table_data.append(external_link)
+            headers.append("External Link")
+
+        logging.info(
+            "Performance Summary:\n%s",
+            tabulate(
+                [table_data],
+                headers=headers,
+                tablefmt="outline",
+            ),
+        )
+
+    def print_event_block(self) -> None:
+        logging.info("Profiled events:")
+        if logging.getLogger().level <= logging.INFO:
+            self.et_inspector.print_data_tabular()
+
+    def dump_intermediate_tensors(self) -> None:
+        if self.etrecord_path is None:
+            logging.info("[CadenceETDump] Intermediate tensors not available")
+            return
+
+        logging.info(
+            f"[CadenceETDump] Dumping intermediate tensors to {self.tensor_dump_dir}"
+        )
+        os.makedirs(self.tensor_dump_dir, exist_ok=True)
+        exec_blocks = [
+            eb for eb in self.et_inspector.event_blocks if eb.name == "Execute"
+        ]
+        if len(exec_blocks) > 1:
+            logging.warning(
+                f'Found {len(exec_blocks)} "Execute" blocks, using the first one and ignoring the rest.'
+            )
+        block = exec_blocks[0]
+
+        # OPERATOR_CALL events are duplicates that contain framework tax data. We don't need them
+        op_events = [e for e in block.events if e.name != "OPERATOR_CALL"]
+        torch.set_printoptions(profile="full")
+
+        for event in op_events:
+            instr_id = event._instruction_id
+            if not event.debug_data:
+                logging.debug(
+                    f"Missing intermediate tensor data for {event.name} ({instr_id=})"
+                )
+                continue
+
+            with open(f"{self.tensor_dump_dir}/{instr_id}.txt", "w") as f:
+                for dd in event.debug_data:
+                    f.write(f"{str(dd)}\n\n")
+        torch.set_printoptions(profile="default")
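
The framework-tax figure reported by `print_summary` is the overall `Method::execute` cycle count minus the summed per-operator cycles, expressed as a share of the total. A minimal standalone sketch of that arithmetic and the table layout, using made-up cycle counts rather than measured data (`tablefmt="outline"` assumes a recent tabulate release):

```python
from tabulate import tabulate

# Hypothetical cycle counts, for illustration only.
total_cycles = 1_200_000  # avg cycles of the Method::execute event
op_cycles = 1_050_000     # summed avg cycles of the included operator events

row = [
    "{:,.0f}".format(total_cycles),
    "{:,.0f}".format(op_cycles),
    "{:,.0f}".format(total_cycles - op_cycles),
    "{:.2%}".format((total_cycles - op_cycles) / total_cycles),  # 12.50%
]
headers = ["Total Cycles", "Cycles in Ops", "Other Cycles", "Framework Tax (%)"]
print(tabulate([row], headers=headers, tablefmt="outline"))
```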

backends/cadence/runtime/runtime.py

Lines changed: 5 additions & 87 deletions
@@ -9,18 +9,17 @@
 
 import logging
 import numbers
-import os
 import tempfile
-from typing import Any, Optional, Sequence, Tuple, Union
+from typing import Any, Optional, Sequence, Union
 
 import executorch.exir.schema as et_schema
 
 import numpy as np
 import torch
 
 from executorch.backends.cadence.runtime import utils
+from executorch.backends.cadence.runtime.etdump import CadenceETDump
 from executorch.backends.cadence.runtime.executor import Executor
-from executorch.devtools import Inspector
 from executorch.exir import ExecutorchProgramManager
 from executorch.exir._serialize._program import deserialize_pte_binary
 from executorch.exir.schema import DataLocation
@@ -30,90 +29,6 @@
 from torch.utils._pytree import TreeSpec
 
 
-class CadenceETDump:
-    def __init__(self, output_dir: str) -> None:
-        self.tensor_dump_dir: str = os.path.join(output_dir, "tensors")
-        self.etdump_path: str = os.path.join(output_dir, "etdump.etdp")
-        self.etrecord_path: Optional[str] = os.path.join(output_dir, "etrecord.bin")
-        self.debug_buffer_path: Optional[str] = os.path.join(
-            output_dir, "debug_output.bin"
-        )
-
-        if not os.path.exists(self.etdump_path):
-            raise RuntimeError(f"{self.etdump_path} does not exist")
-        # pyre-ignore[6]: os.path.exists expects str, but got Optional[str]
-        if not os.path.exists(self.etrecord_path):
-            logging.warning(
-                "ETRecord not found, intermediate tensors will not be dumped"
-            )
-            self.etrecord_path = None
-        # pyre-ignore[6]: os.path.exists expects str, but got Optional[str]
-        if not os.path.exists(self.debug_buffer_path):
-            logging.warning(
-                "Debug buffer not found, intermediate tensors will not be dumped"
-            )
-            self.debug_buffer_path = None
-
-        self.et_inspector: Inspector = Inspector(
-            etdump_path=self.etdump_path,
-            debug_buffer_path=self.debug_buffer_path,
-            etrecord=self.etrecord_path,
-        )
-
-    def get_outputs(self, log_to_stdout: bool = False) -> Tuple[torch.Tensor]:
-        output = [
-            event_block.run_output
-            for event_block in self.et_inspector.event_blocks
-            if event_block.name == "Execute"
-        ]
-        logging.debug(f"[ETdump] output: {output}")
-        return output[0]
-
-    def print_event_block(self) -> None:
-        logging.debug("[ETdump] data tabular:")
-        if logging.getLogger().level <= logging.DEBUG:
-            self.et_inspector.print_data_tabular()
-
-    def print_event_data(self) -> None:
-        logging.debug("[ETdump] event data ")
-        for event_block in self.et_inspector.event_blocks:
-            for event in event_block.events:
-                logging.debug(event)
-
-    def dump_intermediate_tensors(self) -> None:
-        if self.etrecord_path is None:
-            logging.info("[ETdump] Intermediate tensors not available")
-            return
-
-        logging.info(f"[ETdump] Dumping intermediate tensors to {self.tensor_dump_dir}")
-        os.makedirs(self.tensor_dump_dir, exist_ok=True)
-        exec_blocks = [
-            eb for eb in self.et_inspector.event_blocks if eb.name == "Execute"
-        ]
-        if len(exec_blocks) > 1:
-            logging.warning(
-                f'Found {len(exec_blocks)} "Execute" blocks, using the first one and ignoring the rest.'
-            )
-        block = exec_blocks[0]
-
-        # OPERATOR_CALL events are duplicates that contain framework tax data. We don't need them
-        op_events = [e for e in block.events if e.name != "OPERATOR_CALL"]
-        torch.set_printoptions(profile="full")
-
-        for event in op_events:
-            instr_id = event._instruction_id
-            if not event.debug_data:
-                logging.debug(
-                    f"Missing intermediate tensor data for {event.name} ({instr_id=})"
-                )
-                continue
-
-            with open(f"{self.tensor_dump_dir}/{instr_id}.txt", "w") as f:
-                for dd in event.debug_data:
-                    f.write(f"{str(dd)}\n\n")
-        torch.set_printoptions(profile="default")
-
-
 def get_op_names(program: et_schema.Program, execution_plan_id: int = 0) -> set[str]:
     """
     Get the list of operators from a Program
@@ -162,6 +77,9 @@ def run(
     etdump = CadenceETDump(output_dir=working_dir)
     outputs = etdump.get_outputs()
 
+    # Print performance summary
+    etdump.print_summary()
+
     assert isinstance(out_spec, TreeSpec)
     outputs = torch.utils._pytree.tree_unflatten(outputs, out_spec)
 
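
The output handling that survives in `run()` leans on PyTorch's pytree utilities: the flat tensor list recovered from the ETDump is reassembled into the model's original output structure using the stored `TreeSpec`. A small self-contained sketch of that round trip (the output structure here is invented for illustration):

```python
import torch
from torch.utils._pytree import tree_flatten, tree_unflatten

# Invented structured output; run() obtains its TreeSpec from the exported program.
structured = {"logits": torch.randn(2, 4), "aux": (torch.zeros(1), torch.ones(3))}

flat, out_spec = tree_flatten(structured)  # flat list of leaves plus a TreeSpec
rebuilt = tree_unflatten(flat, out_spec)   # restores the original dict/tuple nesting

assert torch.equal(rebuilt["logits"], structured["logits"])
```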

backends/cadence/runtime/targets.bzl

Lines changed: 14 additions & 0 deletions
@@ -13,3 +13,17 @@ def define_common_targets():
             "//executorch/runtime/platform:platform",
         ],
     )
+
+    runtime.python_library(
+        name = "etdump",
+        srcs = ["etdump.py"],
+        visibility = [
+            "//executorch/backends/cadence/...",
+            "@EXECUTORCH_CLIENTS"
+        ],
+        deps = [
+            "fbcode//executorch/devtools:lib",
+            "fbcode//executorch/devtools/inspector:inspector_utils",
+            "fbsource//third-party/pypi/tabulate:tabulate",
+        ],
+    )
