Skip to content

Commit 5f2e827

Browse files
authored
Arm Backend: Overlay FVP trace data in Model Explorer (#15411)
Adds a script to enable the visualisation of Ethos U PMU trace data. The script retrieves the FVP trace output and Vela's debug database, then uses these to create a pandas dataframe of the run's duration cycles. This data is then overlaid in Model Explorer.
1 parent 5d268c7 commit 5f2e827

File tree

2 files changed

+290
-11
lines changed

2 files changed

+290
-11
lines changed

examples/arm/run.sh

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ toolchain=arm-none-eabi-gcc
4242
select_ops_list="aten::_softmax.out"
4343
qdq_fusion_op=false
4444
model_explorer=false
45+
perf_overlay=false
4546

4647
function help() {
4748
echo "Usage: $(basename $0) [options]"
@@ -72,7 +73,8 @@ function help() {
7273
echo " --et_build_root=<FOLDER> Executorch build output root folder to use, defaults to ${et_build_root}"
7374
echo " --scratch-dir=<FOLDER> Path to your Ethos-U scrach dir if you not using default ${ethos_u_scratch_dir}"
7475
echo " --qdq_fusion_op Enable QDQ fusion op"
75-
echo " --model_explorer Generate and open a visual graph of the compiled model."
76+
echo " --model_explorer Enable model explorer to visualize TOSA graph."
77+
echo " --perf_overlay With --model_explorer, include performance data from FVP PMU trace."
7678
exit 0
7779
}
7880

@@ -102,11 +104,17 @@ for arg in "$@"; do
102104
--scratch-dir=*) ethos_u_scratch_dir="${arg#*=}" ; scratch_dir_set=true ;;
103105
--qdq_fusion_op) qdq_fusion_op=true;;
104106
--model_explorer) model_explorer=true ;;
107+
--perf_overlay) perf_overlay=true ;;
105108
*)
106109
;;
107110
esac
108111
done
109112

113+
if [ "$perf_overlay" = true ] && [ "$model_explorer" != true ]; then
114+
echo "Error: --perf_overlay requires --model_explorer" >&2
115+
exit 1
116+
fi
117+
110118
if ! [[ ${pte_placement} == "elf" ]]; then
111119
if ! [[ "$pte_placement" =~ ^0x[0-9a-fA-F]{1,16}$ ]]; then
112120
echo "ERROR: Placing the PTE in memory failed, address is larger then 64bit $pte_placement"
@@ -204,6 +212,7 @@ bundleio_flag=""
204212
etrecord_flag=""
205213
et_dump_flag=""
206214
qdq_fusion_op_flag=""
215+
fvp_pmu_flag=""
207216
if [ "$build_with_etdump" = true ] ; then
208217
et_dump_flag="--etdump"
209218
etrecord_flag="--etrecord"
@@ -273,6 +282,11 @@ for i in "${!test_model[@]}"; do
273282
output_folder=${et_build_root}/${model_short_name}
274283
fi
275284

285+
if [ "$perf_overlay" = true ] ; then
286+
model_compiler_flags+="--enable_debug_mode tosa"
287+
fvp_pmu_flag="--trace_file=${output_folder}/pmu_trace.gz"
288+
fi
289+
276290
mkdir -p ${output_folder}
277291
output_folder=$(realpath ${output_folder})
278292
pte_file="${output_folder}/${model_filename_ext}"
@@ -330,14 +344,18 @@ for i in "${!test_model[@]}"; do
330344
if [ "$build_only" = false ] ; then
331345
# Execute the executor_runner on FVP Simulator
332346

333-
backends/arm/scripts/run_fvp.sh --elf=${elf_file} ${model_data} --target=$target ${etrecord_flag}
347+
backends/arm/scripts/run_fvp.sh --elf=${elf_file} ${model_data} --target=$target ${etrecord_flag} ${fvp_pmu_flag}
334348
fi
335349
set +x
336350
fi
337351

338352
if [ "$model_explorer" = true ]; then
339353
tosa_flatbuffer_path=$(find ${output_folder} -name "*TOSA*.tosa" | head -n 1)
340-
python3 ${script_dir}/visualize.py ${tosa_flatbuffer_path}
354+
perf_flags=""
355+
if [ "$perf_overlay" = true ]; then
356+
perf_flags+="--trace ${output_folder}/pmu_trace.gz --tables ${output_folder}/output/out_debug.xml"
357+
fi
358+
python3 ${script_dir}/visualize.py --model_path ${tosa_flatbuffer_path} ${perf_flags}
341359
fi
342360
done
343361

examples/arm/visualize.py

Lines changed: 269 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,289 @@
44
# LICENSE file in the root directory of this source tree.
55

66
import argparse
7+
import gzip
8+
import io
9+
import json
10+
import xml.etree.ElementTree as ET
11+
from pathlib import Path
712

8-
import model_explorer
13+
from typing import Any, Callable, Dict, Iterable, NamedTuple, Union
14+
15+
import pandas as pd
916

1017
from executorch.devtools.visualization.visualization_utils import (
1118
visualize_model_explorer,
1219
)
20+
from model_explorer import config as model_explorer_config, node_data_builder as ndb
21+
22+
# Column name linking Vela's debug tables to compiler-scheduled operations.
COMPILER_OP_ID = "scheduled_id"


class Tables(NamedTuple):
    """The four Vela debug-database tables required for the perf overlay."""

    queue: pd.DataFrame
    group: pd.DataFrame
    perf: pd.DataFrame
    source: pd.DataFrame


def parse_tables(tables_path: Path) -> Tables:
    """
    Parse the XML debug tables file and extract required tables as pandas DataFrames.

    Args:
        tables_path: Path to Vela's debug database XML file. Each <table>
            element's text content is expected to hold CSV data.

    Returns:
        A Tables namedtuple with the 'queue', 'group', 'perf' and 'source'
        tables parsed into DataFrames.

    Raises:
        ValueError: If the XML cannot be parsed or a required table is missing.
    """
    required_tables = {"queue", "group", "perf", "source"}
    try:
        tree = ET.parse(tables_path)
    except ET.ParseError as e:
        # Chain the original ParseError so the root cause stays visible.
        raise ValueError(f"Failed to parse XML tables file {tables_path}: {e}") from e

    tables: Dict[str, pd.DataFrame] = {}
    for table in tree.getroot().findall("table"):
        name = table.attrib.get("name")
        if name in required_tables:
            # Table rows are embedded as CSV text inside the XML element.
            text = table.text or ""
            tables[name] = pd.read_csv(io.StringIO(text))

    missing = required_tables - tables.keys()
    if missing:
        raise ValueError(f"Missing required tables in XML: {missing}")

    return Tables(**tables)
54+
55+
56+
def get_trace_file_objects(trace_file_path: Path) -> list[Dict[str, Any]]:
    """
    Load and return the 'traceEvents' list from a gzip-compressed JSON trace file.

    Args:
        trace_file_path: Path to the FVP PMU trace (gzip-compressed JSON).

    Returns:
        The list stored under the 'traceEvents' key.

    Raises:
        ValueError: If the file cannot be read or is not valid JSON.
        KeyError: If the JSON object has no 'traceEvents' key.
    """
    try:
        with gzip.open(trace_file_path, "rt", encoding="utf-8") as file:
            data = json.load(file)
    except (OSError, json.JSONDecodeError) as e:
        # Chain the original error so the root cause stays visible.
        raise ValueError(
            f"Failed to read or parse trace file {trace_file_path}: {e}"
        ) from e

    if "traceEvents" not in data:
        raise KeyError(f"'traceEvents' key not found in {trace_file_path}")

    return data["traceEvents"]
70+
71+
72+
def get_subops(df_group: pd.DataFrame) -> set:
    """Return the ids of rows that belong to another group (id != group_id)."""
    is_sub_op = df_group["id"] != df_group["group_id"]
    return set(df_group.loc[is_sub_op, "id"])
74+
75+
76+
def transform_events(
    objects: Iterable[Dict[str, Any]], queue_df: pd.DataFrame, sub_ops: set
) -> None:
    """
    Annotate the 'queue' table in-place with duration based on trace events.

    Walks the 'qread' trace events and, each time the read offset reaches the
    start of the next command, writes the elapsed timestamp delta into the
    current command's 'duration' cell.

    Args:
        objects: Trace events (the 'traceEvents' list from the PMU trace).
        queue_df: The 'queue' debug table; must have 'offset' and
            'scheduled_id' columns. A 'duration' column is written in-place.
        sub_ops: scheduled_ids that are sub-operations (see get_subops);
            rows with these ids are folded into the preceding command's
            duration instead of being timed on their own.
    """
    queue_df_len = len(queue_df)
    offsets = queue_df["offset"].astype(int)

    # start_ts: timestamp where the current command (chain) began.
    # cmd_index: row index of the command currently being timed.
    # chain_len: queue rows folded into that command (1 + trailing sub-ops).
    start_ts, cmd_index, chain_len = 0, 0, 1

    def is_end_of_command(qread_offset: int, end_idx: int) -> bool:
        # For the final command there is no next offset to compare against,
        # so any read past the current command's own offset means it is done.
        if end_idx >= queue_df_len:
            return qread_offset > offsets[cmd_index]
        return qread_offset == offsets[end_idx]

    for event in (e for e in objects if e.get("tid") == "qread"):
        if cmd_index >= queue_df_len:
            break

        # NOTE(review): the *4 presumably converts a word-based qread counter
        # into byte offsets matching the 'offset' column — TODO confirm.
        qread_offset = 4 * int(event["args"]["qread"])

        end_idx = cmd_index + chain_len
        if is_end_of_command(qread_offset, end_idx):
            end_ts = int(event["ts"]) - 1
            queue_df.loc[cmd_index, ["duration"]] = [
                end_ts - start_ts,
            ]
            start_ts = end_ts
            cmd_index += chain_len
            chain_len = 1
            # Extend the next chain across any immediately-following sub-op
            # rows so their time is attributed to the owning command.
            while (cmd_index + chain_len <= queue_df_len - 1) and queue_df.iloc[
                cmd_index + chain_len
            ]["scheduled_id"] in sub_ops:
                chain_len += 1
111+
112+
113+
# An aggregation spec accepted by DataFrame.agg: a named reduction or a callable.
Agg = Union[str, Callable[[pd.Series], Any]]


def list_unique(s: pd.Series) -> list[Any]:
    """Return the sorted unique non-null values of *s* as a plain list."""
    non_null = s.dropna()
    return sorted(set(non_null))
118+
119+
120+
def build_perf_df(tables: Tables) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Build a performance DataFrame summarizing queue metrics grouped by source_id.

    Returns a tuple of (perf_df, cmd_to_op_df) where cmd_to_op_df is needed for unmapped op tracking.
    """
    # NOTE: mutates tables.queue in-place by adding a 'cmd_id' column.
    tables.queue["cmd_id"] = tables.queue.index

    # Sum every queue column per compiler op, except identifier/offset
    # columns where a sum would be meaningless.
    excluded = {"optimised_id", "scheduled_id", "offset"}
    col_funcs: Dict[str, Agg] = {
        c: "sum" for c in tables.queue.columns if c not in excluded
    }

    # Identifier columns are collected as sorted unique lists instead.
    # (assumes the queue table has a 'cmdstream_id' column — TODO confirm)
    col_funcs.update({"cmdstream_id": list_unique, "cmd_id": list_unique})

    cmd_to_op_df = tables.queue.groupby(COMPILER_OP_ID).agg(col_funcs).reset_index()

    # Attach each compiler op's source_id (via the perf table) plus group
    # info, keeping only ops that actually appear in the queue (inner join).
    opt_df = (
        pd.merge(tables.perf[["id", "source_id"]], tables.group, on="id", how="left")
        .rename(columns={"id": COMPILER_OP_ID})
        .merge(cmd_to_op_df, on=COMPILER_OP_ID, how="inner")
    )

    # Roll compiler-op rows up to their originating source operation.
    exclude_columns = ["source_id"]
    src_col_funcs: Dict[str, Agg] = {
        col: "sum" for col in opt_df.columns if col not in exclude_columns
    }
    src_col_funcs[COMPILER_OP_ID] = list_unique

    perf_df = opt_df.groupby("source_id").agg(src_col_funcs).reset_index()

    return perf_df, cmd_to_op_df
151+
152+
153+
def check_unmapped_ops(
    tables: Tables, src_df: pd.DataFrame, cmd_to_op_df: pd.DataFrame
) -> None:
    """
    Identify operators in the performance data that are not mapped to any source operation.

    Prints a warning (and the offending rows) when compiler ops present in
    the performance data never appear in the source-level summary. Purely
    diagnostic: no value is returned and nothing is mutated.

    Args:
        tables: Parsed Vela debug tables.
        src_df: Source-level summary produced by build_src_df.
        cmd_to_op_df: Command-to-op mapping produced by build_perf_df.
    """
    # Collect every compiler-op id that made it into the source-level table.
    opt_ids_in_src_table: set = set()
    for opt_ids in src_df[COMPILER_OP_ID].dropna():
        # Aggregated cells hold lists of ids (see list_unique); skip anything else.
        if isinstance(opt_ids, list):
            opt_ids_in_src_table.update(opt_ids)

    # Rebuild the per-compiler-op view (mirrors build_perf_df's join).
    opt_df = pd.merge(
        tables.perf[["id", "source_id"]], tables.group, on="id", how="left"
    )
    opt_df = opt_df.rename(columns={"id": COMPILER_OP_ID})
    opt_df = pd.merge(opt_df, cmd_to_op_df, on=COMPILER_OP_ID, how="inner")

    # isin accepts a set directly; no list conversion needed.
    unmapped_operators = opt_df[~opt_df[COMPILER_OP_ID].isin(opt_ids_in_src_table)]

    if not unmapped_operators.empty:
        print("Warning: There are unmapped operators in the performance data.")
        print(unmapped_operators)
178+
179+
180+
def build_src_df(tables: Tables, perf_df: pd.DataFrame) -> pd.DataFrame:
    """
    Merge source table with performance metrics and total NPU cycles.

    Args:
        tables: Parsed Vela debug tables.
        perf_df: Per-source performance summary produced by build_perf_df.

    Returns:
        A single DataFrame with one row per source op ('ext_key'), joined
        with the aggregated queue metrics and the summed 'npu_cycles' per
        source_id.
    """
    return pd.merge(
        tables.source.rename(columns={"id": "source_id"})[["ext_key", "source_id"]],
        perf_df,
        on="source_id",
        how="left",
    ).merge(
        # Total NPU cycles per source op, summed across its compiler ops.
        tables.perf[["source_id", "npu_cycles"]]
        .groupby("source_id")
        .sum(numeric_only=True)
        .reset_index(),
        on="source_id",
        how="left",
    )
198+
199+
200+
def get_model_node_data(df: pd.DataFrame) -> ndb.ModelNodeData:
    """
    Convert source-level metrics into ModelExplorer node data for duration.
    """
    cycle_counts = df["duration"].fillna(0).astype(int)

    # One result per source op; node ids follow the "main/op<N>" scheme.
    duration_results: Dict[str, ndb.NodeDataResult] = {
        f"main/op{int(source)}": ndb.NodeDataResult(value=int(cycles))
        for source, cycles in zip(df["ext_key"], cycle_counts)
    }

    # White -> green -> yellow -> red heat map over normalized duration.
    palette = [
        (0.0, "#ffffff"),
        (0.1, "#33FF00"),
        (0.2, "#66FF00"),
        (0.5, "#FFFF00"),
        (0.7, "#FF6600"),
        (1.0, "#FF0000"),
    ]
    gradient = [ndb.GradientItem(stop=stop, bgColor=color) for stop, color in palette]

    return ndb.ModelNodeData(
        graphsData={
            "main": ndb.GraphNodeData(results=duration_results, gradient=gradient)
        }
    )
226+
227+
228+
def build_overlay_data(trace_path: Path, tables_path: Path) -> ndb.ModelNodeData:
    """
    Build ModelExplorer node data from trace and tables files.
    """
    debug_tables = parse_tables(tables_path)
    trace_events = get_trace_file_objects(trace_path)

    # Attribute trace timestamps to queue commands (writes 'duration' in-place).
    transform_events(trace_events, debug_tables.queue, get_subops(debug_tables.group))

    perf_df, cmd_to_op_df = build_perf_df(debug_tables)
    source_summary = build_src_df(debug_tables, perf_df)

    # Diagnostic only: prints a warning if any compiler op is unmapped.
    check_unmapped_ops(debug_tables, source_summary, cmd_to_op_df)

    return get_model_node_data(source_summary)
240+
241+
242+
def validate_file_exists(file_path: Path) -> None:
    """Raise FileNotFoundError unless *file_path* points to an existing path."""
    if file_path.exists():
        return
    raise FileNotFoundError(f"{file_path} not found")
245+
246+
247+
def validate_perf_mode_args(trace: str, tables: str) -> None:
    """Ensure the perf-mode arguments are supplied as a complete pair."""
    both_present = bool(trace) and bool(tables)
    if both_present:
        return
    raise ValueError(
        "Both --trace and --tables must be provided for perf mode, or neither for default mode"
    )
13252

14253

15254
def main() -> None:
    """CLI entry point: visualize a model, optionally overlaying PMU perf data."""
    parser = argparse.ArgumentParser(
        description="Visualize a model using model explorer."
    )
    parser.add_argument(
        "--model_path", required=True, type=str, help="Path to the model file"
    )
    parser.add_argument(
        "--trace",
        required=False,
        help="(perf mode) PMU trace JSON.gz file with performance data",
    )
    parser.add_argument(
        "--tables",
        required=False,
        help="(perf mode) Vela debug database tables XML file",
    )

    args = parser.parse_args()
    model_file = Path(args.model_path).resolve()
    validate_file_exists(model_file)

    config = model_explorer_config().add_model_from_path(str(model_file))

    # Giving either perf flag switches on perf mode; both must then be present.
    perf_mode = bool(args.trace) or bool(args.tables)
    if perf_mode:
        validate_perf_mode_args(args.trace, args.tables)
        trace_file = Path(args.trace).resolve()
        tables_file = Path(args.tables).resolve()
        for required_file in (trace_file, tables_file):
            validate_file_exists(required_file)

        config.add_node_data(
            "Duration (Cycles)", build_overlay_data(trace_file, tables_file)
        )

    visualize_model_explorer(config=config, extensions=["tosa_adapter_model_explorer"])
29290

30291

31292
if __name__ == "__main__":

0 commit comments

Comments
 (0)