24 changes: 21 additions & 3 deletions examples/arm/run.sh
@@ -42,6 +42,7 @@ toolchain=arm-none-eabi-gcc
select_ops_list="aten::_softmax.out"
qdq_fusion_op=false
model_explorer=false
perf_overlay=false

function help() {
echo "Usage: $(basename $0) [options]"
@@ -72,7 +73,8 @@ function help() {
echo " --et_build_root=<FOLDER> Executorch build output root folder to use, defaults to ${et_build_root}"
echo " --scratch-dir=<FOLDER> Path to your Ethos-U scrach dir if you not using default ${ethos_u_scratch_dir}"
echo " --qdq_fusion_op Enable QDQ fusion op"
echo " --model_explorer Generate and open a visual graph of the compiled model."
echo " --model_explorer Enable model explorer to visualize TOSA graph."
echo " --perf_overlay With --model_explorer, include performance data from FVP PMU trace."
exit 0
}

@@ -102,11 +104,17 @@ for arg in "$@"; do
--scratch-dir=*) ethos_u_scratch_dir="${arg#*=}" ; scratch_dir_set=true ;;
--qdq_fusion_op) qdq_fusion_op=true;;
--model_explorer) model_explorer=true ;;
--perf_overlay) perf_overlay=true ;;
*)
;;
esac
done

if [ "$perf_overlay" = true ] && [ "$model_explorer" != true ]; then
echo "Error: --perf_overlay requires --model_explorer" >&2
exit 1
fi

if ! [[ ${pte_placement} == "elf" ]]; then
if ! [[ "$pte_placement" =~ ^0x[0-9a-fA-F]{1,16}$ ]]; then
echo "ERROR: Placing the PTE in memory failed, address is larger then 64bit $pte_placement"
@@ -204,6 +212,7 @@ bundleio_flag=""
etrecord_flag=""
et_dump_flag=""
qdq_fusion_op_flag=""
fvp_pmu_flag=""
if [ "$build_with_etdump" = true ] ; then
et_dump_flag="--etdump"
etrecord_flag="--etrecord"
@@ -273,6 +282,11 @@ for i in "${!test_model[@]}"; do
output_folder=${et_build_root}/${model_short_name}
fi

if [ "$perf_overlay" = true ] ; then
model_compiler_flags+=" --enable_debug_mode tosa"
fvp_pmu_flag="--trace_file=${output_folder}/pmu_trace.gz"
fi

mkdir -p ${output_folder}
output_folder=$(realpath ${output_folder})
pte_file="${output_folder}/${model_filename_ext}"
@@ -330,14 +344,18 @@ for i in "${!test_model[@]}"; do
if [ "$build_only" = false ] ; then
# Execute the executor_runner on FVP Simulator

backends/arm/scripts/run_fvp.sh --elf=${elf_file} ${model_data} --target=$target ${etrecord_flag}
backends/arm/scripts/run_fvp.sh --elf=${elf_file} ${model_data} --target=$target ${etrecord_flag} ${fvp_pmu_flag}
fi
set +x
fi

if [ "$model_explorer" = true ]; then
tosa_flatbuffer_path=$(find ${output_folder} -name "*TOSA*.tosa" | head -n 1)
python3 ${script_dir}/visualize.py ${tosa_flatbuffer_path}
perf_flags=""
if [ "$perf_overlay" = true ]; then
perf_flags+="--trace ${output_folder}/pmu_trace.gz --tables ${output_folder}/output/out_debug.xml"
fi
python3 ${script_dir}/visualize.py --model_path ${tosa_flatbuffer_path} ${perf_flags}
fi
done
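
As a usage sketch, the two new options compose like this (any model/target selection flags accepted elsewhere by run.sh are outside this diff and omitted here):

./examples/arm/run.sh --model_explorer --perf_overlay

With --perf_overlay, the script compiles with --enable_debug_mode tosa, passes --trace_file=<output_folder>/pmu_trace.gz to run_fvp.sh, and then forwards that trace together with <output_folder>/output/out_debug.xml to visualize.py.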

277 changes: 269 additions & 8 deletions examples/arm/visualize.py
@@ -4,28 +4,289 @@
# LICENSE file in the root directory of this source tree.

import argparse
import gzip
import io
import json
import xml.etree.ElementTree as ET
from pathlib import Path

import model_explorer
from typing import Any, Callable, Dict, Iterable, NamedTuple, Union

import pandas as pd

from executorch.devtools.visualization.visualization_utils import (
visualize_model_explorer,
)
from model_explorer import config as model_explorer_config, node_data_builder as ndb

COMPILER_OP_ID = "scheduled_id"


class Tables(NamedTuple):
queue: pd.DataFrame
group: pd.DataFrame
perf: pd.DataFrame
source: pd.DataFrame


def parse_tables(tables_path: Path) -> Tables:
"""
Parse the XML debug tables file and extract required tables as pandas DataFrames.
"""
required_tables = {"queue", "group", "perf", "source"}
try:
tree = ET.parse(tables_path)
except ET.ParseError as e:
raise ValueError(f"Failed to parse XML tables file {tables_path}: {e}")

tables: Dict[str, pd.DataFrame] = {}
for table in tree.getroot().findall("table"):
name = table.attrib.get("name")
if name in required_tables:
text = table.text or ""
tables[name] = pd.read_csv(io.StringIO(text))

missing = required_tables - tables.keys()
if missing:
raise ValueError(f"Missing required tables in XML: {missing}")

return Tables(**tables)
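
For reference, parse_tables expects each required table as CSV text inside a <table name="..."> child of the root element; a minimal self-contained sketch (the root tag and the CSV columns are illustrative assumptions):

import io
import xml.etree.ElementTree as ET

import pandas as pd

# Hypothetical debug-tables document; parse_tables only relies on the
# <table name="..."> elements and their CSV payloads.
xml_text = """<debug>
<table name="queue">scheduled_id,offset
0,0
1,4</table>
</debug>"""

root = ET.fromstring(xml_text)
frames = {
    t.attrib["name"]: pd.read_csv(io.StringIO(t.text or ""))
    for t in root.findall("table")
}
print(frames["queue"].columns.tolist())  # ['scheduled_id', 'offset']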


def get_trace_file_objects(trace_file_path: Path) -> list[Dict[str, Any]]:
"""
Load and return the 'traceEvents' list from a gzip-compressed JSON trace file.
"""
try:
with gzip.open(trace_file_path, "rt", encoding="utf-8") as file:
data = json.load(file)
except (OSError, json.JSONDecodeError) as e:
raise ValueError(f"Failed to read or parse trace file {trace_file_path}: {e}")

if "traceEvents" not in data:
raise KeyError(f"'traceEvents' key not found in {trace_file_path}")

return data["traceEvents"]


def get_subops(df_group: pd.DataFrame) -> set:
return set(df_group[df_group["id"] != df_group["group_id"]]["id"])
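get_subops treats every row whose id differs from its group_id as a fused sub-operation; with illustrative ids:

import pandas as pd

group = pd.DataFrame({"id": [0, 1, 2], "group_id": [0, 0, 2]})
# The row with id=1 belongs to group 0 rather than itself, so it is a sub-op.
print(set(group[group["id"] != group["group_id"]]["id"]))  # {1}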


def transform_events(
objects: Iterable[Dict[str, Any]], queue_df: pd.DataFrame, sub_ops: set
) -> None:
"""
Annotate the 'queue' table in-place with duration based on trace events.
"""
queue_df_len = len(queue_df)
offsets = queue_df["offset"].astype(int)

start_ts, cmd_index, chain_len = 0, 0, 1

def is_end_of_command(qread_offset: int, end_idx: int) -> bool:
if end_idx >= queue_df_len:
return qread_offset > offsets[cmd_index]
return qread_offset == offsets[end_idx]

for event in (e for e in objects if e.get("tid") == "qread"):
if cmd_index >= queue_df_len:
break

qread_offset = 4 * int(event["args"]["qread"])

end_idx = cmd_index + chain_len
if is_end_of_command(qread_offset, end_idx):
end_ts = int(event["ts"]) - 1
queue_df.loc[cmd_index, "duration"] = end_ts - start_ts
start_ts = end_ts
cmd_index += chain_len
chain_len = 1
while (cmd_index + chain_len <= queue_df_len - 1) and queue_df.iloc[
cmd_index + chain_len
]["scheduled_id"] in sub_ops:
chain_len += 1
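
A worked micro-example of that bookkeeping, assuming this module is importable as visualize (each command's duration ends one tick before the event that moves qread past its offset):

import pandas as pd

from visualize import transform_events  # assumes examples/arm is on sys.path

queue = pd.DataFrame({"scheduled_id": [0, 1], "offset": [0, 4]})
events = [
    {"tid": "qread", "ts": 100, "args": {"qread": 1}},  # reaches offset 4: closes command 0
    {"tid": "qread", "ts": 250, "args": {"qread": 2}},  # runs past offset 4: closes command 1
]
transform_events(events, queue, set())
print(queue["duration"].tolist())  # [99.0, 150.0]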


Agg = Union[str, Callable[[pd.Series], Any]]


def list_unique(s: pd.Series) -> list[Any]:
return sorted(set(s.dropna()))


def build_perf_df(tables: Tables) -> tuple[pd.DataFrame, pd.DataFrame]:
"""
Build a performance DataFrame summarizing queue metrics grouped by source_id.
Returns a tuple of (perf_df, cmd_to_op_df) where cmd_to_op_df is needed for unmapped op tracking.
"""
tables.queue["cmd_id"] = tables.queue.index

excluded = {"optimised_id", "scheduled_id", "offset"}
col_funcs: Dict[str, Agg] = {
c: "sum" for c in tables.queue.columns if c not in excluded
}

col_funcs.update({"cmdstream_id": list_unique, "cmd_id": list_unique})

cmd_to_op_df = tables.queue.groupby(COMPILER_OP_ID).agg(col_funcs).reset_index()

opt_df = (
pd.merge(tables.perf[["id", "source_id"]], tables.group, on="id", how="left")
.rename(columns={"id": COMPILER_OP_ID})
.merge(cmd_to_op_df, on=COMPILER_OP_ID, how="inner")
)

exclude_columns = ["source_id"]
src_col_funcs: Dict[str, Agg] = {
col: "sum" for col in opt_df.columns if col not in exclude_columns
}
src_col_funcs[COMPILER_OP_ID] = list_unique

perf_df = opt_df.groupby("source_id").agg(src_col_funcs).reset_index()

return perf_df, cmd_to_op_df
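
The aggregation mixes built-in reducers with the list_unique callable; a standalone illustration of the pattern (column names are illustrative, and list_unique is re-declared so the snippet runs on its own):

import pandas as pd

def list_unique(s: pd.Series) -> list:
    return sorted(set(s.dropna()))

df = pd.DataFrame(
    {"scheduled_id": [0, 0, 1], "npu_cycles": [10, 5, 7], "cmd_id": [0, 1, 2]}
)
out = df.groupby("scheduled_id").agg({"npu_cycles": "sum", "cmd_id": list_unique})
print(out.loc[0, "npu_cycles"], out.loc[0, "cmd_id"])  # 15 [0, 1]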


def check_unmapped_ops(
tables: Tables, src_df: pd.DataFrame, cmd_to_op_df: pd.DataFrame
) -> None:
"""
Identify operators in the performance data that are not mapped to any source operation.
"""
opt_ids_in_src_table = set()
for opt_ids in src_df[COMPILER_OP_ID].dropna():
if isinstance(opt_ids, list):
opt_ids_in_src_table.update(opt_ids)

opt_df = pd.merge(
tables.perf[["id", "source_id"]], tables.group, on="id", how="left"
)
opt_df = opt_df.rename(columns={"id": COMPILER_OP_ID})
opt_df = pd.merge(opt_df, cmd_to_op_df, on=COMPILER_OP_ID, how="inner")

unmapped_operators = opt_df[
~opt_df[COMPILER_OP_ID].isin(list(opt_ids_in_src_table))
]

if not unmapped_operators.empty:
print("Warning: There are unmapped operators in the performance data.")
print(unmapped_operators)
return None


def build_src_df(tables: Tables, perf_df: pd.DataFrame) -> pd.DataFrame:
"""
Merge source table with performance metrics and total NPU cycles.
Returns the merged source-level DataFrame.
"""
return pd.merge(
tables.source.rename(columns={"id": "source_id"})[["ext_key", "source_id"]],
perf_df,
on="source_id",
how="left",
).merge(
tables.perf[["source_id", "npu_cycles"]]
.groupby("source_id")
.sum(numeric_only=True)
.reset_index(),
on="source_id",
how="left",
)


def get_model_node_data(df: pd.DataFrame) -> ndb.ModelNodeData:
"""
Convert source-level metrics into ModelExplorer node data for duration.
"""
durations = df["duration"].fillna(0).astype(int)

duration_results: Dict[str, ndb.NodeDataResult] = {}

for src, dur in zip(df["ext_key"], durations):
node_id = f"main/op{int(src)}"
duration_results[node_id] = ndb.NodeDataResult(value=int(dur))

gradient = [
ndb.GradientItem(stop=0.0, bgColor="#ffffff"),
ndb.GradientItem(stop=0.1, bgColor="#33FF00"),
ndb.GradientItem(stop=0.2, bgColor="#66FF00"),
ndb.GradientItem(stop=0.5, bgColor="#FFFF00"),
ndb.GradientItem(stop=0.7, bgColor="#FF6600"),
ndb.GradientItem(stop=1.0, bgColor="#FF0000"),
]

return ndb.ModelNodeData(
graphsData={
"main": ndb.GraphNodeData(results=duration_results, gradient=gradient)
}
)


def build_overlay_data(trace_path: Path, tables_path: Path) -> ndb.ModelNodeData:
"""
Build ModelExplorer node data from trace and tables files.
"""
tables = parse_tables(tables_path)
events = get_trace_file_objects(trace_path)
transform_events(events, tables.queue, get_subops(tables.group))
perf_df, cmd_to_op_df = build_perf_df(tables)
src_df = build_src_df(tables, perf_df)
check_unmapped_ops(tables, src_df, cmd_to_op_df)

return get_model_node_data(src_df)


def validate_file_exists(file_path: Path) -> None:
if not file_path.exists():
raise FileNotFoundError(f"{file_path} not found")


def validate_perf_mode_args(trace: str, tables: str) -> None:
if not (trace and tables):
raise ValueError(
"Both --trace and --tables must be provided for perf mode, or neither for default mode"
)


def main() -> None:
parser = argparse.ArgumentParser(
description="Visualize a model using model explorer."
)
parser.add_argument("model_path", type=str, help="Path to the model file.")
parser.add_argument(
"--model_path", required=True, type=str, help="Path to the model file"
)
parser.add_argument(
"--trace",
required=False,
help="(perf mode) PMU trace JSON.gz file with performance data",
)
parser.add_argument(
"--tables",
required=False,
help="(perf mode) Vela debug database tables XML file",
)

args = parser.parse_args()
model_file = Path(args.model_path).resolve()
validate_file_exists(model_file)

config = model_explorer.config()
(config.add_model_from_path(args.model_path))
config = model_explorer_config().add_model_from_path(str(model_file))

visualize_model_explorer(
config=config,
extensions=["tosa_adapter_model_explorer"],
)
if args.trace or args.tables:
validate_perf_mode_args(args.trace, args.tables)
trace_file = Path(args.trace).resolve()
tables_file = Path(args.tables).resolve()
validate_file_exists(trace_file)
validate_file_exists(tables_file)

config.add_node_data(
"Duration (Cycles)", build_overlay_data(trace_file, tables_file)
)

visualize_model_explorer(config=config, extensions=["tosa_adapter_model_explorer"])
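
Matching the wiring in run.sh above, a perf-mode invocation looks like this (the .tosa file name is illustrative; the trace and tables paths are the ones run.sh produces):

python3 examples/arm/visualize.py --model_path <output_folder>/model_TOSA.tosa \
    --trace <output_folder>/pmu_trace.gz --tables <output_folder>/output/out_debug.xml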


if __name__ == "__main__":
    main()