|  | 
| 4 | 4 | # LICENSE file in the root directory of this source tree. | 
| 5 | 5 | 
 | 
| 6 | 6 | import argparse | 
|  | 7 | +import gzip | 
|  | 8 | +import io | 
|  | 9 | +import json | 
|  | 10 | +import xml.etree.ElementTree as ET | 
|  | 11 | +from pathlib import Path | 
| 7 | 12 | 
 | 
| 8 |  | -import model_explorer | 
|  | 13 | +from typing import Any, Callable, Dict, Iterable, NamedTuple, Union | 
|  | 14 | + | 
|  | 15 | +import pandas as pd | 
| 9 | 16 | 
 | 
| 10 | 17 | from executorch.devtools.visualization.visualization_utils import ( | 
| 11 | 18 |     visualize_model_explorer, | 
| 12 | 19 | ) | 
|  | 20 | +from model_explorer import config as model_explorer_config, node_data_builder as ndb | 
|  | 21 | + | 
|  | 22 | +COMPILER_OP_ID = "scheduled_id" | 
|  | 23 | + | 
|  | 24 | + | 
class Tables(NamedTuple):
    """The debug-database tables needed to build the performance overlay."""

    # Command queue table (one row per command; has 'offset', 'scheduled_id',
    # 'cmdstream_id' columns — see transform_events / build_perf_df).
    queue: pd.DataFrame
    # Op grouping table mapping each op 'id' to its 'group_id'.
    group: pd.DataFrame
    # Per-op performance table (has 'id', 'source_id', 'npu_cycles').
    perf: pd.DataFrame
    # Source (network) op table (has 'id', 'ext_key').
    source: pd.DataFrame
|  | 30 | + | 
|  | 31 | + | 
def parse_tables(tables_path: Path) -> Tables:
    """
    Parse the XML debug tables file and extract required tables as pandas DataFrames.

    Args:
        tables_path: Path to the debug database XML file. Each table is a
            ``<table name="...">`` element whose text content is CSV data.

    Returns:
        A Tables tuple holding the 'queue', 'group', 'perf' and 'source'
        tables as DataFrames.

    Raises:
        ValueError: If the XML cannot be parsed, or any required table is
            missing from the file.
    """
    required_tables = {"queue", "group", "perf", "source"}
    try:
        tree = ET.parse(tables_path)
    except ET.ParseError as e:
        # Chain the original error so the underlying XML problem is not lost.
        raise ValueError(f"Failed to parse XML tables file {tables_path}: {e}") from e

    tables: Dict[str, pd.DataFrame] = {}
    for table in tree.getroot().findall("table"):
        name = table.attrib.get("name")
        if name in required_tables:
            # Empty table elements have text=None; treat as empty CSV.
            text = table.text or ""
            tables[name] = pd.read_csv(io.StringIO(text))

    missing = required_tables - tables.keys()
    if missing:
        raise ValueError(f"Missing required tables in XML: {missing}")

    return Tables(**tables)
|  | 54 | + | 
|  | 55 | + | 
def get_trace_file_objects(trace_file_path: Path) -> list[Dict[str, Any]]:
    """
    Load and return the 'traceEvents' list from a gzip-compressed JSON trace file.

    Args:
        trace_file_path: Path to a .json.gz trace file in Chrome trace format.

    Returns:
        The list stored under the top-level 'traceEvents' key.

    Raises:
        ValueError: If the file cannot be read or is not valid JSON.
        KeyError: If the JSON has no 'traceEvents' key.
    """
    try:
        with gzip.open(trace_file_path, "rt", encoding="utf-8") as file:
            data = json.load(file)
    except (OSError, json.JSONDecodeError) as e:
        # Chain the original error so the root cause stays visible.
        raise ValueError(
            f"Failed to read or parse trace file {trace_file_path}: {e}"
        ) from e

    if "traceEvents" not in data:
        raise KeyError(f"'traceEvents' key not found in {trace_file_path}")

    return data["traceEvents"]
|  | 70 | + | 
|  | 71 | + | 
def get_subops(df_group: pd.DataFrame) -> set:
    """Return ids of sub-operations: rows whose id differs from their group_id."""
    is_sub_op = df_group["id"] != df_group["group_id"]
    return set(df_group.loc[is_sub_op, "id"])
|  | 74 | + | 
|  | 75 | + | 
def transform_events(
    objects: Iterable[Dict[str, Any]], queue_df: pd.DataFrame, sub_ops: set
) -> None:
    """
    Annotate the 'queue' table in-place with duration based on trace events.

    Walks the 'qread' trace events in order and, each time the queue read
    pointer reaches the end of the command (or fused command chain) currently
    executing, writes the elapsed timestamp delta into queue_df['duration']
    for the row where that chain started. Rows whose scheduled_id is in
    sub_ops are fused into the preceding command's chain and get no duration
    row of their own.

    Args:
        objects: Trace-event dicts; only events with tid == 'qread' are used.
            Each such event must carry args['qread'] and 'ts'.
        queue_df: Queue table with 'offset' and 'scheduled_id' columns.
            Modified in place (gains a 'duration' column).
        sub_ops: scheduled_id values that are sub-operations (see get_subops).
    """
    queue_df_len = len(queue_df)
    offsets = queue_df["offset"].astype(int)

    # start_ts: timestamp at which the current command chain started.
    # cmd_index: row index of the first command in the current chain.
    # chain_len: number of rows fused into the current chain.
    start_ts, cmd_index, chain_len = 0, 0, 1

    def is_end_of_command(qread_offset: int, end_idx: int) -> bool:
        # For the last command there is no "next" offset to compare against,
        # so any read past the current command's own offset ends it.
        if end_idx >= queue_df_len:
            return qread_offset > offsets[cmd_index]
        return qread_offset == offsets[end_idx]

    for event in (e for e in objects if e.get("tid") == "qread"):
        if cmd_index >= queue_df_len:
            break

        # Scale to byte offsets (qread presumably counts 4-byte words —
        # TODO confirm against the trace producer).
        qread_offset = 4 * int(event["args"]["qread"])

        end_idx = cmd_index + chain_len
        if is_end_of_command(qread_offset, end_idx):
            end_ts = int(event["ts"]) - 1
            queue_df.loc[cmd_index, ["duration"]] = [
                end_ts - start_ts,
            ]
            start_ts = end_ts
            cmd_index += chain_len
            chain_len = 1
            # Extend the next chain over any immediately-following sub-ops so
            # the whole fused group is timed as one command.
            while (cmd_index + chain_len <= queue_df_len - 1) and queue_df.iloc[
                cmd_index + chain_len
            ]["scheduled_id"] in sub_ops:
                chain_len += 1
|  | 111 | + | 
|  | 112 | + | 
|  | 113 | +Agg = Union[str, Callable[[pd.Series], Any]] | 
|  | 114 | + | 
|  | 115 | + | 
def list_unique(s: pd.Series) -> list[Any]:
    """Aggregate a series into a sorted list of its distinct non-null values."""
    distinct = {value for value in s.dropna()}
    return sorted(distinct)
|  | 118 | + | 
|  | 119 | + | 
def build_perf_df(tables: Tables) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Build a performance DataFrame summarizing queue metrics grouped by source_id.
    Returns a tuple of (perf_df, cmd_to_op_df) where cmd_to_op_df is needed for unmapped op tracking.

    NOTE: mutates tables.queue in place by adding a 'cmd_id' column.
    """
    # Give every queue row a stable command id (its row index).
    tables.queue["cmd_id"] = tables.queue.index

    # Per compiler op: sum every metric column, but keep id-like columns as
    # sorted lists rather than summing them.
    excluded = {"optimised_id", "scheduled_id", "offset"}
    col_funcs: Dict[str, Agg] = {
        c: "sum" for c in tables.queue.columns if c not in excluded
    }

    col_funcs.update({"cmdstream_id": list_unique, "cmd_id": list_unique})

    cmd_to_op_df = tables.queue.groupby(COMPILER_OP_ID).agg(col_funcs).reset_index()

    # Attach source_id (and group info) to each compiler op, then join in the
    # per-op queue aggregates.
    opt_df = (
        pd.merge(tables.perf[["id", "source_id"]], tables.group, on="id", how="left")
        .rename(columns={"id": COMPILER_OP_ID})
        .merge(cmd_to_op_df, on=COMPILER_OP_ID, how="inner")
    )

    # Aggregate again per source op, collecting the compiler op ids that
    # contributed to each source op as a list.
    exclude_columns = ["source_id"]
    src_col_funcs: Dict[str, Agg] = {
        col: "sum" for col in opt_df.columns if col not in exclude_columns
    }
    src_col_funcs[COMPILER_OP_ID] = list_unique

    perf_df = opt_df.groupby("source_id").agg(src_col_funcs).reset_index()

    return perf_df, cmd_to_op_df
|  | 151 | + | 
|  | 152 | + | 
def check_unmapped_ops(
    tables: Tables, src_df: pd.DataFrame, cmd_to_op_df: pd.DataFrame
) -> None:
    """
    Identify operators in the performance data that are not mapped to any source operation.

    Prints a warning (with the offending rows) if any compiler op present in
    the perf data is missing from the aggregated source-level table.

    Args:
        tables: Parsed debug tables; only 'perf' and 'group' are used here.
        src_df: Source-level summary whose COMPILER_OP_ID column holds lists
            of compiler op ids mapped to each source op.
        cmd_to_op_df: Per-compiler-op aggregation of the queue table.
    """
    # Collect every compiler op id that made it into the source-level table.
    opt_ids_in_src_table: set = set()
    for opt_ids in src_df[COMPILER_OP_ID].dropna():
        # Aggregated cells hold lists of op ids; ignore anything else.
        if isinstance(opt_ids, list):
            opt_ids_in_src_table.update(opt_ids)

    # Rebuild the op-level frame the same way build_perf_df does, so the two
    # views can be compared directly.
    opt_df = pd.merge(
        tables.perf[["id", "source_id"]], tables.group, on="id", how="left"
    )
    opt_df = opt_df.rename(columns={"id": COMPILER_OP_ID})
    opt_df = pd.merge(opt_df, cmd_to_op_df, on=COMPILER_OP_ID, how="inner")

    # Series.isin accepts a set directly; no list conversion needed.
    unmapped_operators = opt_df[~opt_df[COMPILER_OP_ID].isin(opt_ids_in_src_table)]

    if not unmapped_operators.empty:
        print("Warning: There are unmapped operators in the performance data.")
        print(unmapped_operators)
|  | 178 | + | 
|  | 179 | + | 
def build_src_df(tables: Tables, perf_df: pd.DataFrame) -> pd.DataFrame:
    """
    Merge source table with performance metrics and total NPU cycles.

    Returns a single DataFrame keyed by source op: the source table's
    (ext_key, source_id) columns, left-joined with perf_df and with the
    per-source-op sum of 'npu_cycles' from the perf table.
    """
    return pd.merge(
        tables.source.rename(columns={"id": "source_id"})[["ext_key", "source_id"]],
        perf_df,
        on="source_id",
        how="left",
    ).merge(
        # Total estimated NPU cycles per source op.
        tables.perf[["source_id", "npu_cycles"]]
        .groupby("source_id")
        .sum(numeric_only=True)
        .reset_index(),
        on="source_id",
        how="left",
    )
|  | 198 | + | 
|  | 199 | + | 
def get_model_node_data(df: pd.DataFrame) -> ndb.ModelNodeData:
    """
    Convert source-level metrics into ModelExplorer node data for duration.

    Each source op (by ext_key) becomes a node result keyed 'main/op<N>',
    colored by a white->green->yellow->red gradient over duration.
    """
    # Source ops with no measured duration count as zero cycles.
    cycle_counts = df["duration"].fillna(0).astype(int)

    duration_results: Dict[str, ndb.NodeDataResult] = {
        f"main/op{int(ext_key)}": ndb.NodeDataResult(value=int(cycles))
        for ext_key, cycles in zip(df["ext_key"], cycle_counts)
    }

    # Heat-map stops: low durations fade to white, high durations to red.
    gradient_stops = [
        (0.0, "#ffffff"),
        (0.1, "#33FF00"),
        (0.2, "#66FF00"),
        (0.5, "#FFFF00"),
        (0.7, "#FF6600"),
        (1.0, "#FF0000"),
    ]
    gradient = [
        ndb.GradientItem(stop=stop, bgColor=color) for stop, color in gradient_stops
    ]

    return ndb.ModelNodeData(
        graphsData={
            "main": ndb.GraphNodeData(results=duration_results, gradient=gradient)
        }
    )
|  | 226 | + | 
|  | 227 | + | 
def build_overlay_data(trace_path: Path, tables_path: Path) -> ndb.ModelNodeData:
    """
    Build ModelExplorer node data from trace and tables files.
    """
    parsed_tables = parse_tables(tables_path)
    trace_events = get_trace_file_objects(trace_path)

    # Attach measured durations to the queue table before aggregating.
    transform_events(trace_events, parsed_tables.queue, get_subops(parsed_tables.group))

    perf_df, cmd_to_op_df = build_perf_df(parsed_tables)
    source_df = build_src_df(parsed_tables, perf_df)
    check_unmapped_ops(parsed_tables, source_df, cmd_to_op_df)

    return get_model_node_data(source_df)
|  | 240 | + | 
|  | 241 | + | 
def validate_file_exists(file_path: Path) -> None:
    """Raise FileNotFoundError unless file_path exists on disk."""
    if file_path.exists():
        return
    raise FileNotFoundError(f"{file_path} not found")
|  | 245 | + | 
|  | 246 | + | 
def validate_perf_mode_args(trace: str, tables: str) -> None:
    """Ensure --trace and --tables were either both supplied or both omitted."""
    if trace and tables:
        return
    raise ValueError(
        "Both --trace and --tables must be provided for perf mode, or neither for default mode"
    )
| 13 | 252 | 
 | 
| 14 | 253 | 
 | 
def main() -> None:
    """Parse CLI args and launch the model explorer, optionally with a
    duration overlay built from perf-mode trace/tables files."""
    parser = argparse.ArgumentParser(
        description="Visualize a model using model explorer."
    )
    parser.add_argument(
        "--model_path", required=True, type=str, help="Path to the model file"
    )
    parser.add_argument(
        "--trace",
        required=False,
        help="(perf mode) PMU trace JSON.gz file with performance data",
    )
    parser.add_argument(
        "--tables",
        required=False,
        help="(perf mode) Vela debug database tables XML file",
    )

    args = parser.parse_args()

    model_file = Path(args.model_path).resolve()
    validate_file_exists(model_file)

    config = model_explorer_config().add_model_from_path(str(model_file))

    # Perf mode: supplying either flag requires both (checked below).
    perf_mode_requested = bool(args.trace or args.tables)
    if perf_mode_requested:
        validate_perf_mode_args(args.trace, args.tables)

        trace_file = Path(args.trace).resolve()
        tables_file = Path(args.tables).resolve()
        validate_file_exists(trace_file)
        validate_file_exists(tables_file)

        config.add_node_data(
            "Duration (Cycles)", build_overlay_data(trace_file, tables_file)
        )

    visualize_model_explorer(config=config, extensions=["tosa_adapter_model_explorer"])
| 29 | 290 | 
 | 
| 30 | 291 | 
 | 
| 31 | 292 | if __name__ == "__main__": | 
|  | 
0 commit comments