Skip to content

Commit 56502a7

Browse files
limintang authored and facebook-github-bot committed
Add a parameter to output delegate summary in llama export (#8174)
Summary: Print delegation summary when the verbose parameter is set. Differential Revision: D68991594
1 parent 0a936e0 commit 56502a7

File tree

3 files changed

+24
-1
lines changed

3 files changed

+24
-1
lines changed

devtools/backend_debug/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from executorch.devtools.backend_debug.delegation_info import (
88
DelegationBreakdown,
99
get_delegation_info,
10+
print_delegation_info,
1011
)
1112

12-
__all__ = ["DelegationBreakdown", "get_delegation_info"]
13+
__all__ = ["DelegationBreakdown", "get_delegation_info", "print_delegation_info"]

devtools/backend_debug/delegation_info.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import re
88
from collections import defaultdict
99
from dataclasses import asdict, dataclass
10+
from tabulate import tabulate
1011
from typing import Dict
1112

1213
import pandas as pd
@@ -174,3 +175,10 @@ def _insert_op_occurrences_dict(node_name: str, delegated: bool) -> None:
174175
num_delegated_subgraphs=delegated_subgraph_counter,
175176
delegation_by_operator=op_occurrences_dict,
176177
)
178+
179+
180+
def print_delegation_info(graph_module: torch.fx.GraphModule):
181+
delegation_info = get_delegation_info(graph_module)
182+
print(delegation_info.get_summary())
183+
df = delegation_info.get_operator_delegation_dataframe()
184+
print(tabulate(df, headers="keys", tablefmt="fancy_grid"))

examples/qualcomm/oss_scripts/llama/llama.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@
5454
from executorch.examples.models.llama.source_transformation.quantize import (
5555
get_quant_embedding_transform,
5656
)
57+
58+
from executorch.devtools.backend_debug import print_delegation_info
59+
5760
from executorch.examples.models.llama.tokenizer.tiktoken import Tokenizer as Tiktoken
5861
from executorch.examples.qualcomm.oss_scripts.llama.model.static_llama import (
5962
LlamaModel,
@@ -389,6 +392,7 @@ def lowering_modules(
389392
num_sharding=1,
390393
passes_job=OrderedDict(),
391394
shared_buffer=False,
395+
verbose=False,
392396
):
393397
executorch_config = ExecutorchBackendConfig(
394398
# For shared buffer, user must pass the memory address
@@ -440,6 +444,10 @@ def lowering_modules(
440444
edge_prog_mgr = edge_prog_mgr.to_backend(partitioner)
441445
if num_sharding > 1:
442446
update_spill_fill_size(edge_prog_mgr.exported_program())
447+
448+
if verbose:
449+
print_delegation_info(edge_prog_mgr.exported_program().graph_module)
450+
443451
exec_prog_mgr = edge_prog_mgr.to_executorch(config=executorch_config)
444452
with open(f"{work_space}/{self.pte_filename}.pte", "wb") as file:
445453
exec_prog_mgr.write_to_file(file)
@@ -667,6 +675,10 @@ def compile(args, pte_filename, tokenizer):
667675
)
668676
compiler_specs[0][0].value = option_to_flatbuffer(qnn_executorch_options)
669677

678+
if args.verbose:
679+
for exported_program in exported_programs:
680+
print_delegation_info(exported_program.graph_module)
681+
670682
executorch_config = ExecutorchBackendConfig(
671683
# For shared buffer, user must pass the memory address
672684
# which is allocated by RPC memory to executor runner.
@@ -980,6 +992,8 @@ def _build_parser():
980992
help="Fallback to cpu embedding operator and type of embedding quantization, '<bitwidth>,<groupsize>', e.g., '4,32'.",
981993
)
982994

995+
parser.add_argument("-v", "--verbose", action="store_true")
996+
983997
return parser
984998

985999

0 commit comments

Comments (0)