25 changes: 16 additions & 9 deletions devtools/inspector/_inspector.py
@@ -658,7 +658,7 @@ def _populate_debugging_related_fields(

def _associate_with_op_graph_nodes(
self,
debug_handle_to_op_node_map: Dict[int, OperatorNode],
debug_handle_to_op_node_map: Dict[int, List[OperatorNode]],
) -> None:
"""
Helper function to populate the stack_traces, module_hierarchy and op_types attributes
@@ -676,14 +676,21 @@ def _associate_with_op_graph_nodes(
debug_handles = [debug_handles]

for handle in debug_handles:
node = debug_handle_to_op_node_map.get(handle)
# Attach node metadata including stack traces, module hierarchy and op_types to this event
if node is not None and (metadata := node.metadata) is not None:
self.stack_traces[node.name] = metadata.get("stack_trace")
self.module_hierarchy[node.name] = metadata.get("nn_module_stack")
if node.op:
# TODO: consider having this as a dict from node.name -> node.op
self.op_types += [node.op]
nodes = debug_handle_to_op_node_map.get(handle, None)
if nodes is None:
continue

for node in nodes:
# Attach node metadata including stack traces, module hierarchy and op_types to this event
if node is not None and (metadata := node.metadata) is not None:
if node.name not in self.stack_traces:
self.stack_traces[node.name] = metadata.get("stack_trace")
self.module_hierarchy[node.name] = metadata.get(
"nn_module_stack"
)
if node.op:
# TODO: consider having this as a dict from node.name -> node.op
self.op_types += [node.op]


@dataclass
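To make the new one-to-many association concrete, here is a minimal, runnable sketch of the logic above. OpNode and Event are hypothetical, simplified stand-ins for ExecuTorch's OperatorNode and Event, modeling only the fields this logic touches; the first-writer-wins dedup mirrors the diff:

from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union

# OpNode and Event are hypothetical stand-ins for the real OperatorNode and
# Event classes, reduced to the fields the association logic touches.
@dataclass
class OpNode:
    name: str
    op: Optional[str] = None
    metadata: Optional[dict] = None

@dataclass
class Event:
    stack_traces: Dict[str, Optional[str]] = field(default_factory=dict)
    module_hierarchy: Dict[str, Optional[str]] = field(default_factory=dict)
    op_types: List[str] = field(default_factory=list)

    def associate(
        self,
        debug_handles: Union[int, List[int]],
        handle_to_nodes: Dict[int, List[OpNode]],
    ) -> None:
        if isinstance(debug_handles, int):
            debug_handles = [debug_handles]
        for handle in debug_handles:
            for node in handle_to_nodes.get(handle) or []:
                if node.metadata is None:
                    continue
                # First writer wins: mid-migration, several nodes can share a
                # handle, so record each node's metadata only once.
                if node.name not in self.stack_traces:
                    self.stack_traces[node.name] = node.metadata.get("stack_trace")
                    self.module_hierarchy[node.name] = node.metadata.get(
                        "nn_module_stack"
                    )
                if node.op:
                    self.op_types.append(node.op)

# Two nodes sharing one debug handle, as can happen mid-migration.
event = Event()
event.associate(
    5,
    {5: [OpNode("relu", "aten.relu", {"stack_trace": "model.py:10"}),
         OpNode("relu_1", "aten.relu", {"stack_trace": "model.py:10"})]},
)
print(event.stack_traces)  # {'relu': 'model.py:10', 'relu_1': 'model.py:10'}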
28 changes: 18 additions & 10 deletions devtools/inspector/_inspector_utils.py
@@ -303,14 +303,23 @@ def gen_graphs_from_etrecord(
return op_graph_map


# One debug handle should only be associated with one node. We are in the middle of migrating debug handle generation
# from the graph after to_edge to the graph after torch.export, so every debug handle in the exported graph may be associated with multiple nodes in the to_edge
Comment on lines +306 to +307

Contributor:
Put the before and after state.

Contributor Author:
I've updated the comment to reflect the function's behavior before, during, and after the migration.

# graph. After the migration is complete, we should bring back the original return type as well as the node-count check.
#
# Before migration: returned Dict for 1 debug handle to 1 node in to_edge graph
# During migration: returned Dict for 1 debug handle to multiple nodes in to_edge graph
# After migration: returned Dict for 1 debug handle to 1 node in exported graph
#
# TODO(gasoonjia): recover the return type to Dict[int, OperatorNode] and re-enable the node-count check.
def create_debug_handle_to_op_node_mapping(
op_graph: OperatorGraph,
) -> Dict[int, OperatorNode]:
) -> Dict[int, List[OperatorNode]]:
"""
Recursive function to traverse all the operator graph nodes of input op_graph and build a mapping
from each debug handle to the list of operator nodes that contain the debug handle in their metadata.
"""
debug_handle_to_op_node_map: Dict[int, OperatorNode] = {}
debug_handle_to_op_node_map: Dict[int, List[OperatorNode]] = {}

# Recursively searches through the metadata of nodes
def _extract_debug_handles(graph: OperatorGraph):
@@ -320,14 +329,13 @@ def _extract_debug_handles(graph: OperatorGraph):
if isinstance(element, OperatorNode) and element.metadata is not None:
metadata = element.metadata
debug_handle = metadata.get("debug_handle")
if debug_handle is not None:
existing_entry = debug_handle_to_op_node_map.get(debug_handle)
if existing_entry is not None:
raise ValueError(
f"Duplicated debug handle {str(debug_handle)} shared between {element.name} and {existing_entry.name}. "
"No two op nodes of the same graph should have the same debug handle."
)
debug_handle_to_op_node_map[debug_handle] = element
if debug_handle is None:
continue

if debug_handle not in debug_handle_to_op_node_map:
debug_handle_to_op_node_map[debug_handle] = []

debug_handle_to_op_node_map[debug_handle].append(element)

# Start traversing
_extract_debug_handles(op_graph)
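Below is a self-contained sketch of the one-to-many grouping that create_debug_handle_to_op_node_mapping now performs. Node is a hypothetical stand-in for OperatorNode, and defaultdict is used purely for brevity; the diff builds the lists explicitly, to the same effect:

from collections import defaultdict
from typing import Any, Dict, List, Optional

class Node:
    # Hypothetical stand-in for OperatorNode: only name and metadata matter here.
    def __init__(self, name: str, metadata: Optional[Dict[str, Any]] = None):
        self.name = name
        self.metadata = metadata

def build_handle_map(nodes: List[Node]) -> Dict[int, List[Node]]:
    """Group nodes by debug handle; during the migration, several nodes
    may legitimately share one handle, so no duplicate error is raised."""
    handle_map: Dict[int, List[Node]] = defaultdict(list)
    for node in nodes:
        if node.metadata is None:
            continue
        handle = node.metadata.get("debug_handle")
        if handle is None:
            continue
        handle_map[handle].append(node)
    return dict(handle_map)

mapping = build_handle_map(
    [
        Node("conv", {"debug_handle": 1}),
        Node("relu", {"debug_handle": 1}),  # shares a handle with conv
        Node("sigmoid", {"debug_handle": 2}),
    ]
)
assert [n.name for n in mapping[1]] == ["conv", "relu"]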
15 changes: 13 additions & 2 deletions devtools/inspector/tests/inspector_test.py
@@ -183,7 +183,11 @@ def test_inspector_associate_with_op_graph_nodes_single_debug_handle(self):

# Call the method that's under testing and verify
event_with_single_debug_handle._associate_with_op_graph_nodes(
{debug_handle: node_0}
{
debug_handle: [
node_0,
]
}
)

expected_stack_traces = {"node_0": "stack_trace_relu"}
@@ -226,7 +230,14 @@ def test_inspector_associate_with_op_graph_nodes_multiple_debug_handles(self):

# Call the method that's under testing and verify
event_with_multiple_debug_handles._associate_with_op_graph_nodes(
{debug_handles[0]: node_0, debug_handles[1]: node_1}
{
debug_handles[0]: [
node_0,
],
debug_handles[1]: [
node_1,
],
}
)

expected_stack_traces = {
53 changes: 22 additions & 31 deletions devtools/inspector/tests/inspector_test_utils.py
@@ -62,25 +62,17 @@ def get_expected_intermediate_outputs():
Returns the expected mapping from debug handles to intermediate outputs for this model for the given input.
"""
return {
(10,): torch.tensor([[[[7.7000, 6.7000], [4.7000, 3.7000]]]]),
(11,): torch.tensor([[7.7000, 6.7000, 4.7000, 3.7000]]),
(12,): torch.tensor(
[
[0.1000, 0.5000],
[0.2000, 0.6000],
[0.3000, 0.7000],
[0.4000, 0.8000],
]
),
(13,): torch.tensor([[5.0000, 14.1200]]),
(14,): torch.tensor([[5.5000, 13.6200]]),
(15,): torch.tensor([[5.4000, 13.5200]]),
(16,): torch.tensor([[10.8000, 6.7600]]),
(17,): torch.tensor([3.0000, 1.5000]),
(18,): torch.tensor([[3.6000, 4.5067]]),
(19,): torch.tensor([[3.6000, 4.5067]]),
(20,): torch.tensor([[0.9734, 0.9891]]),
(21,): [torch.tensor([[0.9734]]), torch.tensor([[0.9891]])],
(1,): torch.tensor([[[[7.7000, 6.7000], [4.7000, 3.7000]]]]),
(2,): torch.tensor([[7.7000, 6.7000, 4.7000, 3.7000]]),
(3,): torch.tensor([[5.0000, 14.1200]]),
(4,): torch.tensor([[5.5000, 13.6200]]),
(5,): torch.tensor([[5.4000, 13.5200]]),
(6,): torch.tensor([[10.8000, 6.7600]]),
(7,): torch.tensor([3.0000, 1.5000]),
(8,): torch.tensor([[3.6000, 4.5067]]),
(9,): torch.tensor([[3.6000, 4.5067]]),
(10,): torch.tensor([[0.9734, 0.9891]]),
(11,): [torch.tensor([[0.9734]]), torch.tensor([[0.9891]])],
}

@staticmethod
@@ -89,18 +81,17 @@ def get_expected_debug_handle_to_op_name():
Returns the expected mapping from debug handles to op names for this model for the given input.
"""
return {
(10,): "aten_convolution_default",
(11,): "aten_view_copy_default",
(12,): "aten_permute_copy_default",
(13,): "aten_addmm_default",
(14,): "aten_add_tensor",
(15,): "aten_sub_tensor",
(16,): "aten_mul_tensor",
(17,): "aten_add_tensor_1",
(18,): "aten_div_tensor",
(19,): "aten_relu_default",
(20,): "aten_sigmoid_default",
(21,): "aten_split_with_sizes_copy_default",
(1,): "aten_convolution_default",
(2,): "aten_view_copy_default",
(3,): "aten_addmm_default",
(4,): "aten_add_tensor",
(5,): "aten_sub_tensor",
(6,): "aten_mul_tensor",
(7,): "aten_add_tensor_1",
(8,): "aten_div_tensor",
(9,): "aten_relu_default",
(10,): "aten_sigmoid_default",
(11,): "aten_split_with_sizes_copy_default",
}


16 changes: 12 additions & 4 deletions devtools/inspector/tests/inspector_utils_test.py
@@ -583,7 +583,9 @@ def gen_mock_operator_graph_with_expected_map() -> (
"nn_module_stack": "module_hierarchy_relu",
},
)
mapping[111] = node_fused_conv_relu
mapping[111] = [
node_fused_conv_relu,
]
node_sin = OperatorNode(
"sin",
[node_fused_conv_relu],
@@ -594,7 +596,9 @@
"nn_module_stack": "module_hierarchy_sin",
},
)
mapping[222] = node_sin
mapping[222] = [
node_sin,
]
node_cos = OperatorNode(
"cos",
[node_sin],
@@ -605,7 +609,9 @@
"nn_module_stack": "module_hierarchy_cos",
},
)
mapping[333] = node_cos
mapping[333] = [
node_cos,
]
node_div = OperatorNode(
"div",
[node_cos],
@@ -616,7 +622,9 @@
"nn_module_stack": "module_hierarchy_div",
},
)
mapping[444] = node_div
mapping[444] = [
node_div,
]
node_output = ValueNode("output", [node_div])
return (
OperatorGraph(
60 changes: 52 additions & 8 deletions exir/passes/debug_handle_generator_pass.py
@@ -4,31 +4,75 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Dict

from executorch.exir.graph_module import bfs_trace_with_node_process
from executorch.exir.pass_base import ExportPass
from torch.export import ExportedProgram
from torch.fx import GraphModule
from torch.fx import GraphModule, Node
from torch.fx.passes.infra.pass_base import PassResult


class DebugHandleGeneratorPass(ExportPass):
def call(self, graph_module: GraphModule) -> PassResult:
"""Lower a quantized reference model (with reference quantized operator patterns)
to executorch backend, that has a canonical set of quantized operators
"""Generate debug handles for each node in the graph module and its submodule except
placeholder and output nodes. The debug handle is generated starting from 1 and
incrementally. The debug handle of a node is the same as the node sharing the same
greatest ancestor node in the export flow.
"""

index = 1
FROM_NODE_KEY = "from_node"
DEBUG_HANDLE_KEY = "debug_handle"

source_node_id_to_debug_handle: Dict[str, int] = {}

def _get_greatest_ancestor_node_identifier(node: Node) -> str:
"""Get the identifier of the greatest ancestor node of the given node.
The identifier is the concatenation of the node name and graph id of the
greatest ancestor node, where the graph id is a unique id for each graph
module in the export flow and the node name is unique within its graph module.
"""

node_source = node.meta[FROM_NODE_KEY]
node_source = node_source[-1]
Contributor:
Is this only populated for the unlifted graph, when .module() is called? If so, how is it guaranteed that this is present?

Contributor Author:
No, it is not only populated when .module() is explicitly called. In our case it is guaranteed to be generated after run_decompositions, so we will have it when generating debug handles.

Contributor:
> it is guaranteed to be generated after run_decompositions, so we will have it when generating debug handles.

Therein lies my question around to_edge_transform_and_lower, although that specifically is unrelated to this PR.


while len(node_source.from_node) > 0:
node_source = node_source.from_node[-1]

return node_source.name + str(node_source.graph_id)

def _extract_debug_handles_from_node(node: Node) -> None:
"""
Generate a debug handle for the node, derived from its oldest ancestor node's
name and graph id; return early if the node does not need to be traced.
"""

if node.op == "placeholder" or node.op == "output":
# placeholder and output nodes don't have debug handle
return

assert (
FROM_NODE_KEY in node.meta
), f"Node {node} does not have meta key {FROM_NODE_KEY}"

greatest_ancestor_node_id = _get_greatest_ancestor_node_identifier(node)

debug_handle = (
len(source_node_id_to_debug_handle) + 1
if greatest_ancestor_node_id not in source_node_id_to_debug_handle
else source_node_id_to_debug_handle[greatest_ancestor_node_id]
)
Comment on lines +61 to +65

Contributor:
OK, one of my concerns is this: is this going to impact any existing debug utils that rely on debug handles? Earlier each node had its own debug handle, but now a bunch of nodes that might belong to the same original node (e.g. full aten graph nodes?) share one.

The other question: how does this work for delegates that use to_edge_transform_and_lower, if this pass is run only after to_edge?

Contributor Author (@Gasoonjia, Jul 2, 2025):
For the first question: yes, devtools and some other infra and utils depend on the debug handle, and in the following diffs I will do the migration. Some test cases rely on debug handles for their checks; I've talked with the owners and they are OK with the update. CI is now all clean.

For the second question: I'm not sure I fully understand your concern, but the current debug handle generation pass is also located at the end of to_edge, and this update does not change that location. We do have a concern about whether the change in debug handle generation may impact the correctness of downstream debug handles, e.g. in delegates, though CI is all clean. We will pay attention to this when working on the numeric discrepancy detector for delegated models.

Contributor:
Actually, we should discuss that second paragraph further. The issue is that to_edge_transform_and_lower happens before we run decomposition, is that right? If so, you don't have debug handles generated, no? And with no debug handles there is no debug information. I would like to either be wrong about my assumption, or we need to fix it.

Contributor:
For the first paragraph: do you foresee any disruption in existing user flows for debugging? That would be one thing that would make me block this PR, so having an answer on that would be nice.

Contributor Author (@Gasoonjia, Jul 3, 2025):
> The issue is that to_edge_transform_and_lower happens before we run decomposition, is that right?

No: operator decomposition happens inside to_edge_transform_and_lower. The to_edge stage happens inside to_edge_transform_and_lower, and operator decomposition happens inside the to_edge stage. https://github.com/pytorch/executorch/blob/main/exir/program/_program.py#L1129

> do you foresee any disruption in existing user flows for debugging

No. One heartbreaking bit of context: no debug tool is actually using debug handles right now. The numerical discrepancy detector will be the first one.

def _extract_debug_handles_from_node(node):
nonlocal index
node.meta["debug_handle"] = index
index += 1
source_node_id_to_debug_handle[greatest_ancestor_node_id] = debug_handle
node.meta[DEBUG_HANDLE_KEY] = debug_handle

bfs_trace_with_node_process(graph_module, _extract_debug_handles_from_node)

return PassResult(graph_module, True)


# TODO(gasoonjia): generate missing debug handles using `from_node` info
def generate_missing_debug_handles(ep: ExportedProgram):
"""
This pass is used to generate missing debug handles for the graph module and its submodules.
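To make the ancestor-based assignment concrete, here is a minimal sketch under simplified assumptions: NodeSource is a hypothetical stand-in for the provenance entries torch.fx records under node.meta["from_node"] (real entries carry more fields), and the walk plus handle assignment mirrors the pass above:

from dataclasses import dataclass, field
from typing import Dict, List

@dataclass
class NodeSource:
    # Hypothetical stand-in for a torch.fx "from_node" provenance entry.
    name: str
    graph_id: int
    from_node: List["NodeSource"] = field(default_factory=list)

def greatest_ancestor_id(source: NodeSource) -> str:
    # Follow the provenance chain to the oldest ancestor; its name plus
    # graph id identifies it uniquely across the export flow.
    while source.from_node:
        source = source.from_node[-1]
    return source.name + str(source.graph_id)

def assign_debug_handles(sources: List[NodeSource]) -> List[int]:
    """Nodes tracing back to the same original node share a handle;
    each new ancestor gets the next handle, starting from 1."""
    seen: Dict[str, int] = {}
    return [
        seen.setdefault(greatest_ancestor_id(src), len(seen) + 1)
        for src in sources
    ]

# Decomposition example: addmm and view both trace back to "linear"
# in graph 0, so they receive the same debug handle.
linear = NodeSource("linear", graph_id=0)
decomposed = [
    NodeSource("addmm", graph_id=1, from_node=[linear]),
    NodeSource("view", graph_id=1, from_node=[linear]),
    NodeSource("relu", graph_id=1),  # its own ancestor: gets a new handle
]
print(assign_debug_handles(decomposed))  # [1, 1, 2]

The design choice this illustrates: handles key off provenance rather than graph position, so nodes produced by decomposing one original op inherit a single shared handle.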