Add functionality to map runtime debug_handles to op names

Juntian Liu · web-flow · commit ecb85cecba92 · 2025-06-26T00:58:55.000-07:00
Differential Revision: D77266536 Pull Request resolved: pytorch#11987
diff --git a/devtools/inspector/_inspector.py b/devtools/inspector/_inspector.py
@@ -1161,12 +1161,15 @@ def _consume_etrecord(self) -> None:
         )
 
     # TODO: Make it more extensible to further merge overlapping debug handles
-    def _get_runtime_intermediate_outputs(self) -> Dict[Tuple[int, ...], Any]:
+    def _get_runtime_intermediate_outputs_and_op_names(
+        self,
+    ) -> Tuple[Dict[Tuple[int, ...], Any], Dict[Tuple[int, ...], str]]:
         """
-        Retrieve the raw runtime intermediate outputs(debug handles and value mappings)
-        from the event blocks. These outputs will be processed later to merge overlapping debug handles.
+        Retrieve the runtime intermediate outputs (a mapping from debug handles to intermediate values)
+        from the event blocks, along with the corresponding mapping from debug handles to op names.
         """
         debug_handle_to_output = {}
+        debug_handle_to_op_name = {}
         for event_block in self.event_blocks:
             for event in event_block.events:
                 # Skip OPERATOR_CALL events to avoid double-counting and exclude framework tax
@@ -1175,20 +1178,23 @@ def _get_runtime_intermediate_outputs(self) -> Dict[Tuple[int, ...], Any]:
                     or not event.op_types
                 ):
                     continue
-                # Normalize debug_handles to a tuple
-                debug_handles = event.debug_handles
-                if isinstance(debug_handles, int):
-                    debug_handles = (debug_handles,)
+                # Normalize debug_handle to a tuple
+                debug_handle = event.debug_handles
+                if isinstance(debug_handle, int):
+                    debug_handle = (debug_handle,)
                 else:
-                    debug_handles = tuple(debug_handles)
-                current_entry = debug_handle_to_output.get(debug_handles, (-1, None))
-                # When event has same debug handles, only keep the one with the largest instruction id
+                    debug_handle = tuple(debug_handle)
+                current_entry = debug_handle_to_output.get(debug_handle, (-1, None))
+                # When events share the same debug_handle, keep only the one with the largest instruction id
                 if event._instruction_id > current_entry[0]:
-                    debug_handle_to_output[debug_handles] = (
+                    debug_handle_to_output[debug_handle] = (
                         event._instruction_id,
                         event.debug_data,
                     )
-        return {k: v[1] for k, v in debug_handle_to_output.items()}
+                    debug_handle_to_op_name[debug_handle] = event.name
+        return {
+            k: v[1] for k, v in debug_handle_to_output.items()
+        }, debug_handle_to_op_name
 
     def to_dataframe(
         self,
@@ -1364,8 +1370,12 @@ def calculate_numeric_gap(self, distance: str = "MSE") -> pd.DataFrame:
             raise ValueError(
                 "The aot intermediate outputs is required but not populated."
             )
+        # The runtime_op_names will be used later to map runtime debug_handle to op_name
+        runtime_intermediate_outputs, runtime_op_names = (
+            self._get_runtime_intermediate_outputs_and_op_names()
+        )
         mapping = map_runtime_aot_intermediate_outputs(
-            self._aot_intermediate_outputs, self._get_runtime_intermediate_outputs()
+            self._aot_intermediate_outputs, runtime_intermediate_outputs
         )
         metric = distance.strip().upper()
         if metric == "MSE":
diff --git a/devtools/inspector/tests/inspector_test.py b/devtools/inspector/tests/inspector_test.py
@@ -537,7 +537,7 @@ def test_consume_etrecord_populates_correct_aot_intermediate_outputs(self):
                     )
                 )
 
-    def test_get_runtime_intermediate_outputs(self):
+    def test_get_runtime_intermediate_outputs_and_op_names(self):
         # Create a context manager to patch functions called by Inspector.__init__
         with patch.object(
             _inspector, "parse_etrecord", return_value=None
@@ -560,25 +560,39 @@ def test_get_runtime_intermediate_outputs(self):
                 EventBlock(name=EVENT_BLOCK_NAME, events=self._gen_random_events())
             ]
 
-            runtime_outputs = inspector_instance._get_runtime_intermediate_outputs()
-            # This output should be a dictionary with 5 keys
+            runtime_outputs, op_names = (
+                inspector_instance._get_runtime_intermediate_outputs_and_op_names()
+            )
+            # Both the runtime_outputs and op_names dictionaries should have 5 keys
             self.assertEqual(
                 len(runtime_outputs),
                 5,
             )
-            # Check that keys (0,) and (1,) are not in the dictionary(skip OPERATOR_CALL and op_types are empty)
+            self.assertEqual(
+                len(op_names),
+                5,
+            )
+
+            # Check that keys (0,) and (1,) are absent from both dictionaries (OPERATOR_CALL events and events with empty op_types are skipped)
             self.assertNotIn((0,), runtime_outputs)
             self.assertNotIn((1,), runtime_outputs)
+            self.assertNotIn((0,), op_names)
+            self.assertNotIn((1,), op_names)
 
             # Same debug_handle but different instruction_id, should record the last one
             self.assertIn((4,), runtime_outputs)
+            self.assertIn((4,), op_names)
             self.assertTrue(
                 torch.equal(runtime_outputs[(4,)][0], torch.tensor([4.0, 5.0, 6.0]))
             )
+            self.assertEqual(op_names[(4,)], "op_3")
+
             # Check that keys (5,) to (8,) are in the dictionary and have values of the correct size
             for key in range(5, 9):
                 self.assertIn((key,), runtime_outputs)
+                self.assertIn((key,), op_names)
                 self.assertEqual(len(runtime_outputs[(key,)]), RAW_DATA_SIZE)
+                self.assertEqual(op_names[(key,)], f"op_{key-1}")
 
     def test_calculate_numeric_gap(self):
         # Create a context manager to patch functions called by Inspector.__init__
@@ -608,8 +622,8 @@ def test_calculate_numeric_gap(self):
             }
 
             inspector_instance._aot_intermediate_outputs = aot_intermediate_outputs
-            inspector_instance._get_runtime_intermediate_outputs = (
-                lambda: runtime_intermediate_outputs
+            inspector_instance._get_runtime_intermediate_outputs_and_op_names = (
+                lambda: (runtime_intermediate_outputs, {})
             )
 
             df = inspector_instance.calculate_numeric_gap(distance="L1")