Add functionality to map runtime debug_handles to op names (#11987)

Juntian Liu · facebook-github-bot · commit b7a9cf84fc22 · 2025-06-25T17:46:13.000-07:00
Summary:

This PR adds functionality to map runtime debug handles to operator names. The mapping will be used later to improve how numerical discrepancy results are presented, making them easier for users to understand.

Differential Revision: D77266536
diff --git a/devtools/inspector/_inspector.py b/devtools/inspector/_inspector.py
@@ -1161,12 +1161,16 @@ def _consume_etrecord(self) -> None:
         )
 
     # TODO: Make it more extensible to further merge overlapping debug handles
-    def _get_runtime_intermediate_outputs(self) -> Dict[Tuple[int, ...], Any]:
+    def _get_runtime_intermediate_outputs_and_op_names(
+        self,
+    ) -> Tuple[Dict[Tuple[int, ...], Any], Dict[Tuple[int, ...], str]]:
         """
-        Retrieve the raw runtime intermediate outputs(debug handles and value mappings)
-        from the event blocks. These outputs will be processed later to merge overlapping debug handles.
+        Retrieve the runtime intermediate outputs (a mapping from debug handles to intermediate values)
+        from the event blocks, along with the corresponding mapping from debug handles to op names.
+        These outputs will be processed later to merge overlapping debug handles.
         """
         debug_handle_to_output = {}
+        debug_handle_to_op_name = {}
         for event_block in self.event_blocks:
             for event in event_block.events:
                 # Skip OPERATOR_CALL events to avoid double-counting and exclude framework tax
@@ -1175,20 +1179,23 @@ def _get_runtime_intermediate_outputs(self) -> Dict[Tuple[int, ...], Any]:
                     or not event.op_types
                 ):
                     continue
-                # Normalize debug_handles to a tuple
-                debug_handles = event.debug_handles
-                if isinstance(debug_handles, int):
-                    debug_handles = (debug_handles,)
+                # Normalize debug_handle to a tuple
+                debug_handle = event.debug_handles
+                if isinstance(debug_handle, int):
+                    debug_handle = (debug_handle,)
                 else:
-                    debug_handles = tuple(debug_handles)
-                current_entry = debug_handle_to_output.get(debug_handles, (-1, None))
-                # When event has same debug handles, only keep the one with the largest instruction id
+                    debug_handle = tuple(debug_handle)
+                current_entry = debug_handle_to_output.get(debug_handle, (-1, None))
+                # When event has same debug_handle, only keep the one with the largest instruction id
                 if event._instruction_id > current_entry[0]:
-                    debug_handle_to_output[debug_handles] = (
+                    debug_handle_to_output[debug_handle] = (
                         event._instruction_id,
                         event.debug_data,
                     )
-        return {k: v[1] for k, v in debug_handle_to_output.items()}
+                    debug_handle_to_op_name[debug_handle] = event.name
+        return {
+            k: v[1] for k, v in debug_handle_to_output.items()
+        }, debug_handle_to_op_name
 
     def to_dataframe(
         self,
@@ -1364,8 +1371,12 @@ def calculate_numeric_gap(self, distance: str = "MSE") -> pd.DataFrame:
             raise ValueError(
                 "The aot intermediate outputs is required but not populated."
             )
+        # The runtime_op_names will be used later to map runtime debug_handle to op_name
+        runtime_intermediate_outputs, runtime_op_names = (
+            self._get_runtime_intermediate_outputs_and_op_names()
+        )
         mapping = map_runtime_aot_intermediate_outputs(
-            self._aot_intermediate_outputs, self._get_runtime_intermediate_outputs()
+            self._aot_intermediate_outputs, runtime_intermediate_outputs
         )
         metric = distance.strip().upper()
         if metric == "MSE":
diff --git a/devtools/inspector/tests/inspector_test.py b/devtools/inspector/tests/inspector_test.py
@@ -537,7 +537,7 @@ def test_consume_etrecord_populates_correct_aot_intermediate_outputs(self):
                     )
                 )
 
-    def test_get_runtime_intermediate_outputs(self):
+    def test_get_runtime_intermediate_outputs_and_op_names(self):
         # Create a context manager to patch functions called by Inspector.__init__
         with patch.object(
             _inspector, "parse_etrecord", return_value=None
@@ -560,25 +560,39 @@ def test_get_runtime_intermediate_outputs(self):
                 EventBlock(name=EVENT_BLOCK_NAME, events=self._gen_random_events())
             ]
 
-            runtime_outputs = inspector_instance._get_runtime_intermediate_outputs()
-            # This output should be a dictionary with 5 keys
+            runtime_outputs, op_names = (
+                inspector_instance._get_runtime_intermediate_outputs_and_op_names()
+            )
+            # These outputs and op_names dictionaries should all have 5 keys
             self.assertEqual(
                 len(runtime_outputs),
                 5,
             )
-            # Check that keys (0,) and (1,) are not in the dictionary(skip OPERATOR_CALL and op_types are empty)
+            self.assertEqual(
+                len(op_names),
+                5,
+            )
+
+            # Check that keys (0,) and (1,) are not in these two dictionaries(skip OPERATOR_CALL and op_types are empty)
             self.assertNotIn((0,), runtime_outputs)
             self.assertNotIn((1,), runtime_outputs)
+            self.assertNotIn((0,), op_names)
+            self.assertNotIn((1,), op_names)
 
             # Same debug_handle but different instruction_id, should record the last one
             self.assertIn((4,), runtime_outputs)
+            self.assertIn((4,), op_names)
             self.assertTrue(
                 torch.equal(runtime_outputs[(4,)][0], torch.tensor([4.0, 5.0, 6.0]))
             )
+            self.assertEqual(op_names[(4,)], "op_3")
+
             # Check that keys (5,) to (8,) are in the dictionary and have values of the correct size
             for key in range(5, 9):
                 self.assertIn((key,), runtime_outputs)
+                self.assertIn((key,), op_names)
                 self.assertEqual(len(runtime_outputs[(key,)]), RAW_DATA_SIZE)
+                self.assertEqual(op_names[(key,)], f"op_{key-1}")
 
     def test_calculate_numeric_gap(self):
         # Create a context manager to patch functions called by Inspector.__init__
@@ -608,8 +622,8 @@ def test_calculate_numeric_gap(self):
             }
 
             inspector_instance._aot_intermediate_outputs = aot_intermediate_outputs
-            inspector_instance._get_runtime_intermediate_outputs = (
-                lambda: runtime_intermediate_outputs
+            inspector_instance._get_runtime_intermediate_outputs_and_op_names = (
+                lambda: (runtime_intermediate_outputs, {})
             )
 
             df = inspector_instance.calculate_numeric_gap(distance="L1")