Commit f72772b

wconstab authored and pytorchmergebot committed

[PP] make runtime dbg log print custom actions (pytorch#167113)

Previously, the log only printed when an action's default implementation was used; now it prints before dispatching to custom registered actions. Tested by running the autoparallel graph runner and observing the forward-pass action being logged.

Pull Request resolved: pytorch#167113
Approved by: https://github.com/sanketpurandare, https://github.com/Skylion007
1 parent 981dd71 commit f72772b

File tree

1 file changed

+5
-6
lines changed


torch/distributed/pipelining/schedules.py

Lines changed: 5 additions & 6 deletions
@@ -2033,12 +2033,6 @@ def _perform_action(action: _Action) -> None:
             is_next_stage_on_this_rank = stage_idx + 1 in stage_index_to_stage
             is_prev_stage_on_this_rank = stage_idx - 1 in stage_index_to_stage

-            logger.debug(
-                "_PipelineScheduleRuntime running time_step %d, action %s",
-                time_step,
-                action,
-            )
-
             # TODO(whc) it's not actually safe to use _batch_p2p here in the uncommon case the model has skip-connections,
             # since we do not want to batch up ops between more than a pair of ranks. _sorted_batch_p2p would be
             # safe to use instead.
@@ -2191,6 +2185,11 @@ def _perform_action(action: _Action) -> None:
         # count either full_backward or backward_weight together, to determine when to sync DP grads
         self.backward_counter.clear()
         for time_step, action in enumerate(self.pipeline_order_with_comms[self.rank]):
+            logger.debug(
+                "_PipelineScheduleRuntime running time_step %d, action %s",
+                time_step,
+                action,
+            )
             try:
                 with record_function(_get_profiler_function_name(action)):
                     if action.computation_type in self._comp_type_to_function_map:
0 commit comments
