@@ -2714,12 +2714,22 @@ def _init(self, nodes: list[ir.Operation]) -> None:
         if (
             used_non_deterministic_runtime_estimations()
             and config_comms.runtime_estimations_align_across_all_distributed_ranks
-        ):
-            from .comms import (
-                align_runtime_estimations_across_all_distributed_ranks,
+            and (
+                config.runtime_estimations_mms_benchmark
+                or config_comms.runtime_estimations_use_nccl_lib_estimations
             )
+        ):
+            has_collectives = False
+            for node in self.nodes:
+                if is_collective(node.node):
+                    has_collectives = True
+                    break
+            if has_collectives:
+                from .comms import (
+                    align_runtime_estimations_across_all_distributed_ranks,
+                )
 
-            align_runtime_estimations_across_all_distributed_ranks(self.nodes)
+                align_runtime_estimations_across_all_distributed_ranks(self.nodes)
 
         from torch._logging import trace_structured
 
@@ -2742,8 +2752,11 @@ def _init(self, nodes: list[ir.Operation]) -> None:
         self.process_grouped_nodes()
 
         if (
+            # pyrefly: ignore[unbound-name]
             config.graph_partition
+            # pyrefly: ignore[unbound-name]
             and config.triton.cudagraphs
+            # pyrefly: ignore[unbound-name]
             and config.triton.reorder_for_reducing_graph_partitions
         ):
             self.nodes = self.maybe_reorder_for_minimizing_partition(self.nodes)
@@ -2755,6 +2768,7 @@ def _init(self, nodes: list[ir.Operation]) -> None:
         self.insert_memory_check_nodes()
 
         log_ir_post_fusion(self.nodes)
+        # pyrefly: ignore[unbound-name]
         V.debug.graph_diagram(self.nodes)
         self.debug_draw_graph()
 
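To make the intent of the first hunk concrete, below is a hedged, self-contained sketch of the gating it introduces: cross-rank alignment of runtime estimations is skipped unless an estimator that can disagree across ranks is enabled and the graph actually contains a collective. FakeNode, should_align_across_ranks, and the boolean flags are illustrative stand-ins, not Inductor's real scheduler API.

    from dataclasses import dataclass


    # Illustrative stand-in for a scheduler node; only the property the gate
    # inspects is modeled here.
    @dataclass
    class FakeNode:
        is_collective: bool


    def should_align_across_ranks(
        nodes: list[FakeNode],
        mms_benchmark: bool,
        nccl_lib_estimations: bool,
    ) -> bool:
        # Alignment is only needed when an estimator that can disagree across
        # ranks is enabled ...
        if not (mms_benchmark or nccl_lib_estimations):
            return False
        # ... and when the graph actually contains at least one collective op.
        return any(node.is_collective for node in nodes)


    # A graph without collectives never triggers cross-rank alignment.
    assert not should_align_across_ranks([FakeNode(False)], True, True)
    assert should_align_across_ranks([FakeNode(False), FakeNode(True)], False, True)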