Fix inductor collective runtime units (pytorch#168055)

soulitzer · pytorchmergebot · commit f077ecab9243 · 2025-11-18T16:38:31.000Z
Pull Request resolved: pytorch#168055 Approved by: https://github.com/eellison ghstack dependencies: pytorch#166536
diff --git a/torch/_inductor/comm_analysis.py b/torch/_inductor/comm_analysis.py
@@ -321,7 +321,7 @@ def estimate_nccl_collective_runtime_impl(
 
 def estimate_nccl_collective_runtime(node: ir.IRNode) -> float:
     """
-    Returns estimated NCCL collective runtime in nanoseconds (ns).
+    Returns estimated NCCL collective runtime in milliseconds (ms).
 
     The following heuristics are copied from https://github.com/NVIDIA/nccl/blob/master/src/graph/tuning.cc.
     We aim to estimate the runtime as accurately as possible.
@@ -355,7 +355,7 @@ def estimate_nccl_collective_runtime_from_fx_node(
     use_nccl_estimator: bool = True,
 ) -> float:
     """
-    Returns estimated NCCL collective runtime in nanoseconds (ns).
+    Returns estimated NCCL collective runtime in milliseconds (ms).
 
     The following heuristics are copied from https://github.com/NVIDIA/nccl/blob/master/src/graph/tuning.cc.
     We aim to estimate the runtime as accurately as possible.