
Commit 8adba6e

kiya00 and Copilot authored
Improve reporting tools (#2787)
Co-authored-by: Copilot <[email protected]>
1 parent 9c1ac8e commit 8adba6e

File tree

3 files changed: +126 −29 lines changed


thunder/dynamo/benchmark_utils.py

Lines changed: 7 additions & 1 deletion

@@ -150,7 +150,7 @@ def compile(self, fn, *, inputs, **kwargs):
 
     # to_source will always use symbolic trace
     def to_source(self, fn_name):
-        return f"TorchInductorSpecification.torch_inductor({fn_name}, inputs)"
+        return f"TorchInductorSpecification.torch_inductor({fn_name}, inputs, skip_symbolic_trace={self.skip_symbolic_trace})"
 
     def import_str(self):
         return ["import torch", "from thunder.dynamo.benchmark_utils import TorchInductorSpecification"]

@@ -353,6 +353,12 @@ def time(self, stmt="pass", setup="pass", globals=None) -> Measurement:
             Measurement: A benchmarking result containing execution time statistics, see :class:`torch.utils.benchmark.utils.common.Measurement`.
         """
         t = TorchBenchmarkTimer(stmt=stmt, setup=setup, globals=globals, timer=self.inner_timer)
+        # If the timer measures an extremely short execution time, adaptive_autorange may hang.
+        # To prevent this, we perform a preliminary run to check for such cases, e.g. measuring kernel time on a CPU-only graph.
+        # If detected, we return the time of a single run, avoiding potential hangs.
+        pre_run = t.timeit(1)
+        if pre_run.median <= 1e-9:
+            return pre_run
         measurement = t.adaptive_autorange(
             threshold=self.threshold, min_run_time=self.min_run_time, max_run_time=self.max_run_time
         )
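For context on the guard above, here is a minimal standalone sketch of the same pattern written against the stock torch.utils.benchmark.Timer (the commit applies it inside Thunder's TorchBenchmarkTimer wrapper; the measure helper and the threshold/run-time values below are illustrative assumptions, not the wrapper's actual defaults):

# A minimal sketch, assuming only the public torch.utils.benchmark API.
import torch
from torch.utils.benchmark import Timer


def measure(stmt: str, env: dict):
    t = Timer(stmt=stmt, globals=env)
    # Preliminary single run: a near-zero median (e.g. a kernel-time timer on a
    # CPU-only graph that launches no GPU work) can make adaptive_autorange loop
    # for a very long time, so return the single-run measurement instead.
    pre_run = t.timeit(1)
    if pre_run.median <= 1e-9:
        return pre_run
    # Otherwise fall through to the adaptive measurement loop.
    return t.adaptive_autorange(threshold=0.1, min_run_time=0.01, max_run_time=10.0)


print(measure("x + x", {"x": torch.ones(4)}))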

thunder/dynamo/report.py

Lines changed: 57 additions & 23 deletions

@@ -520,7 +520,7 @@ def write_repro(
         code_str = f"{code_str}\n{main_code.format(graph_name=self.graph_name)}\n{comment_str}"
 
         if file_name is None:
-            file_name = f"{self.graph_name}.py"
+            file_name = f"{self.graph_name}_{compile_fn.name}_repro.py"
         with open(folder / file_name, "w") as f:
             print(code_str, file=f)
         format_python_file(folder / file_name)

@@ -633,7 +633,7 @@ def write_benchmark(
 
         code_str = f"{code_str}\n{main_code.format(graph_name=self.graph_name)}\n{comment_str}"
         if file_name is None:
-            file_name = f"{self.graph_name}.py"
+            file_name = f"{self.graph_name}_{compile_fn.name}_{time_fn.name}_benchmark.py"
         with open(folder / file_name, "w") as f:
             print(code_str, file=f)
         format_python_file(folder / file_name)

@@ -924,7 +924,7 @@ def write_nvfuser_benchmark(self, folder, time_fn: TimerInterface, file_name=None):
 {comment_str}
 """
         if file_name is None:
-            file_name = f"{self.name}_benchmark_nvfuser.py"
+            file_name = f"{self.name}_benchmark_nvfuser_{time_fn.name}.py"
         with open(folder / file_name, "w") as f:
             print(code_str, file=f)
         format_python_file(folder / file_name)

@@ -983,7 +983,7 @@ def write_inductor_benchmark(self, folder: PathLike, time_fn: TimerInterface, file_name=None):
     print(measurement)
 """
         if file_name is None:
-            file_name = f"{self.name}_benchmark_inductor.py"
+            file_name = f"{self.name}_benchmark_inductor_{time_fn.name}.py"
         with open(folder / file_name, "w") as f:
             f.write(code_str)
         format_python_file(folder / file_name)

@@ -1428,22 +1428,39 @@ def save_thunderfx_repros(
     Saves reproduction scripts for ThunderFX subgraphs.
 
     This function:
-    1. Creates a folder structure to organize the repros
-       .
-       └── graph0
-           ├── fusion_reports
-           │   ├── graph0_thunder_0_nvFusion0_forward_repro_nvfuser.py
-           │   ├── graph0_thunder_0_nvFusion1_forward_repro_nvfuser.py
-           │   ├── graph0_thunder_0_nvFusion2_backward_repro_nvfuser.py
-           ├── graph0_thunder_0_bwd_trace.py
-           ├── graph0_thunder_0_fwd_trace.py
-           └── graph0_thunder_0.py
+    1. Creates a folder structure to organize the repro or benchmark scripts:
+
+       If use_benchmark is True:
+           graph0/
+           ├── fusion_reports/
+           │   ├── graph0_thunder_0_nvFusion0_forward_benchmark_inductor_KernelTime.py
+           │   ├── graph0_thunder_0_nvFusion0_forward_benchmark_inductor_WallTimeWithMemoryUsage.py
+           │   ├── graph0_thunder_0_nvFusion0_forward_benchmark_nvfuser_KernelTime.py
+           │   └── graph0_thunder_0_nvFusion0_forward_benchmark_nvfuser_WallTimeWithMemoryUsage.py
+           ├── graph0_repro_torchcompile.py
+           ├── graph0_thunder_0_bwd_trace.py
+           ├── graph0_thunder_0_fwd_trace.py
+           ├── graph0_thunder_0_inductor_KernelTime_benchmark.py
+           ├── graph0_thunder_0_inductor_WallTimeWithMemoryUsage_benchmark.py
+           ├── graph0_thunder_0_thunder_KernelTime_benchmark.py
+           └── graph0_thunder_0_thunder_WallTimeWithMemoryUsage_benchmark.py
+
+       If use_benchmark is False:
+           graph0/
+           ├── fusion_reports/
+           │   ├── graph0_thunder_0_nvFusion0_forward_repro_inductor.py
+           │   └── graph0_thunder_0_nvFusion0_forward_repro_nvfuser.py
+           ├── graph0_repro_torchcompile.py
+           ├── graph0_thunder_0_fwd_trace.py
+           ├── graph0_thunder_0_bwd_trace.py
+           ├── graph0_thunder_0_inductor_repro.py
+           └── graph0_thunder_0_thunder_repro.py
 
     2. For each Thunder FX graph and its subgraphs:
-    - Checks runnability if requested
-    - Saves benchmark or repro scripts
-    - Saves trace information if requested
-    - Saves nvFusion repros if requested
+       - Checks runnability if requested
+       - Saves benchmark or repro scripts
+       - Saves trace information if requested
+       - Saves nvFusion repros if requested
 
     Args:
         fn: The callable to analyze

@@ -1452,7 +1469,7 @@ def save_thunderfx_repros(
         check_runnability: If True, checks if graphs can run with Thunder
         save_fusion: If True, saves nvFusion repros
         save_trace: If True, saves trace information
-        stream: Stream to write output log informationto
+        stream: Stream to write output log information to
         force_overwrite: If True, overwrites existing folder at folder_path
         **compile_kwargs: Keyword arguments for Thunder and torch.compile

@@ -1472,6 +1489,7 @@ def inner_fn(*args, **kwargs):
     for thunder_fxgraph_report in thunder_fxgraph_reports:
         graph_folder = folder_path / thunder_fxgraph_report.graph_name
         graph_folder.mkdir(exist_ok=True, parents=True)
+        thunder_fxgraph_report.write_inductor_repro(graph_folder)
         for split_report in thunder_fxgraph_report.subgraph_reports:
             if check_runnability or save_trace or save_fusion:
                 try:

@@ -1484,22 +1502,38 @@ def inner_fn(*args, **kwargs):
                     continue
                 else:
                     stream.write(f"Successfully ran the {split_report.graph_name} using Thunder\n")
+
+            from torch._inductor.compile_fx import graph_returns_tuple
+
+            # torch._inductor.compile requires the output to be a tuple; if it is not, the symbolic trace is necessary
+            skip_symbolic_trace = graph_returns_tuple(split_report.graph)
+            torchinductor = TorchInductorSpecification(skip_symbolic_trace=skip_symbolic_trace)
             if use_benchmark:
-                split_report.write_benchmark(graph_folder, thunderjit, WallTime)
+                split_report.write_benchmark(graph_folder, thunderjit, WallTimeWithMemoryUsage)
+                split_report.write_benchmark(graph_folder, thunderjit, KernelTime)
+
+                split_report.write_benchmark(graph_folder, torchinductor, WallTimeWithMemoryUsage)
+                split_report.write_benchmark(graph_folder, torchinductor, KernelTime)
             else:
                 split_report.write_repro(graph_folder, thunderjit)
+                split_report.write_repro(graph_folder, torchinductor)
             if save_trace:
                 with open(graph_folder / f"{split_report.graph_name}_fwd_trace.py", "w") as f:
                     f.write(str(split_report.fwd_trc))
-                with open(graph_folder / f"{split_report.graph_name}_bwd_trace.py", "w") as f:
-                    f.write(str(split_report.bwd_trc))
+                if split_report.bwd_trc is not None:
+                    with open(graph_folder / f"{split_report.graph_name}_bwd_trace.py", "w") as f:
+                        f.write(str(split_report.bwd_trc))
             if save_fusion:
                 fusion_folder = graph_folder / "fusion_reports"
                 fusion_folder.mkdir(exist_ok=True, parents=True)
                 for fusion_report in split_report.fusion_reports:
                     if use_benchmark:
-                        fusion_report.write_nvfuser_benchmark(fusion_folder, WallTime)
+                        fusion_report.write_nvfuser_benchmark(fusion_folder, WallTimeWithMemoryUsage)
+                        fusion_report.write_inductor_benchmark(fusion_folder, WallTimeWithMemoryUsage)
+                        fusion_report.write_nvfuser_benchmark(fusion_folder, KernelTime)
+                        fusion_report.write_inductor_benchmark(fusion_folder, KernelTime)
                     else:
                         fusion_report.write_nvfuser_repro(fusion_folder)
+                        fusion_report.write_inductor_repro(fusion_folder)
 
     return inner_fn
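To see what the graph_returns_tuple check distinguishes, here is a small sketch (single and tupled are hypothetical example functions; graph_returns_tuple is the same private torch._inductor.compile_fx helper the commit imports, so its location may change between torch versions):

# Sketch: torch._inductor.compile expects the GraphModule to return a tuple,
# so the extra symbolic trace is only needed when the graph does not.
import torch
from torch._inductor.compile_fx import graph_returns_tuple


def single(x):
    return x.cos()  # single-tensor output


def tupled(x):
    return (x.cos(),)  # tuple output


print(graph_returns_tuple(torch.fx.symbolic_trace(single)))  # False -> re-trace needed
print(graph_returns_tuple(torch.fx.symbolic_trace(tupled)))  # True  -> trace can be skipped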

thunder/tests/test_dynamo.py

Lines changed: 62 additions & 5 deletions

@@ -1575,7 +1575,7 @@ def foo(x):
     assert len(thunder_fx_graph_report.subgraph_reports) == 1  # cos
     thunder_split_report = thunder_fx_graph_report.subgraph_reports[0]
 
-    torchinductor = TorchInductorSpecification()
+    torchinductor = TorchInductorSpecification(skip_symbolic_trace=False)
     thunder_split_report.run_benchmark(torchinductor, WallTime)
     thunder_split_report.run_repro(torchinductor)
     thunder_split_report.write_benchmark(tmp_path, torchinductor, WallTime)

@@ -1601,14 +1601,14 @@ def foo(x):
     results = fx_report(foo)(x)
     with patch("thunder.dynamo.report.FXGraphReport.run_repro", side_effect=Exception("run_Repro raises exception")):
         save_failing_repros(results.fx_graph_reports, TorchCompileSpecification(), tmp_path)
-    assert os.path.exists(tmp_path / "graph0.py")
+    assert os.path.exists(tmp_path / "graph0_torchcompile_repro.py")
 
     # Tests for thunder split reports
     thunder_fxgraph_reports = get_thunder_fxgraph_reports(foo)(x)
     assert len(thunder_fxgraph_reports) == 1
     with patch("thunder.dynamo.report.FXGraphReport.run_repro", side_effect=Exception("run_Repro raises exception")):
         save_failing_repros(thunder_fxgraph_reports[0].subgraph_reports, ThunderCompileSpecification(), tmp_path)
-    assert os.path.exists(tmp_path / "graph0_thunder_0.py")
+    assert os.path.exists(tmp_path / "graph0_thunder_0_thunder_repro.py")
 
     # Tests for check_consistency
     def wrapped_fn(x):

@@ -1622,12 +1622,12 @@ def compile(self, fn, **kwargs):
     save_failing_repros(
         results.fx_graph_reports, _BadCompileSpecification(), tmp_path / "consistency", check_consistency=False
     )
-    assert not os.path.exists(tmp_path / "consistency" / "graph0.py")
+    assert not os.path.exists(tmp_path / "consistency" / "graph0_torcheager_repro.py")
 
     save_failing_repros(
         results.fx_graph_reports, _BadCompileSpecification(), tmp_path / "consistency", check_consistency=True
     )
-    assert os.path.exists(tmp_path / "consistency" / "graph0.py")
+    assert os.path.exists(tmp_path / "consistency" / "graph0_torcheager_repro.py")
 
 
 @requiresCUDA

@@ -1935,3 +1935,60 @@ def fn():
         "is a `torch.cuda.Stream` method which is not supported by Thunder" in getattr(reason, "info", "")
         for reason in split_reasons
     )
+
+
+@requiresCUDA
+@pytest.mark.parametrize("use_benchmark", (True, False), ids=("benchmark", "repro"))
+def test_save_thunderfx_repros(use_benchmark, tmp_path):
+    from thunder.dynamo.report import save_thunderfx_repros
+
+    x = torch.ones(2, 2, device="cuda", requires_grad=False)
+
+    def foo(x):
+        # torch.sinc has an automatic fallback registered,
+        # so that operation will be given to Inductor.
+        x = x.exp()
+        torch._dynamo.graph_break()
+        return torch.sinc(x) + torch.cos(x)
+
+    save_thunderfx_repros(
+        foo,
+        tmp_path,
+        use_benchmark=use_benchmark,
+        check_runnability=True,
+        save_fusion=True,
+        save_trace=True,
+        force_overwrite=True,
+        disable_torch_autograd=True,
+    )(x)
+
+    # Checks that the scripts are generated correctly
+    subdirs = [d for d in os.listdir(tmp_path) if os.path.isdir(os.path.join(tmp_path, d))]
+    for d in subdirs:
+        assert d.startswith("graph"), f"{d} is not a graph folder"
+    assert len(subdirs) == 2, f"there should be 2 graphs, but in fact {subdirs}"
+
+    num_backend = 2
+    num_traces = 1  # forward only
+    if use_benchmark:
+        num_g_files = num_backend * 2 + num_traces + 1
+        num_fusion_files = num_backend * 2
+    else:
+        num_g_files = num_backend + num_traces + 1
+        num_fusion_files = num_backend
+
+    for d in subdirs:
+        graph_dir = os.path.join(tmp_path, d)
+        # Checks that the fusion_reports directory exists
+        fusion_reports_dir = os.path.join(graph_dir, "fusion_reports")
+        assert os.path.isdir(fusion_reports_dir), f"{fusion_reports_dir} doesn't exist"
+        # Checks that the graph directory has the correct number of files
+        g_files = [f for f in os.listdir(graph_dir) if os.path.isfile(os.path.join(graph_dir, f))]
+        assert len(g_files) == num_g_files, f"{graph_dir} should have {num_g_files} files, but in fact {g_files}"
+        # Checks that the fusion_reports directory has the correct number of files
+        fusion_files = [
+            f for f in os.listdir(fusion_reports_dir) if os.path.isfile(os.path.join(fusion_reports_dir, f))
+        ]
+        assert len(fusion_files) == num_fusion_files, (
+            f"{fusion_reports_dir} should have {num_fusion_files} files, but in fact {fusion_files}"
+        )
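Reading the expected counts in the test above: with num_backend = 2 (Thunder and Inductor), num_traces = 1 (forward only, since the input has requires_grad=False), and one whole-graph graph0_repro_torchcompile.py script per graph folder, benchmark mode expects 2 * 2 + 1 + 1 = 6 files in each graph folder and 2 * 2 = 4 scripts in fusion_reports, while repro mode expects 2 + 1 + 1 = 4 and 2 respectively.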
