[BACKEND] Retain mlir reproducer temporaries from prior run pass pipelines (#8113)

plotfi · web-flow · commit 625c8cb4545c · 2025-09-10T13:13:11.000-04:00
Currently, the MLIR reproducer written for each pass pipeline run overwrites the previous one at the `TRITON_REPRODUCER_PATH` path. This change allows passing a reproducer suffix when calling pm.run(), so that the reproducers from all previously run pipelines are retained instead of only the one from the most recently run pass pipeline. This is important because, with multiple pipelines, every earlier pipeline's reproducer must be retained in order to reproduce the full compilation sequence. - [X] I am not making a trivial change, such as fixing a typo in a comment. - [X] I have written a PR description following these [rules](https://cbea.ms/git-commit/#why-not-how). - [X] I have run `pre-commit run --from-ref origin/main --to-ref HEAD`. - Select one of the following. - [X] I have added tests. - `/python/test` for end-to-end tests - Select one of the following. - [X] I have not added any `lit` tests.
diff --git a/python/src/ir.cc b/python/src/ir.cc
@@ -1856,7 +1856,7 @@ void init_triton_ir(py::module &&m) {
            })
       .def(
           "run",
-          [](PassManager &self, ModuleOp &mod) {
+          [](PassManager &self, ModuleOp &mod, std::string repro_pipeline_tag) {
             // TODO: maybe dump module to file and print error for better
             // diagnostics
 
@@ -1867,6 +1867,11 @@ void init_triton_ir(py::module &&m) {
             auto reproducerPath =
                 triton::tools::getStrEnv("TRITON_REPRODUCER_PATH");
             if (!reproducerPath.empty()) {
+              if (reproducerPath != "-") {
+                std::string repro_suffix =
+                    "." + repro_pipeline_tag + ".repro.mlir";
+                reproducerPath += repro_suffix;
+              }
               auto anchorName = self.getOpAnchorName();
               auto passes = self.getPasses();
               Operation *op = mod.getOperation();
diff --git a/python/test/unit/language/test_reproducer.py b/python/test/unit/language/test_reproducer.py
@@ -1,5 +1,6 @@
 import triton
 import re
+import os
 
 
 def test_triton_reproducer_path(monkeypatch, tmp_path):
@@ -13,17 +14,25 @@ def triton_():
     # We need an temp empty file for MLIR to write the reproducer to, and then
     # the TRITON_REPRODUCER_PATH env var enables crash the reproduction
     # generation in MLIR.
-    repro_path = tmp_path / "repro.mlir"
-    repro_path.touch()
+    repro_path = tmp_path / "repro_prefix"
     monkeypatch.setenv("TRITON_REPRODUCER_PATH", str(repro_path))
 
     # Run the kernel so MLIR will generate a crash reproducer. It doesn't really
     # matter what the kernel does, just that the PassManager runs its passes.
     triton_[(1, )]()
 
-    repro = repro_path.read_text()
-    assert "mlir_reproducer" in repro, f"Expected MLIR reproducer in {repro_path}. Got:\n{repro}"
-    m = re.search(r"pipeline: \"(.*)\"", repro)
-    assert m, "Expected to match pass pipeline after \"pipeline:\" in MLIR reproducer"
-    pipeline_str = m.group(1)
-    assert pipeline_str, "Expected non-empty pass pipeline in MLIR reproducer"
+    stages = {
+        'make_ttir': "triton-combine",
+        'make_ttgir': "triton.*-coalesce",
+        'make_llir': "convert-triton-.*gpu-to-llvm",
+    }
+
+    for stage_name, stage_pipeline_check in stages.items():
+        assert os.path.exists(str(repro_path) + '.' + stage_name + '.repro.mlir')
+        curr_repro_path = tmp_path / ("repro_prefix." + stage_name + ".repro.mlir")
+        repro = curr_repro_path.read_text()
+        assert "mlir_reproducer" in repro, f"Expected MLIR reproducer in {curr_repro_path}. Got:\n{repro}"
+        m = re.search(r"pipeline: \"(.*" + stage_pipeline_check + ".*)\"", repro)
+        assert m, "Expected to match pass pipeline after \"pipeline:\" in MLIR reproducer"
+        pipeline_str = m.group(1)
+        assert pipeline_str, "Expected non-empty pass pipeline in MLIR reproducer"
diff --git a/third_party/amd/backend/compiler.py b/third_party/amd/backend/compiler.py
@@ -196,7 +196,7 @@ def make_ttir(mod, metadata, options):
         passes.ttir.add_triton_licm(pm)
         passes.common.add_symbol_dce(pm)
         passes.ttir.add_loop_unroll(pm)
-        pm.run(mod)
+        pm.run(mod, 'make_ttir')
         return mod
 
     @staticmethod
@@ -205,7 +205,7 @@ def make_ttgir(mod, metadata, options):
         pm.enable_debug()
         passes.ttir.add_convert_to_ttgpuir(pm, f"hip:{options.arch}", options.num_warps, options.warp_size,
                                            options.num_ctas)
-        pm.run(mod)
+        pm.run(mod, 'make_ttgir_early')
         pm = ir.pass_manager(mod.context)
         pm.enable_debug()
         passes.ttgpuir.add_coalesce(pm)
@@ -254,7 +254,7 @@ def make_ttgir(mod, metadata, options):
         passes.common.add_symbol_dce(pm)
         if use_async_copy:
             amd.passes.ttgpuir.add_update_async_wait_count(pm, options.arch)
-        pm.run(mod)
+        pm.run(mod, 'make_ttgir')
         return mod
 
     @staticmethod
@@ -270,7 +270,7 @@ def gluon_to_ttgir(src, metadata, options):
         passes.gluon.add_canonicalizer(pm)
         passes.ttgpuir.add_combine_tensor_select_and_if(pm)
 
-        pm.run(mod)
+        pm.run(mod, 'gluon_to_ttgir')
         return mod
 
     @staticmethod
@@ -323,7 +323,7 @@ def make_llir(src, metadata, options):
             passes.llvmir.add_di_scope(pm)
 
         amd.passes.ttgpuir.add_builtin_func_to_llvmir(pm, __HIP_FTZ)
-        pm.run(mod)
+        pm.run(mod, 'make_llir')
 
         # LLVM-IR (MLIR) -> LLVM-IR (LLVM)
         llvm.init_targets()
diff --git a/third_party/nvidia/backend/compiler.py b/third_party/nvidia/backend/compiler.py
@@ -239,7 +239,7 @@ def make_ttir(mod, metadata, opt, capability):
         passes.common.add_cse(pm)
         passes.common.add_symbol_dce(pm)
         passes.ttir.add_loop_unroll(pm)
-        pm.run(mod)
+        pm.run(mod, 'make_ttir')
         return mod
 
     @staticmethod
@@ -316,7 +316,7 @@ def make_ttgir(mod, metadata, opt, capability):
         passes.common.add_cse(pm)
         passes.common.add_canonicalizer(pm)
 
-        pm.run(mod)
+        pm.run(mod, 'make_ttgir')
         metadata["cluster_dims"] = (cluster_info.clusterDimX, cluster_info.clusterDimY, cluster_info.clusterDimZ)
         tensordesc_meta = mod.get_tensordesc_metadata()
         metadata["tensordesc_meta"] = tensordesc_meta
@@ -334,7 +334,7 @@ def gluon_to_ttgir(self, src, metadata, options, capability):
         passes.gluon.add_canonicalizer(pm)
         passes.ttgpuir.add_combine_tensor_select_and_if(pm)
 
-        pm.run(mod)
+        pm.run(mod, 'gluon_to_ttgir')
         metadata["tensordesc_meta"] = mod.get_tensordesc_metadata()
         return mod
 
@@ -374,7 +374,7 @@ def make_llir(self, src, metadata, options, capability):
         if CUDABackend.instrumentation:
             CUDABackend.instrumentation.patch("llvmir_to_llvm", pm, mod.context)
 
-        pm.run(mod)
+        pm.run(mod, 'make_llir')
         # LLVM-IR (MLIR) -> LLVM-IR (LLVM)
         llvm.init_targets()
         context = llvm.context()