
Commit d8eedcf

Add pre-defined lowering to NVPTX, AMDGPU and SPIR-V for PyTorch
Originally I wasn't planning to add these, since it's possible to just call llc. On the other hand, I started building this tool not only for engineers familiar with the MLIR compiler toolchain(s), but also for novices (like myself), and from that perspective it might make sense to have an (almost) final lowering for the target GPU.
1 parent c7bf63a commit d8eedcf
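For context, this is roughly the by-hand workflow the new options replace: generate LLVM IR with the existing "LLVM IR" option, then run opt and llc yourself. A minimal sketch in Python, assuming opt and llc are on PATH; the input file name model.ll is hypothetical:

import subprocess

# Hypothetical input: textual LLVM IR saved from the tool's "LLVM IR" option.
with open("model.ll") as f:
    llvm_ir = f.read()

# Optimize, then lower with llc: the same two stages the new
# nvptx/amdgpu/spirv options run automatically (see backend/server.py below).
optimized = subprocess.run(
    ["opt", "-O2", "-S", "-o", "-"],
    input=llvm_ir, capture_output=True, text=True, check=True,
).stdout
ptx = subprocess.run(
    ["llc", "-mtriple=nvptx64-nvidia-cuda", "-o", "-"],
    input=optimized, capture_output=True, text=True, check=True,
).stdout
print(ptx)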

File tree

3 files changed: +123, -3 lines

backend/server.py

Lines changed: 26 additions & 1 deletion
@@ -338,7 +338,7 @@ def generate_llvm_mlir(
 # First generate LLVM MLIR and then translate it to LLVM IR.
 def generate_llvm_ir(
     model, example_input, pipeline: List[Tuple[str, str]], dump_each: bool
-):
+) -> str:
     try:
         lowered_mlir = lower_to_llvm_mlir(model, example_input)

@@ -365,6 +365,23 @@ def generate_llvm_ir(
         raise IRGenerationError("Failed to generate LLVM IR.")


+# Generate NVPTX, AMDGPU or SPIR-V.
+def generate_target_gpu_ir(model, example_input, target: str) -> str:
+    try:
+        llvm_ir_module = generate_llvm_ir(model, example_input, [], False)
+        pipeline: list[tuple[str, str]] = [("opt", "-O2")]
+        if target == "nvptx":
+            pipeline.append(("llc", "-mtriple=nvptx64-nvidia-cuda"))
+        elif target == "amdgpu":
+            pipeline.append(("llc", "-mtriple=amdgcn-amd-amdhsa"))
+        elif target == "spirv":
+            pipeline.append(("llc", "-mtriple=spirv64-unknown-unknown"))
+        return apply_optional_passes(llvm_ir_module, pipeline, False)
+    except Exception:
+        logger.exception("Error generating target GPU IR.")
+        raise IRGenerationError("Failed to generate target GPU IR.")
+
+
 # TODO: Figure out static compilation.
 def compile_triton_ir(
     code: str, ir_type: str, pipeline: List[Tuple[str, str]], dump_each: bool
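
apply_optional_passes itself is not part of this diff; as a mental model, it presumably pipes the textual module through each (tool, flags) stage in order. A rough sketch under that assumption (the function body below is hypothetical, not the repository's implementation):

import shlex
import subprocess

def apply_optional_passes_sketch(module_text, pipeline, dump_each):
    # Pipe the module through each (tool, flags) stage on stdin/stdout,
    # e.g. ("opt", "-O2") followed by ("llc", "-mtriple=nvptx64-nvidia-cuda").
    out = module_text
    for tool, flags in pipeline:
        args = [tool, *shlex.split(flags), "-o", "-"]
        if tool == "opt":
            args.insert(1, "-S")  # keep LLVM IR textual between stages
        result = subprocess.run(args, input=out, capture_output=True,
                                text=True, check=True)
        out = result.stdout
        if dump_each:
            print(f"=== after {tool} {flags} ===\n{out}")
    return out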
@@ -547,6 +564,14 @@ def process_model(request: CodeRequest) -> str:
         combined_output += generate_llvm_ir(
             model, example_input, pipeline, request.dump_after_each_opt
         )
+    elif request.ir_type in ("nvptx", "amdgpu", "spirv"):
+        # FIXME?: this could really just be generate_llvm_ir with the pipeline.
+        # A dedicated function is preferred in case something smarter has to
+        # happen before lowering; for SPIR-V, for example, it would be nice to
+        # create a kernel first, i.e. write a pass and execute it here.
+        combined_output += generate_target_gpu_ir(
+            model, example_input, request.ir_type
+        )
     else:
         combined_output += "IR type not supported yet."
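
For reference, the generic route the FIXME mentions would look roughly like this, reusing generate_llvm_ir from this same file (a sketch, not part of the commit):

# Hypothetical equivalent of generate_target_gpu_ir(model, example_input,
# "nvptx"), expressed as generate_llvm_ir plus an explicit (tool, flags)
# pipeline, as the FIXME suggests.
combined_output += generate_llvm_ir(
    model, example_input,
    [("opt", "-O2"), ("llc", "-mtriple=nvptx64-nvidia-cuda")],
    False,
)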

src/app/page.js

Lines changed: 9 additions & 2 deletions
@@ -79,6 +79,9 @@ const pytorchIROptions = [
   { value: "stablehlo_mlir", label: "StableHLO MLIR" },
   { value: "llvm_mlir", label: "LLVM MLIR" },
   { value: "llvm_ir", label: "LLVM IR" },
+  { value: "nvptx", label: "NVPTX" },
+  { value: "amdgpu", label: "AMDGPU" },
+  { value: "spirv", label: "SPIR-V" },
   { value: "raw_ir", label: "Raw IR Output" },
 ];

@@ -756,8 +759,12 @@ export default function PyTorchTritonExplorer() {
     "triton_gpu_ir",
     "triton_llvm_ir",
   ].includes(irWin.selectedIR);
-  const allowLlvmOpt =
-    irWin.selectedIR !== "triton_nvptx";
+  const allowLlvmOpt = ![
+    "triton_nvptx",
+    "nvptx",
+    "amdgpu",
+    "spirv",
+  ].includes(irWin.selectedIR);
   const allowLLC = allowLlvmOpt;
   const allowUserTool = true;

Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@
+import pytest
+import httpx
+
+API_URL = "http://localhost:8000/generate_ir"
+
+code = """
+import torch
+import torch.nn as nn
+
+class MyModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.linear = nn.Linear(4, 4)
+
+    def forward(self, x):
+        return torch.relu(self.linear(x))
+
+model = MyModel()
+example_input = torch.randn(4, 4)
+"""
+
+
+def test_torch_nvptx_linear():
+    payload = {
+        "code": code,
+        "ir_type": "nvptx",
+        "custom_pipeline": [],
+        "torch_mlir_opt": "",
+        "mlir_opt": "",
+        "mlir_translate": "",
+        "llvm_opt": "",
+        "llc": "",
+        "user_tool": "",
+        "dump_after_each_opt": False,
+    }
+
+    response = httpx.post(API_URL, json=payload)
+    assert response.status_code == 200
+
+    ir = response.json()["output"]
+
+    assert "Generated by LLVM NVPTX Back-End" in ir
+    assert ".visible .func (.param .align 8 .b8 func_retval0[56]) main" in ir
+
+
+def test_torch_amdgpu_linear():
+    payload = {
+        "code": code,
+        "ir_type": "amdgpu",
+        "custom_pipeline": [],
+        "torch_mlir_opt": "",
+        "mlir_opt": "",
+        "mlir_translate": "",
+        "llvm_opt": "",
+        "llc": "",
+        "user_tool": "",
+        "dump_after_each_opt": False,
+    }
+
+    response = httpx.post(API_URL, json=payload)
+    assert response.status_code == 200
+
+    ir = response.json()["output"]
+
+    assert "amdgcn-amd-amdhsa--gfx700" in ir
+    assert "main:" in ir
+
+
+def test_torch_spirv_linear():
+    payload = {
+        "code": code,
+        "ir_type": "spirv",
+        "custom_pipeline": [],
+        "torch_mlir_opt": "",
+        "mlir_opt": "",
+        "mlir_translate": "",
+        "llvm_opt": "",
+        "llc": "",
+        "user_tool": "",
+        "dump_after_each_opt": False,
+    }
+
+    response = httpx.post(API_URL, json=payload)
+    assert response.status_code == 200
+
+    ir = response.json()["output"]
+
+    assert "OpCapability Kernel" in ir
