
Commit af5daad

Finalize support for Triton (#42)
Triton installation is a bit tricky, as it's mandatory to have either the NVIDIA or AMD backend installed on the system (which can also differ from version to version and be hardware dependent). So no automatic installation is provided with this commit. Instead, a PYTORCH_INDEX env variable is introduced to change the default CPU PyTorch nightly wheels to whatever index a user desires (see the README changes below for usage).
1 parent 88924af commit af5daad

File tree

8 files changed (+74, −28 lines)

Dockerfile

Lines changed: 1 addition & 1 deletion
````diff
@@ -46,7 +46,7 @@ RUN python3 -m venv /opt/venv && \
     /opt/venv/bin/pip install --pre torch-mlir torchvision \
         --extra-index-url=https://download.pytorch.org/whl/nightly/cpu \
         -f https://github.com/llvm/torch-mlir-release/releases/expanded_assets/dev-wheels && \
-    /opt/venv/bin/pip install fastapi uvicorn pytest httpx
+    /opt/venv/bin/pip install triton fastapi uvicorn pytest httpx
 
 # Create non-root user and fix permissions
 RUN useradd -u 10001 -m --shell /usr/sbin/nologin appuser && \
````

Dockerfile.backend

Lines changed: 4 additions & 4 deletions
````diff
@@ -15,10 +15,10 @@ RUN apt-get update && \
 RUN useradd -u 10001 -m --shell /usr/sbin/nologin appuser && \
     mkdir -p /home/appuser/.cache
 
-RUN wget -qO- https://apt.llvm.org/llvm.sh | bash -s -- 21 && \
+RUN wget -qO- https://apt.llvm.org/llvm.sh | bash -s -- 22 && \
     apt-get update && \
     apt-get install -y --no-install-recommends \
-        libmlir-21-dev mlir-21-tools && \
+        libmlir-22-dev mlir-22-tools && \
     rm -rf /var/lib/apt/lists/*
 
 COPY --chown=10001:10001 backend /app/backend
@@ -28,13 +28,13 @@ RUN python3 -m venv /opt/venv && \
     /opt/venv/bin/pip install --pre torch-mlir torchvision \
         --extra-index-url=https://download.pytorch.org/whl/nightly/cpu \
         -f https://github.com/llvm/torch-mlir-release/releases/expanded_assets/dev-wheels && \
-    /opt/venv/bin/pip install fastapi uvicorn pydantic
+    /opt/venv/bin/pip install triton fastapi uvicorn pydantic
 
 RUN chown -R appuser:appuser /home/appuser/.cache /app
 
 USER appuser
 
-ENV PATH="/opt/venv/bin:/usr/lib/llvm-21/bin:$PATH"
+ENV PATH="/opt/venv/bin:/usr/lib/llvm-22/bin:$PATH"
 
 EXPOSE 8000
 CMD ["uvicorn", "backend.server:app", "--host", "0.0.0.0", "--port", "8000"]
````

README.md

Lines changed: 17 additions & 11 deletions
````diff
@@ -38,19 +38,19 @@ tracing models through various IR stages and transformations.
 - Node.js + npm
 - PyTorch
 - Torch-MLIR
-- Triton
 - LLVM with mlir-opt
+- Triton
 
 To set up PyTorch and Torch-MLIR, it's a good idea to visit the https://github.com/llvm/torch-mlir repository and follow the instructions from there.
 
 The current version of the application is tested on Ubuntu 22.04 under the Windows Subsystem for Linux, using LLVM 22 dev.
 
 Triton requires that PyTorch be compiled with CUDA or ROCm support. When
 installing PyTorch, pick the desired accelerator build. For example, to install
-a CUDA 12.4 wheel you can run (note: this is not included in scripts and dockerfiles):
+a CUDA 12.8 wheel you can run (note: this is not included in the scripts and Dockerfiles; at least this works with my Blackwell GPU):
 
 ```bash
-pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu124
+pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/cu128
 ```
 
 ### Install dependencies
@@ -61,12 +61,24 @@ git clone https://github.com/MrSidims/PytorchExplorer.git
 cd PytorchExplorer
 ```
 
+To use custom builds of `torch-mlir-opt`, `mlir-opt`, etc. without placing them in your `$PATH`, configure the following environment variables:
+- `TORCH_MLIR_OPT_PATH`
+- `LLVM_BIN_PATH`
+- `TRITON_OPT_PATH`
+- `PYTORCH_INDEX` – index URL for installing PyTorch; defaults to the nightly CPU wheels.
+
+For example, to install CUDA-enabled nightly wheels (CUDA 12.8):
+```bash
+PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/cu128 \
+source setup_backend.sh
+```
+
 Install frontend dependencies:
 ```bash
 source setup_frontend.sh
 ```
 
-Set up backend (Torch, MLIR, etc.):
+Set up the backend (Torch, MLIR, etc.; note that unless `PYTORCH_INDEX` is set, the script installs CPU wheels):
 ```bash
 source setup_backend.sh
 ```
@@ -76,11 +88,6 @@ If you already have a working venv for Torch-MLIR, you can just install FastAPI
 pip install fastapi uvicorn pytest httpx
 ```
 
-To use custom builds of `torch-mlir-opt`, `mlir-opt`, etc. without placing them in your `$PATH`, configure the following environment variables:
-- `TORCH_MLIR_OPT_PATH`
-- `LLVM_BIN_PATH`
-- `TRITON_OPT_PATH`
-
 ### Run the application
 
 If you reused the `setup_backend.sh` script, activate the environment with
@@ -220,5 +227,4 @@ For more details about IR lowering, please see [PyTorch Lowerings](docs/pytorch_lowering.md).
 
 ## Integration with your frontend or backend
 
-Refer to the [Integration Guide](docs/integration_guide.md) for details on the API contracts and communication between the frontend and backend used in this project.
-
+Refer to the [Integration Guide](docs/integration_guide.md) for details on the API contracts and communication between the frontend and backend used in this project.
````

backend/server.py

Lines changed: 28 additions & 7 deletions
````diff
@@ -1,6 +1,7 @@
 import subprocess
 import tempfile
 import os
+import linecache
 import glob
 import uuid
 import hashlib
@@ -413,7 +414,7 @@ def generate_target_gpu_ir(model, example_input, target: str) -> str:
         raise IRGenerationError(f"Failed to generate LLVM IR: {e}") from e
 
 
-# TODO: Figure out static compilation.
+# Compile Triton IR.
 def compile_triton_ir(
     code: str, ir_type: str, pipeline: List[Tuple[str, str]], dump_each: bool
 ) -> str:
@@ -470,6 +471,7 @@ def compile_triton_ir(
         "triton_gpu_ir": "*.ttgir",
         "triton_llvm_ir": "*.llir",
         "triton_nvptx": "*.ptx",
+        "triton_amdgpu": "*.hsaco",
     }
 
     pattern = pattern_map.get(ir_type)
@@ -537,8 +539,20 @@ def process_model(request: CodeRequest) -> str:
             request.code, request.ir_type, pipeline, request.dump_after_each_opt
         )
 
-    if request.ir_type == "raw_ir" and request.selected_language == "pytorch":
-        # Execute user Python, capture stdout.
+    if request.ir_type == "raw_ir" and request.selected_language in (
+        "pytorch",
+        "triton",
+    ):
+        # If raw IR is requested, we execute the user code directly.
+        # Prepare a fake file for linecache to make
+        # inspect.getsourcelines() work.
+        fake_name = "<string>"
+        source_code = request.code
+        lines = [ln + "\n" for ln in source_code.splitlines()]
+        linecache.cache[fake_name] = (
+            len(source_code), None, lines, fake_name
+        )
+        # Execute user code, capture stdout.
         try:
             with tempfile.TemporaryDirectory() as tmpdir:
                 stdout_path = os.path.join(tmpdir, "captured_output.txt")
@@ -555,10 +569,17 @@ def process_model(request: CodeRequest) -> str:
                 captured, build_pipeline(request), request.dump_after_each_opt
             )
         except Exception as e:
-            logger.exception("User code with manual IR print execution failed.")
-            raise PytorchExecutionError(
-                f"Code raised an exception during execution: {e}"
-            ) from e
+            logger.exception(
+                "User code with manual IR print execution failed."
+            )
+            if request.selected_language == "pytorch":
+                raise PytorchExecutionError(
+                    f"Code raised an exception during execution: {e}"
+                ) from e
+            else:
+                raise TritonExecutionError(
+                    f"Triton code execution raised an exception: {e}"
+                ) from e
 
     if request.ir_type == "raw_ir":
         return apply_optional_passes(
````
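
The `linecache` priming added above is what lets `inspect.getsourcelines()` recover source for code that arrives as a string over HTTP; Triton's `@triton.jit` inspects the kernel's source, which would otherwise fail for `exec()`-ed strings. A minimal standalone sketch of the trick (the `source` string and `kernel` function here are hypothetical, not from the repo):

```python
import inspect
import linecache

# Hypothetical user code received as a string, as in process_model.
source = "def kernel():\n    return 42\n"
fake_name = "<string>"

# Prime linecache with a fake entry: (size, mtime, lines, fullname).
# mtime=None tells linecache.checkcache() not to evict the entry.
lines = [ln + "\n" for ln in source.splitlines()]
linecache.cache[fake_name] = (len(source), None, lines, fake_name)

# exec() the string under the same fake filename...
namespace = {}
exec(compile(source, fake_name, "exec"), namespace)

# ...and inspect can now resolve the source via the linecache entry.
print(inspect.getsource(namespace["kernel"]))
```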

docs/integration_guide.md

Lines changed: 4 additions & 2 deletions
````diff
@@ -1,5 +1,5 @@
 # PyTorch Explorer — Integration Guide
-*(last updated : 2025-06-04, commit: 5c5c42)*
+*(last updated : 2025-08-03)*
 
 This document explains **how to integrate with the PyTorch explorer as IR-Playground compiler
 service**—either by
@@ -32,6 +32,7 @@ service**—either by
 | `TORCH_MLIR_OPT_PATH` | Directory ending with `/` that contains `torch-mlir-opt` | `/opt/llvm/bin/` |
 | `LLVM_BIN_PATH` | Directory that contains `mlir-opt`, `mlir-translate`, `opt`, `llc` | `/opt/llvm/bin/` |
 | `TRITON_OPT_PATH` | Directory that contains `triton-opt`, `triton-llvm-opt` | `/opt/triton/bin/` |
+| `PYTORCH_INDEX` | Extra index URL used by `setup_backend.sh` to install PyTorch | `https://download.pytorch.org/whl/nightly/cpu` |
 
 For the reference React UI (and any client based on it) set `NEXT_PUBLIC_BACKEND_URL`
 to point at the running backend instance when they live on different machines.
@@ -122,6 +123,7 @@ Calling this is optional but keeps /tmp tidy on long-running servers.
 | | triton_gpu_ir | *.ttgir | |
 | | triton_llvm_ir | *.llir | |
 | | triton_nvptx | *.ptx | |
+| | triton_amdgpu | *.hsaco | |
 | Raw | raw_ir | Echo-style (no generation) | |
 
 ## 4. **Backend internals (reference implementation)**
@@ -238,7 +240,7 @@ GET /version -> "ir-backend 1.1.0-rust"
 
 - New dialect -> implement `generate_<dialect>()`, register it in `process_model`, add value to §3 and to the frontend dropdown.
 - New compiler tool -> add a clause in `apply_optional_passes`.
-- Timeouts / resource limits -> see `compile_triton_ir(... timeout=20)`.
+- Timeouts / resource limits -> see `compile_triton_ir(... timeout=60)`.
 
 ## 8. **Appendix — 20-line TypeScript helper**
````
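
Given the IR-type table above, a Triton request can be exercised end-to-end with a few lines of Python. This is a hedged sketch, not the guide's canonical client: the `/compile` endpoint path and port are assumptions based on the reference Dockerfiles, and the field names follow `CodeRequest` as seen in `backend/server.py`:

```python
import httpx  # installed alongside the backend's test dependencies

# Assumed payload shape, mirroring CodeRequest fields from backend/server.py.
payload = {
    "code": open("add_kernel.py").read(),  # Triton kernel source to compile
    "ir_type": "triton_gpu_ir",            # maps to *.ttgir per the table above
    "selected_language": "triton",
    "dump_after_each_opt": False,
}

# The "/compile" path is an assumption; check the FastAPI routes before use.
resp = httpx.post("http://localhost:8000/compile", json=payload, timeout=120.0)
resp.raise_for_status()
print(resp.text)  # the requested IR dump
```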

docs/pytorch_lowering.md

Lines changed: 8 additions & 1 deletion
````diff
@@ -131,4 +131,11 @@ Each stage can optionally be configured to **dump intermediate IR**, making the
 
 ---
 
-For Triton models, see the separate section on `compile_triton_ir`, which handles IR extraction from the Triton JIT cache.
+For Triton models, see the separate section on `compile_triton_ir`, which handles IR extraction from the Triton JIT cache. Supported
+`ir_type` values include:
+
+- `triton_ir` – Triton compiler dump (`*.ttir`)
+- `triton_gpu_ir` – `*.ttgir`
+- `triton_llvm_ir` – `*.llir`
+- `triton_nvptx` – `*.ptx`
+- `triton_amdgpu` – `*.hsaco`
````
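
As a rough illustration of the extraction step, `compile_triton_ir` runs the kernel and then picks those artifacts out of Triton's JIT cache by glob pattern. A sketch, under the assumption that the cache lives in the default `~/.triton/cache` location (Triton honors `TRITON_CACHE_DIR` to override it):

```python
import glob
import os

# Look for compiler artifacts left behind after a Triton kernel has run.
# The default cache location is a Triton implementation detail and may change.
cache_dir = os.environ.get(
    "TRITON_CACHE_DIR", os.path.expanduser("~/.triton/cache")
)
for pattern in ("*.ttir", "*.ttgir", "*.llir", "*.ptx", "*.hsaco"):
    for path in glob.glob(os.path.join(cache_dir, "**", pattern), recursive=True):
        print(pattern, "->", path)
```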

setup_backend.sh

Lines changed: 5 additions & 1 deletion
````diff
@@ -30,10 +30,14 @@ source mlir_venv/bin/activate
 
 echo "Installing torch-mlir and dependencies..."
 pip install --upgrade pip
+PYTORCH_INDEX="${PYTORCH_INDEX:-https://download.pytorch.org/whl/nightly/cpu}"
 pip install --pre torch-mlir torchvision \
-    --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
+    --extra-index-url "$PYTORCH_INDEX" \
     -f https://github.com/llvm/torch-mlir-release/releases/expanded_assets/dev-wheels
 
+echo "Installing Triton..."
+pip install triton
+
 echo "Installing FastAPI and Uvicorn..."
 pip install fastapi uvicorn
 
````
src/app/ExplorerContent.js

Lines changed: 7 additions & 1 deletion
````diff
@@ -20,6 +20,8 @@ model = MyModel()
 example_input = torch.randn(4, 4)
 # If you have multiple models, wrap each model and input tensor pair using:
 # __explore__(model, input_tensor)
+# To use your own means to compile Triton IR, please
+# select raw IR output.
 `;
 
 const defaultTritonCode = `import triton
@@ -45,6 +47,8 @@ z = torch.empty_like(x)
 
 grid = lambda meta: (triton.cdiv(N, BLOCK_SIZE),)
 add_kernel[grid](x, y, z, N)
+# To use your own means to compile Triton IR, please
+# select raw IR output.
 `;
 
 const defaultRawIRCode = `module {
@@ -90,6 +94,8 @@ const tritonIROptions = [
   { value: "triton_gpu_ir", label: "Triton GPU IR" },
   { value: "triton_llvm_ir", label: "LLVM IR" },
   { value: "triton_nvptx", label: "NVPTX" },
+  { value: "triton_amdgpu", label: "ROCm" },
+  { value: "raw_ir", label: "Raw IR Output" },
 ];
 
 const rawIROptions = [{ value: "raw_ir", label: "Raw IR Output" }];
@@ -554,8 +560,8 @@ export default function ExplorerContent() {
                 style={{ margin: "10px 0" }}
               >
                 <option value="pytorch">PyTorch</option>
+                <option value="triton">Triton</option>
                 <option value="raw_ir">Raw IR Input</option>
-                <option value="triton">Triton (experimental support)</option>
               </select>
               <div
                 style={{
````
