[Gluon] Fix inlining functions with gluon.set_auto_layout op (#7553)

peterbell10 · web-flow · commit cd6d25f7955e · 2025-07-17T14:15:12.000Z
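The Gluon dialect did not register an inliner interface, and the MLIR
inliner refuses to inline operations from a dialect that lacks one, so
functions containing gluon.set_auto_layout were never inlined. Register
TritonInlinerInterface on the Gluon dialect and add a regression test.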
diff --git a/lib/Dialect/Gluon/IR/Dialect.cpp b/lib/Dialect/Gluon/IR/Dialect.cpp
@@ -1,7 +1,7 @@
 #include "triton/Dialect/Gluon/IR/Dialect.h"
 
-#include "mlir/IR/DialectImplementation.h"
 #include "mlir/Support/LLVM.h"
+#include "triton/Dialect/Triton/IR/Interfaces.h"
 #include "llvm/ADT/TypeSwitch.h"
 
 using namespace mlir;
@@ -111,6 +111,7 @@ void GluonDialect::initialize() {
 #define GET_OP_LIST
 #include "triton/Dialect/Gluon/IR/Ops.cpp.inc"
       >();
+  addInterfaces<TritonInlinerInterface>();
   addInterfaces<GluonInferLayoutInterface>();
 }
 
diff --git a/test/Gluon/inlining.mlir b/test/Gluon/inlining.mlir
@@ -0,0 +1,20 @@
+// RUN: triton-opt %s --gluon-inline | FileCheck %s
+
+#blocked = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
+
+module attributes {"ttg.target" = "cuda:90", "ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.threads-per-warp" = 32 : i32} {
+  tt.func private @set_encoding(%arg0 : tensor<16xi32, #gluon.auto_encoding>) -> tensor<16xi32, #blocked> {
+    %cvt = gluon.set_auto_layout %arg0 : tensor<16xi32, #gluon.auto_encoding> -> tensor<16xi32, #blocked>
+    tt.return %cvt : tensor<16xi32, #blocked>
+  }
+
+  tt.func public @infer_make_range() -> tensor<16xi32, #blocked> {
+    // CHECK-DAG: [[BLOCKED:#.*]] = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
+    // CHECK: [[CST:%.*]] = arith.constant dense<0> : tensor<16xi32, #gluon.auto_encoding>
+    // CHECK: [[SET:%.*]] = gluon.set_auto_layout [[CST]] : tensor<16xi32, #gluon.auto_encoding> -> tensor<16xi32, [[BLOCKED]]>
+    // CHECK: tt.return [[SET]] : tensor<16xi32, [[BLOCKED]]>
+    %cst = arith.constant dense<0> : tensor<16xi32, #gluon.auto_encoding>
+    %0 = tt.call @"set_encoding"(%cst) : (tensor<16xi32, #gluon.auto_encoding>) -> tensor<16xi32, #blocked>
+    tt.return %0 : tensor<16xi32, #blocked>
+  }
+}