[Gluon] Fix splat returning auto encoding (#7490)

peterbell10 · web-flow · commit 041ec1b186ab · 2025-07-12T21:26:49.000+01:00
Previously we propagated the src encoding to all operands, even those
that aren't tensors. This led to errors when using a splat op returning
an auto encoding.
diff --git a/lib/Dialect/Gluon/Transforms/ResolveAutoEncodings.cpp b/lib/Dialect/Gluon/Transforms/ResolveAutoEncodings.cpp
@@ -121,9 +121,12 @@ LogicalResult inferAutoLayouts(FuncOp func) {
       } else {
         auto srcEncoding = inferSrcEncoding(definingOp, enc);
         if (srcEncoding) {
-          if (failed(updateEncoding(
-                  llvm::to_vector_of<Value>(definingOp->getOperands()),
-                  srcEncoding)))
+          llvm::SmallVector<Value> tensorOperands;
+          for (auto operand : definingOp->getOperands())
+            if (isa<RankedTensorType>(operand.getType()))
+              tensorOperands.push_back(operand);
+
+          if (failed(updateEncoding(tensorOperands, srcEncoding)))
             return failure();
         }
       }
diff --git a/test/Gluon/auto_encoding.mlir b/test/Gluon/auto_encoding.mlir
@@ -96,3 +96,23 @@ module attributes {"ttg.num-warps" = 4 : i32, "ttg.num-ctas" = 1 : i32} {
     tt.return %cvt : tensor<32xi32, #blocked>
   }
 }
+
+
+// -----
+
+
+#blocked = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
+
+module attributes {"ttg.target" = "cuda:90", "ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32, "ttg.threads-per-warp" = 32 : i32} {
+  tt.func public @infer_make_range() -> tensor<16xi32, #blocked> {
+    // CHECK-DAG: [[BLOCKED:#.*]] = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [32], warpsPerCTA = [4], order = [0]}>
+    // CHECK: [[CST:%.*]] = arith.constant 0 : i32
+    // CHECK: [[SPLAT:%.*]] = tt.splat [[CST]] : i32 -> tensor<16xi32, [[BLOCKED]]>
+    // CHECK: [[RES:%.*]] = ttg.convert_layout [[SPLAT]] : tensor<16xi32, [[BLOCKED]]> -> tensor<16xi32, [[BLOCKED]]>
+    // CHECK: tt.return [[RES]] : tensor<16xi32, [[BLOCKED]]>
+    %cst = arith.constant 0 : i32
+    %0 = tt.splat %cst : i32 -> tensor<16xi32, #gluon.auto_encoding>
+    %cvt = ttg.convert_layout %0 : tensor<16xi32, #gluon.auto_encoding> -> tensor<16xi32, #blocked>
+    tt.return %cvt : tensor<16xi32, #blocked>
+  }
+}