Check layout within tensor_descriptor -> block_pointer conversion pass

mieshkiwrk · mieshkiwrk · commit 93a910f252de · 2025-11-28T21:06:08.000Z
diff --git a/python/triton/experimental/gluon/language/__init__.py b/python/triton/experimental/gluon/language/__init__.py
@@ -50,6 +50,7 @@
     device_assert,
     device_print,
     dot_fma,
+    xpu_dot_fma,
     expand_dims,
     full,
     fp4_to_fp,
diff --git a/python/triton/experimental/gluon/language/_core.py b/python/triton/experimental/gluon/language/_core.py
@@ -590,3 +590,9 @@ def dot_fma(a, b, acc, _semantic=None):
 
     handle = _semantic.dot(a, b, acc, input_precision=None, max_num_imprecise_acc=None, out_dtype=acc.dtype).handle
     return tensor(handle, acc.type)
+
+
+@builtin
+def xpu_dot_fma(a, b, acc, _semantic=None):
+    handle = _semantic.dot(a, b, acc, input_precision=None, max_num_imprecise_acc=None, out_dtype=acc.dtype).handle
+    return tensor(handle, acc.type)
diff --git a/python/triton/experimental/gluon/language/intel/__init__.py b/python/triton/experimental/gluon/language/intel/__init__.py
@@ -1,3 +1,4 @@
 from ._layouts import IntelDPASLayout
+from . import xpu
 
-__all__ = ["IntelDPASLayout"]
+__all__ = ["IntelDPASLayout", "xpu"]
diff --git a/python/triton/experimental/gluon/language/intel/xpu/__init__.py b/python/triton/experimental/gluon/language/intel/xpu/__init__.py
@@ -0,0 +1,4 @@
+from . import xe
+
+__all__ = ["xe"]
+
diff --git a/third_party/intel/lib/Dialect/Triton/Transforms/TensorDescToBlockPointer.cpp b/third_party/intel/lib/Dialect/Triton/Transforms/TensorDescToBlockPointer.cpp
@@ -109,7 +109,8 @@ struct TritonIntelTensorDescToBlockPointer
   tt::MakeTensorPtrOp
   findOrCreateMakeTensorPtr(Location loc, Value base, ValueRange shape,
                             ValueRange strides, ValueRange offsets,
-                            ArrayRef<int32_t> sizes, OpBuilder &builder) {
+                            ArrayRef<int32_t> sizes, Attribute encoding,
+                            OpBuilder &builder) {
     Block *block = builder.getInsertionBlock();
     const Block::iterator insertPoint = builder.getInsertionPoint();
     auto it = std::find_if(block->begin(), insertPoint, [&](Operation &op) {
@@ -134,8 +135,15 @@ struct TritonIntelTensorDescToBlockPointer
     });
 
     auto makeTensorPtrOp = [&]() {
+      // Create the tensor type with encoding
+      auto pointerType = cast<mlir::triton::PointerType>(base.getType());
+      auto tensorType = RankedTensorType::get(
+          SmallVector<int64_t>(sizes.begin(), sizes.end()),
+          pointerType.getPointeeType(), encoding);
+      auto resultType = mlir::triton::PointerType::get(tensorType, pointerType.getAddressSpace());
+
       auto makeTensorPtr = builder.create<tt::MakeTensorPtrOp>(
-          loc, base, shape, strides, offsets, sizes,
+          loc, resultType, base, shape, strides, offsets,
           builder.getDenseI32ArrayAttr({1, 0}));
       return makeTensorPtr;
     };
@@ -190,6 +198,8 @@ struct TritonIntelTensorDescToBlockPointer
     Location loc = op.getLoc();
     tt::TensorDescType tDescType = op.getType();
 
+    // Extract encoding from the tensor descriptor's block type
+    Attribute encoding = tDescType.getBlockType().getEncoding();
     // Create a new block pointer if a suitable one doesn't already exist.
     SmallVector<Value> shapes, strides, offsets;
     SmallVector<int32_t> sizes;
@@ -209,7 +219,7 @@ struct TritonIntelTensorDescToBlockPointer
     }
 
     auto tensorPtr = findOrCreateMakeTensorPtr(
-        loc, op.getBase(), shapes, strides, offsets, sizes, builder);
+        loc, op.getBase(), shapes, strides, offsets, sizes, encoding, builder);
     LLVM_DEBUG({
       llvm::dbgs() << "With:\n";
       llvm::dbgs().indent(2) << tensorPtr << "\n";

-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
+from . import xe
+
+__all__ = ["xe"]
+