intel
diff --git a/build_tools/llvm_version.txt b/build_tools/llvm_version.txt
Lines changed: 1 addition & 1 deletion
diff --git a/build_tools/patches/0008-xegpu-temporary-downstream-defintion-changes-and-vec.patch b/build_tools/patches/0008-xegpu-temporary-downstream-defintion-changes-and-vec.patch
Lines changed: 11 additions & 11 deletions
diff --git a/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp b/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/lib/Conversion/XeTileToXeGPU/XeTileToXeGPU.cpp b/lib/Conversion/XeTileToXeGPU/XeTileToXeGPU.cpp
Lines changed: 80 additions & 80 deletions
diff --git a/lib/Transforms/OptimizeTranspose.cpp b/lib/Transforms/OptimizeTranspose.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/lib/Transforms/VectorLinearize.cpp b/lib/Transforms/VectorLinearize.cpp
Lines changed: 4 additions & 4 deletions
diff --git a/test/Conversion/XeGPUToXeVM/dpas.mlir b/test/Conversion/XeGPUToXeVM/dpas.mlir
Lines changed: 4 additions & 4 deletions
@@ -1 +1 @@
-d7d91500b6ef7efb059f660ff7e4aa44553643e6
+e24c9e7a0c61ed49e79433d405cb5157483ce691
@@ -1,6 +1,6 @@
-From 7790e4776821298f3e97d9b7f11d8d42e84ede59 Mon Sep 17 00:00:00 2001
+From 34eb42d07af1bd30183c45b24b7663ae9e0470c1 Mon Sep 17 00:00:00 2001
 From: Garra1980 <[email protected]>
-Date: Fri, 21 Feb 2025 19:43:35 +0100
+Date: Wed, 9 Apr 2025 18:26:12 +0200
 Subject: [PATCH 1/1] xegpu temporary downstream definition changes and vec
 
 ---
@@ -10,19 +10,19 @@ Subject: [PATCH 1/1] xegpu temporary downstream definition changes and vec
  3 files changed, 10 insertions(+), 1 deletion(-)
 
 diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
-index 78dfaef97420..80797cd87b82 100644
+index 16a7f63d60c8..8a518e84570d 100644
 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
 +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
-@@ -309,6 +309,7 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
+@@ -332,6 +332,7 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
    let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
                         OptionalAttr<UnitAttr>: $packed,
                         OptionalAttr<DenseI64ArrayAttr>: $transpose,
 +                       OptionalAttr<I32Attr>: $transpose_bit_width,
                         OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
                         OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
                         OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-@@ -881,4 +882,9 @@ def XeGPU_FenceOp: XeGPU_Op<"fence", []> {
-   let extraClassDeclaration = extraBaseClassDeclaration;
+@@ -1003,4 +1004,9 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou
+     let hasVerifier = 1;
  }
 
 +def XeGPU_CompileHintOp : XeGPU_Op<"compile_hint", []> {
@@ -32,18 +32,18 @@ index 78dfaef97420..80797cd87b82 100644
 +
  #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
 diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
-index 61b55c57240c..4f55566ef36b 100644
+index 0bc0f2fca2c3..87af0060aa5d 100644
 --- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
 +++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
-@@ -202,6 +202,7 @@ struct TransferReadLowering : public OpRewritePattern<vector::TransferReadOp> {
+@@ -203,6 +203,7 @@ struct TransferReadLowering : public OpRewritePattern<vector::TransferReadOp> {
      xegpu::CachePolicyAttr hint = nullptr;
      auto loadOp = rewriter.create<xegpu::LoadNdOp>(
          loc, vecTy, ndDesc, /*packed=*/nullptr, transposeAttr,
 +        /*transpose_bit_width*/nullptr,
          /*l1_hint=*/hint,
          /*l2_hint=*/hint, /*l3_hint=*/hint);
      rewriter.replaceOp(readOp, loadOp);
-@@ -271,6 +272,7 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
+@@ -272,6 +273,7 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
      xegpu::CachePolicyAttr hint = nullptr;
      auto loadNdOp = rewriter.create<xegpu::LoadNdOp>(
          loc, vecTy, ndDesc, /*packed=*/nullptr, /*transpose=*/nullptr,
@@ -52,7 +52,7 @@ index 61b55c57240c..4f55566ef36b 100644
          /*l2_hint=*/hint, /*l3_hint=*/hint);
      rewriter.replaceOp(loadOp, loadNdOp);
 diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
-index 25dc1f22f043..1f5361abb38e 100644
+index 0d67e3d70f94..873268c2bc10 100644
 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
 +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -70,6 +70,7 @@ static bool isWriteHintOrNone(const CachePolicyAttr &attr) {
@@ -63,7 +63,7 @@ index 25dc1f22f043..1f5361abb38e 100644
           kind == CachePolicy::WRITE_BACK || kind == CachePolicy::WRITE_THROUGH;
  }
 
-@@ -297,7 +298,7 @@ LogicalResult LoadNdOp::verify() {
+@@ -321,7 +322,7 @@ LogicalResult LoadNdOp::verify() {
        mlir::emitWarning(getLoc()) << "Invalid transpose attr. It is ignored.";
    }
 
 
@@ -122,7 +122,7 @@ class CreateNdDescToXeVMPattern
                   ConversionPatternRewriter &rewriter) const override {
     auto loc = op.getLoc();
     auto resultDesc = cast<TensorDescType>(op.getResult().getType());
-    auto sgMap = resultDesc.getSGMapAttr();
+    auto sgMap = resultDesc.getLayoutAttr();
     if (!sgMap) {
       op.emitError() << "XeVM expects SGMap attribute to be present for tensor "
                         "descriptors";
 
@@ -973,86 +973,86 @@ struct ConvertXeTileToXeGPUPass // convert XeTile to XeGPU
                              memSpace);
     });
 
-    typeConverter.addConversion([&](xetile::TileType type)
-                                    -> xegpu::TensorDescType {
-      auto context = type.getContext();
-      auto scatterAttr = type.getScatterAttr();
-      bool isScattered = scatterAttr ? scatterAttr.getValue() : false;
-
-      // by default the targetTy is the element type, except for SLM cases,
-      // where the data will be treated as 32-bit type implicitly.
-      Type targetTy = type.getElementType();
-
-      xegpu::SGMapAttr sgMap = nullptr;
-      if (auto attr = type.getSgMap()) {
-        auto layout =
-            llvm::to_vector_of<uint32_t>(attr.getWiLayout().asArrayRef());
-        auto data = llvm::to_vector_of<uint32_t>(attr.getWiData().asArrayRef());
-        sgMap = xegpu::SGMapAttr::get(context, layout, data);
-      }
-
-      auto memSpaceAttr = convertMemorySpace(type.getMemorySpace());
-      auto memSpace =
-          memSpaceAttr ? memSpaceAttr.getValue() : xegpu::MemorySpace::Global;
-
-      Attribute encoding;
-      llvm::SmallVector<int64_t> shape;
-      if (isScattered) {
-        // Scattered tile is lowered to scattered tensor_desc with chunk
-        // size 1. It supports both global memory and shared memory. while
-        // scattered tile can support 2D shape, scattered tensor_desc only
-        // support 1D shape.
-        auto chunkSizeAttr = IntegerAttr::get(IntegerType::get(context, 64), 1);
-        auto msA = memSpaceAttr
-                       ? memSpaceAttr
-                       : xegpu::MemorySpaceAttr::get(context, memSpace);
-
-        encoding =
-            xegpu::ScatterTensorDescAttr::get(context, msA, chunkSizeAttr);
-        shape.push_back(type.getNumElements());
-      } else if (memSpace == xegpu::MemorySpace::Global) {
-        // Blocked tile on global memory is lowered to blocked tensor_desc
-        // with the same shape.
-        auto arrayLenAttr = type.getArrayLength();
-        auto boundaryCheckAttr = BoolAttr::get(context, true);
-        encoding = xegpu::BlockTensorDescAttr::get(
-            context, memSpaceAttr, arrayLenAttr, boundaryCheckAttr);
-        shape = llvm::to_vector(type.getShape());
-      } else {
-        // for TileType created for SLM access, it will be converted into:
-        // 1. a 1D block tensor_desc if it is for row-major access
-        // 2. a scattered tensor_desc if it is for col-major access.
-        auto elemBits = type.getElementType().getIntOrFloatBitWidth();
-        auto vnniFactor = std::max<int>(32 / elemBits, 1);
-
-        // SLM access only supports 32-bit or 64-bit data type, so convert
-        // the type if original element type is less than 32-bit.
-        if (elemBits < 32) {
-          targetTy = type.getElementType().isInteger()
-                         ? (Type)IntegerType::get(context, 32)
-                         : (Type)Float32Type::get(context);
-        }
-
-        if (isColMajorOrder(type.getOrder())) {
-          // For access with col-major order
-          auto chunkSize = type.getShape()[0] / vnniFactor;
-          auto chunkSizeAttr =
-              IntegerAttr::get(IntegerType::get(context, 64), chunkSize);
-          encoding = xegpu::ScatterTensorDescAttr::get(context, memSpaceAttr,
-                                                       chunkSizeAttr);
-          shape = {type.getShape()[1], chunkSize};
-        } else {
-          // For access with row-major order
-          auto vecSize = type.getNumElements() / vnniFactor;
-          encoding = xegpu::BlockTensorDescAttr::get(
-              context, memSpaceAttr, nullptr /*array_len*/,
-              nullptr /*boundary_check*/);
-          shape.push_back(vecSize);
-        }
-      }
-      return xegpu::TensorDescType::get(context, shape, targetTy, encoding,
-                                        sgMap);
-    });
+    typeConverter.addConversion(
+        [&](xetile::TileType type) -> xegpu::TensorDescType {
+          auto context = type.getContext();
+          auto scatterAttr = type.getScatterAttr();
+          bool isScattered = scatterAttr ? scatterAttr.getValue() : false;
+
+          // by default the targetTy is the element type, except for SLM cases,
+          // where the data will be treated as 32-bit type implicitly.
+          Type targetTy = type.getElementType();
+
+          xegpu::LayoutAttr sgMap = nullptr;
+          if (auto attr = type.getSgMap()) {
+            auto layout = attr.getWiLayout().asArrayRef();
+            auto data = attr.getWiData().asArrayRef();
+            sgMap = xegpu::LayoutAttr::get(context, layout, data);
+          }
+
+          auto memSpaceAttr = convertMemorySpace(type.getMemorySpace());
+          auto memSpace = memSpaceAttr ? memSpaceAttr.getValue()
+                                       : xegpu::MemorySpace::Global;
+
+          Attribute encoding;
+          llvm::SmallVector<int64_t> shape;
+          if (isScattered) {
+            // Scattered tile is lowered to scattered tensor_desc with chunk
+            // size 1. It supports both global memory and shared memory. While
+            // a scattered tile can be 2D, a scattered tensor_desc only
+            // supports a 1D shape.
+            auto chunkSizeAttr =
+                IntegerAttr::get(IntegerType::get(context, 64), 1);
+            auto msA = memSpaceAttr
+                           ? memSpaceAttr
+                           : xegpu::MemorySpaceAttr::get(context, memSpace);
+
+            encoding =
+                xegpu::ScatterTensorDescAttr::get(context, msA, chunkSizeAttr);
+            shape.push_back(type.getNumElements());
+          } else if (memSpace == xegpu::MemorySpace::Global) {
+            // Blocked tile on global memory is lowered to blocked tensor_desc
+            // with the same shape.
+            auto arrayLenAttr = type.getArrayLength();
+            auto boundaryCheckAttr = BoolAttr::get(context, true);
+            encoding = xegpu::BlockTensorDescAttr::get(
+                context, memSpaceAttr, arrayLenAttr, boundaryCheckAttr);
+            shape = llvm::to_vector(type.getShape());
+          } else {
+            // for TileType created for SLM access, it will be converted into:
+            // 1. a 1D block tensor_desc if it is for row-major access
+            // 2. a scattered tensor_desc if it is for col-major access.
+            auto elemBits = type.getElementType().getIntOrFloatBitWidth();
+            auto vnniFactor = std::max<int>(32 / elemBits, 1);
+
+            // SLM access only supports 32-bit or 64-bit data type, so convert
+            // the type if original element type is less than 32-bit.
+            if (elemBits < 32) {
+              targetTy = type.getElementType().isInteger()
+                             ? (Type)IntegerType::get(context, 32)
+                             : (Type)Float32Type::get(context);
+            }
+
+            if (isColMajorOrder(type.getOrder())) {
+              // For access with col-major order
+              auto chunkSize = type.getShape()[0] / vnniFactor;
+              auto chunkSizeAttr =
+                  IntegerAttr::get(IntegerType::get(context, 64), chunkSize);
+              encoding = xegpu::ScatterTensorDescAttr::get(
+                  context, memSpaceAttr, chunkSizeAttr);
+              shape = {type.getShape()[1], chunkSize};
+            } else {
+              // For access with row-major order
+              auto vecSize = type.getNumElements() / vnniFactor;
+              encoding = xegpu::BlockTensorDescAttr::get(
+                  context, memSpaceAttr, nullptr /*array_len*/,
+                  nullptr /*boundary_check*/);
+              shape.push_back(vecSize);
+            }
+          }
+          return xegpu::TensorDescType::get(context, shape, targetTy, encoding,
+                                            sgMap);
+        });
 
     auto materializeWithCast = [&](OpBuilder &builder, Type type,
                                    ValueRange inputs, Location loc) -> Value {
 
@@ -477,7 +477,7 @@ struct CreateNdDescOpPattern
     auto newTdescTy = xegpu::TensorDescType::get(
         tdescTy.getShape(), tdescTy.getElementType(), /*array_length=*/1,
         tdescTy.getBoundaryCheck(), tdescTy.getMemorySpace(),
-        tdescTy.getSgMap());
+        tdescTy.getLayout());
     auto origOffsetY = op.getOffsets().back();
     for (int64_t i = 0; i < arrayLength; ++i) {
       auto attr = rewriter.getIndexAttr(i * tdescTy.getShape()[1]);
 
@@ -375,7 +375,7 @@ struct VectorInsertStridedSliceConversion final
     for (auto i = 0; i < srcShape[0]; i++) {
       auto srcOffset = i * srcShape[1];
       auto value = rewriter.create<mlir::vector::ExtractStridedSliceOp>(
-          loc, adaptor.getSource(), srcOffset, srcShape[1], 1);
+          loc, adaptor.getValueToStore(), srcOffset, srcShape[1], 1);
 
       auto dstOffset = linearizedOffset + i * dstShape.back();
       dstValue = rewriter.create<mlir::vector::InsertStridedSliceOp>(
@@ -496,7 +496,7 @@ struct VectorInsertOpConversion final
     if (insertOp.hasDynamicPosition())
       return rewriter.notifyMatchFailure(insertOp,
                                          "dynamic position is not supported.");
-    auto srcTy = insertOp.getSourceType();
+    auto srcTy = insertOp.getValueToStoreType();
     auto srcAsVec = mlir::dyn_cast<mlir::VectorType>(srcTy);
     uint64_t srcSize = 0;
     if (srcAsVec) {
@@ -540,8 +540,8 @@ struct VectorInsertOpConversion final
     std::iota(modifiedSrcIndices.begin(), modifiedSrcIndices.begin() + srcSize,
               0);
     auto modifiedSource = rewriter.create<mlir::vector::ShuffleOp>(
-        insertOp.getLoc(), dstTy, adaptor.getSource(), adaptor.getSource(),
-        modifiedSrcIndices);
+        insertOp.getLoc(), dstTy, adaptor.getValueToStore(),
+        adaptor.getValueToStore(), modifiedSrcIndices);
 
     rewriter.replaceOpWithNewOp<mlir::vector::ShuffleOp>(
         insertOp, dstTy, adaptor.getDest(), modifiedSource,
 
@@ -1,8 +1,8 @@
 // RUN: imex-opt -convert-xegpu-to-xevm -split-input-file %s | FileCheck %s
 
-#sg_map_a_f16 = #xegpu.sg_map<wi_layout = [1, 16], wi_data = [1, 1]>
-#sg_map_b_f16 = #xegpu.sg_map<wi_layout = [1, 16], wi_data = [2, 1]>
-#sg_map_c_f32 = #xegpu.sg_map<wi_layout = [1, 16], wi_data = [1, 1]>
+#sg_map_a_f16 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
+#sg_map_b_f16 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>
+#sg_map_c_f32 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
 
 gpu.module @load_store_check {
     func.func @dpas(%a_loaded: vector<8x1xf16>, %b_loaded: vector<8x2xf16>, %c_loaded: vector<8x1xf32>) -> vector<8x1xf32> {
@@ -14,7 +14,7 @@ gpu.module @load_store_check {
         //CHECK-NEXT: %[[D:.*]] = xevm.dpas %[[CAST_C]], %[[CAST_A]], %[[CAST_B]] {pa = f16, pb = f16, rc = 8} : (vector<8xf32>, vector<8xf16>, vector<16xf16>) -> vector<8xf32>
         // Cast result back to expected shape
         //CHECK-NEXT: %[[CAST_D:.*]] = vector.shape_cast %[[D]] : vector<8xf32> to vector<8x1xf32>
-        %d = xegpu.dpas %a_loaded, %b_loaded, %c_loaded {sg_map_a = #sg_map_a_f16, sg_map_b = #sg_map_b_f16, sg_map_c = #sg_map_c_f32} : vector<8x1xf16>, vector<8x2xf16>, vector<8x1xf32> -> vector<8x1xf32>
+        %d = xegpu.dpas %a_loaded, %b_loaded, %c_loaded {a_layout = #sg_map_a_f16, b_layout = #sg_map_b_f16, c_layout = #sg_map_c_f32} : vector<8x1xf16>, vector<8x2xf16>, vector<8x1xf32> -> vector<8x1xf32>
         return %d : vector<8x1xf32>
     }
 }
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-d7d91500b6ef7efb059f660ff7e4aa44553643e6`
	`1`	`+e24c9e7a0c61ed49e79433d405cb5157483ce691`