Skip to content

Commit 299f2d2

Browse files
authored
[LLVM Pulldown] Bump llvm version (#1062)
1 parent 187b4af commit 299f2d2

17 files changed

+131
-131
lines changed

build_tools/llvm_version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
d7d91500b6ef7efb059f660ff7e4aa44553643e6
1+
e24c9e7a0c61ed49e79433d405cb5157483ce691

build_tools/patches/0008-xegpu-temporary-downstream-defintion-changes-and-vec.patch

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
From 7790e4776821298f3e97d9b7f11d8d42e84ede59 Mon Sep 17 00:00:00 2001
1+
From 34eb42d07af1bd30183c45b24b7663ae9e0470c1 Mon Sep 17 00:00:00 2001
22
From: Garra1980 <[email protected]>
3-
Date: Fri, 21 Feb 2025 19:43:35 +0100
3+
Date: Wed, 9 Apr 2025 18:26:12 +0200
44
Subject: [PATCH 1/1] xegpu temporary downstream definition changes and vec
55

66
---
@@ -10,19 +10,19 @@ Subject: [PATCH 1/1] xegpu temporary downstream definition changes and vec
1010
3 files changed, 10 insertions(+), 1 deletion(-)
1111

1212
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
13-
index 78dfaef97420..80797cd87b82 100644
13+
index 16a7f63d60c8..8a518e84570d 100644
1414
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
1515
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
16-
@@ -309,6 +309,7 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
16+
@@ -332,6 +332,7 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
1717
let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
1818
OptionalAttr<UnitAttr>: $packed,
1919
OptionalAttr<DenseI64ArrayAttr>: $transpose,
2020
+ OptionalAttr<I32Attr>: $transpose_bit_width,
2121
OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
2222
OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
2323
OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
24-
@@ -881,4 +882,9 @@ def XeGPU_FenceOp: XeGPU_Op<"fence", []> {
25-
let extraClassDeclaration = extraBaseClassDeclaration;
24+
@@ -1003,4 +1004,9 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou
25+
let hasVerifier = 1;
2626
}
2727

2828
+def XeGPU_CompileHintOp : XeGPU_Op<"compile_hint", []> {
@@ -32,18 +32,18 @@ index 78dfaef97420..80797cd87b82 100644
3232
+
3333
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
3434
diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
35-
index 61b55c57240c..4f55566ef36b 100644
35+
index 0bc0f2fca2c3..87af0060aa5d 100644
3636
--- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
3737
+++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
38-
@@ -202,6 +202,7 @@ struct TransferReadLowering : public OpRewritePattern<vector::TransferReadOp> {
38+
@@ -203,6 +203,7 @@ struct TransferReadLowering : public OpRewritePattern<vector::TransferReadOp> {
3939
xegpu::CachePolicyAttr hint = nullptr;
4040
auto loadOp = rewriter.create<xegpu::LoadNdOp>(
4141
loc, vecTy, ndDesc, /*packed=*/nullptr, transposeAttr,
4242
+ /*transpose_bit_width*/nullptr,
4343
/*l1_hint=*/hint,
4444
/*l2_hint=*/hint, /*l3_hint=*/hint);
4545
rewriter.replaceOp(readOp, loadOp);
46-
@@ -271,6 +272,7 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
46+
@@ -272,6 +273,7 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
4747
xegpu::CachePolicyAttr hint = nullptr;
4848
auto loadNdOp = rewriter.create<xegpu::LoadNdOp>(
4949
loc, vecTy, ndDesc, /*packed=*/nullptr, /*transpose=*/nullptr,
@@ -52,7 +52,7 @@ index 61b55c57240c..4f55566ef36b 100644
5252
/*l2_hint=*/hint, /*l3_hint=*/hint);
5353
rewriter.replaceOp(loadOp, loadNdOp);
5454
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
55-
index 25dc1f22f043..1f5361abb38e 100644
55+
index 0d67e3d70f94..873268c2bc10 100644
5656
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
5757
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
5858
@@ -70,6 +70,7 @@ static bool isWriteHintOrNone(const CachePolicyAttr &attr) {
@@ -63,7 +63,7 @@ index 25dc1f22f043..1f5361abb38e 100644
6363
kind == CachePolicy::WRITE_BACK || kind == CachePolicy::WRITE_THROUGH;
6464
}
6565

66-
@@ -297,7 +298,7 @@ LogicalResult LoadNdOp::verify() {
66+
@@ -321,7 +322,7 @@ LogicalResult LoadNdOp::verify() {
6767
mlir::emitWarning(getLoc()) << "Invalid transpose attr. It is ignored.";
6868
}
6969

lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ class CreateNdDescToXeVMPattern
122122
ConversionPatternRewriter &rewriter) const override {
123123
auto loc = op.getLoc();
124124
auto resultDesc = cast<TensorDescType>(op.getResult().getType());
125-
auto sgMap = resultDesc.getSGMapAttr();
125+
auto sgMap = resultDesc.getLayoutAttr();
126126
if (!sgMap) {
127127
op.emitError() << "XeVM expects SGMap attribute to be present for tensor "
128128
"descriptors";

lib/Conversion/XeTileToXeGPU/XeTileToXeGPU.cpp

Lines changed: 80 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -973,86 +973,86 @@ struct ConvertXeTileToXeGPUPass // convert XeTile to XeGPU
973973
memSpace);
974974
});
975975

976-
typeConverter.addConversion([&](xetile::TileType type)
977-
-> xegpu::TensorDescType {
978-
auto context = type.getContext();
979-
auto scatterAttr = type.getScatterAttr();
980-
bool isScattered = scatterAttr ? scatterAttr.getValue() : false;
981-
982-
// by default the targetTy is the element type, except for SLM cases,
983-
// where the data will be treated as 32-bit type implicitly.
984-
Type targetTy = type.getElementType();
985-
986-
xegpu::SGMapAttr sgMap = nullptr;
987-
if (auto attr = type.getSgMap()) {
988-
auto layout =
989-
llvm::to_vector_of<uint32_t>(attr.getWiLayout().asArrayRef());
990-
auto data = llvm::to_vector_of<uint32_t>(attr.getWiData().asArrayRef());
991-
sgMap = xegpu::SGMapAttr::get(context, layout, data);
992-
}
993-
994-
auto memSpaceAttr = convertMemorySpace(type.getMemorySpace());
995-
auto memSpace =
996-
memSpaceAttr ? memSpaceAttr.getValue() : xegpu::MemorySpace::Global;
997-
998-
Attribute encoding;
999-
llvm::SmallVector<int64_t> shape;
1000-
if (isScattered) {
1001-
// Scattered tile is lowered to scattered tensor_desc with chunk
1002-
// size 1. It supports both global memory and shared memory. While
1003-
// scattered tile can support 2D shape, scattered tensor_desc only
1004-
// supports 1D shape.
1005-
auto chunkSizeAttr = IntegerAttr::get(IntegerType::get(context, 64), 1);
1006-
auto msA = memSpaceAttr
1007-
? memSpaceAttr
1008-
: xegpu::MemorySpaceAttr::get(context, memSpace);
1009-
1010-
encoding =
1011-
xegpu::ScatterTensorDescAttr::get(context, msA, chunkSizeAttr);
1012-
shape.push_back(type.getNumElements());
1013-
} else if (memSpace == xegpu::MemorySpace::Global) {
1014-
// Blocked tile on global memory is lowered to blocked tensor_desc
1015-
// with the same shape.
1016-
auto arrayLenAttr = type.getArrayLength();
1017-
auto boundaryCheckAttr = BoolAttr::get(context, true);
1018-
encoding = xegpu::BlockTensorDescAttr::get(
1019-
context, memSpaceAttr, arrayLenAttr, boundaryCheckAttr);
1020-
shape = llvm::to_vector(type.getShape());
1021-
} else {
1022-
// for TileType created for SLM access, it will be converted into:
1023-
// 1. a 1D block tensor_desc if it is for row-major access
1024-
// 2. a scattered tensor_desc if it is for col-major access.
1025-
auto elemBits = type.getElementType().getIntOrFloatBitWidth();
1026-
auto vnniFactor = std::max<int>(32 / elemBits, 1);
1027-
1028-
// SLM access only supports 32-bit or 64-bit data type, so convert
1029-
// the type if original element type is less than 32-bit.
1030-
if (elemBits < 32) {
1031-
targetTy = type.getElementType().isInteger()
1032-
? (Type)IntegerType::get(context, 32)
1033-
: (Type)Float32Type::get(context);
1034-
}
1035-
1036-
if (isColMajorOrder(type.getOrder())) {
1037-
// For access with col-major order
1038-
auto chunkSize = type.getShape()[0] / vnniFactor;
1039-
auto chunkSizeAttr =
1040-
IntegerAttr::get(IntegerType::get(context, 64), chunkSize);
1041-
encoding = xegpu::ScatterTensorDescAttr::get(context, memSpaceAttr,
1042-
chunkSizeAttr);
1043-
shape = {type.getShape()[1], chunkSize};
1044-
} else {
1045-
// For access with row-major order
1046-
auto vecSize = type.getNumElements() / vnniFactor;
1047-
encoding = xegpu::BlockTensorDescAttr::get(
1048-
context, memSpaceAttr, nullptr /*array_len*/,
1049-
nullptr /*boundary_check*/);
1050-
shape.push_back(vecSize);
1051-
}
1052-
}
1053-
return xegpu::TensorDescType::get(context, shape, targetTy, encoding,
1054-
sgMap);
1055-
});
976+
typeConverter.addConversion(
977+
[&](xetile::TileType type) -> xegpu::TensorDescType {
978+
auto context = type.getContext();
979+
auto scatterAttr = type.getScatterAttr();
980+
bool isScattered = scatterAttr ? scatterAttr.getValue() : false;
981+
982+
// by default the targetTy is the element type, except for SLM cases,
983+
// where the data will be treated as 32-bit type implicitly.
984+
Type targetTy = type.getElementType();
985+
986+
xegpu::LayoutAttr sgMap = nullptr;
987+
if (auto attr = type.getSgMap()) {
988+
auto layout = attr.getWiLayout().asArrayRef();
989+
auto data = attr.getWiData().asArrayRef();
990+
sgMap = xegpu::LayoutAttr::get(context, layout, data);
991+
}
992+
993+
auto memSpaceAttr = convertMemorySpace(type.getMemorySpace());
994+
auto memSpace = memSpaceAttr ? memSpaceAttr.getValue()
995+
: xegpu::MemorySpace::Global;
996+
997+
Attribute encoding;
998+
llvm::SmallVector<int64_t> shape;
999+
if (isScattered) {
1000+
// Scattered tile is lowered to scattered tensor_desc with chunk
1001+
// size 1. It supports both global memory and shared memory. While
1002+
// scattered tile can support 2D shape, scattered tensor_desc only
1003+
// supports 1D shape.
1004+
auto chunkSizeAttr =
1005+
IntegerAttr::get(IntegerType::get(context, 64), 1);
1006+
auto msA = memSpaceAttr
1007+
? memSpaceAttr
1008+
: xegpu::MemorySpaceAttr::get(context, memSpace);
1009+
1010+
encoding =
1011+
xegpu::ScatterTensorDescAttr::get(context, msA, chunkSizeAttr);
1012+
shape.push_back(type.getNumElements());
1013+
} else if (memSpace == xegpu::MemorySpace::Global) {
1014+
// Blocked tile on global memory is lowered to blocked tensor_desc
1015+
// with the same shape.
1016+
auto arrayLenAttr = type.getArrayLength();
1017+
auto boundaryCheckAttr = BoolAttr::get(context, true);
1018+
encoding = xegpu::BlockTensorDescAttr::get(
1019+
context, memSpaceAttr, arrayLenAttr, boundaryCheckAttr);
1020+
shape = llvm::to_vector(type.getShape());
1021+
} else {
1022+
// for TileType created for SLM access, it will be converted into:
1023+
// 1. a 1D block tensor_desc if it is for row-major access
1024+
// 2. a scattered tensor_desc if it is for col-major access.
1025+
auto elemBits = type.getElementType().getIntOrFloatBitWidth();
1026+
auto vnniFactor = std::max<int>(32 / elemBits, 1);
1027+
1028+
// SLM access only supports 32-bit or 64-bit data type, so convert
1029+
// the type if original element type is less than 32-bit.
1030+
if (elemBits < 32) {
1031+
targetTy = type.getElementType().isInteger()
1032+
? (Type)IntegerType::get(context, 32)
1033+
: (Type)Float32Type::get(context);
1034+
}
1035+
1036+
if (isColMajorOrder(type.getOrder())) {
1037+
// For access with col-major order
1038+
auto chunkSize = type.getShape()[0] / vnniFactor;
1039+
auto chunkSizeAttr =
1040+
IntegerAttr::get(IntegerType::get(context, 64), chunkSize);
1041+
encoding = xegpu::ScatterTensorDescAttr::get(
1042+
context, memSpaceAttr, chunkSizeAttr);
1043+
shape = {type.getShape()[1], chunkSize};
1044+
} else {
1045+
// For access with row-major order
1046+
auto vecSize = type.getNumElements() / vnniFactor;
1047+
encoding = xegpu::BlockTensorDescAttr::get(
1048+
context, memSpaceAttr, nullptr /*array_len*/,
1049+
nullptr /*boundary_check*/);
1050+
shape.push_back(vecSize);
1051+
}
1052+
}
1053+
return xegpu::TensorDescType::get(context, shape, targetTy, encoding,
1054+
sgMap);
1055+
});
10561056

10571057
auto materializeWithCast = [&](OpBuilder &builder, Type type,
10581058
ValueRange inputs, Location loc) -> Value {

lib/Transforms/OptimizeTranspose.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ struct CreateNdDescOpPattern
477477
auto newTdescTy = xegpu::TensorDescType::get(
478478
tdescTy.getShape(), tdescTy.getElementType(), /*array_length=*/1,
479479
tdescTy.getBoundaryCheck(), tdescTy.getMemorySpace(),
480-
tdescTy.getSgMap());
480+
tdescTy.getLayout());
481481
auto origOffsetY = op.getOffsets().back();
482482
for (int64_t i = 0; i < arrayLength; ++i) {
483483
auto attr = rewriter.getIndexAttr(i * tdescTy.getShape()[1]);

lib/Transforms/VectorLinearize.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ struct VectorInsertStridedSliceConversion final
375375
for (auto i = 0; i < srcShape[0]; i++) {
376376
auto srcOffset = i * srcShape[1];
377377
auto value = rewriter.create<mlir::vector::ExtractStridedSliceOp>(
378-
loc, adaptor.getSource(), srcOffset, srcShape[1], 1);
378+
loc, adaptor.getValueToStore(), srcOffset, srcShape[1], 1);
379379

380380
auto dstOffset = linearizedOffset + i * dstShape.back();
381381
dstValue = rewriter.create<mlir::vector::InsertStridedSliceOp>(
@@ -496,7 +496,7 @@ struct VectorInsertOpConversion final
496496
if (insertOp.hasDynamicPosition())
497497
return rewriter.notifyMatchFailure(insertOp,
498498
"dynamic position is not supported.");
499-
auto srcTy = insertOp.getSourceType();
499+
auto srcTy = insertOp.getValueToStoreType();
500500
auto srcAsVec = mlir::dyn_cast<mlir::VectorType>(srcTy);
501501
uint64_t srcSize = 0;
502502
if (srcAsVec) {
@@ -540,8 +540,8 @@ struct VectorInsertOpConversion final
540540
std::iota(modifiedSrcIndices.begin(), modifiedSrcIndices.begin() + srcSize,
541541
0);
542542
auto modifiedSource = rewriter.create<mlir::vector::ShuffleOp>(
543-
insertOp.getLoc(), dstTy, adaptor.getSource(), adaptor.getSource(),
544-
modifiedSrcIndices);
543+
insertOp.getLoc(), dstTy, adaptor.getValueToStore(),
544+
adaptor.getValueToStore(), modifiedSrcIndices);
545545

546546
rewriter.replaceOpWithNewOp<mlir::vector::ShuffleOp>(
547547
insertOp, dstTy, adaptor.getDest(), modifiedSource,

test/Conversion/XeGPUToXeVM/dpas.mlir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// RUN: imex-opt -convert-xegpu-to-xevm -split-input-file %s | FileCheck %s
22

3-
#sg_map_a_f16 = #xegpu.sg_map<wi_layout = [1, 16], wi_data = [1, 1]>
4-
#sg_map_b_f16 = #xegpu.sg_map<wi_layout = [1, 16], wi_data = [2, 1]>
5-
#sg_map_c_f32 = #xegpu.sg_map<wi_layout = [1, 16], wi_data = [1, 1]>
3+
#sg_map_a_f16 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
4+
#sg_map_b_f16 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>
5+
#sg_map_c_f32 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
66

77
gpu.module @load_store_check {
88
func.func @dpas(%a_loaded: vector<8x1xf16>, %b_loaded: vector<8x2xf16>, %c_loaded: vector<8x1xf32>) -> vector<8x1xf32> {
@@ -14,7 +14,7 @@ gpu.module @load_store_check {
1414
//CHECK-NEXT: %[[D:.*]] = xevm.dpas %[[CAST_C]], %[[CAST_A]], %[[CAST_B]] {pa = f16, pb = f16, rc = 8} : (vector<8xf32>, vector<8xf16>, vector<16xf16>) -> vector<8xf32>
1515
// Cast result back to expected shape
1616
//CHECK-NEXT: %[[CAST_D:.*]] = vector.shape_cast %[[D]] : vector<8xf32> to vector<8x1xf32>
17-
%d = xegpu.dpas %a_loaded, %b_loaded, %c_loaded {sg_map_a = #sg_map_a_f16, sg_map_b = #sg_map_b_f16, sg_map_c = #sg_map_c_f32} : vector<8x1xf16>, vector<8x2xf16>, vector<8x1xf32> -> vector<8x1xf32>
17+
%d = xegpu.dpas %a_loaded, %b_loaded, %c_loaded {a_layout = #sg_map_a_f16, b_layout = #sg_map_b_f16, c_layout = #sg_map_c_f32} : vector<8x1xf16>, vector<8x2xf16>, vector<8x1xf32> -> vector<8x1xf32>
1818
return %d : vector<8x1xf32>
1919
}
2020
}

0 commit comments

Comments
 (0)