
[LLVM Pulldown] Bump to LLVM rev 92164faf17d553359418b9f49c1a41d680d0… #1104


Merged: 2 commits, Aug 13, 2025
2 changes: 1 addition & 1 deletion build_tools/llvm_version.txt
@@ -1 +1 @@
228e96b28a84828e1720c387a339a7e68dbdc029
92164faf17d553359418b9f49c1a41d680d0de49

Large diffs are not rendered by default.

@@ -1,37 +1,37 @@
From 89e527e48b727a1479aa47fdbe3d2d178d8969a7 Mon Sep 17 00:00:00 2001
From 5900db1c91d40157c2724d324ea65e22936e3354 Mon Sep 17 00:00:00 2001
From: Garra1980 <[email protected]>
Date: Mon, 4 Aug 2025 17:50:56 +0200
Subject: [PATCH] Add serilialization and deserialization for spirv
Date: Tue, 12 Aug 2025 23:41:51 +0200
Subject: [PATCH] Add serialization and de-serialization support for spirv

---
mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp | 6 ++++++
mlir/lib/Target/SPIRV/Serialization/Serializer.cpp | 6 ++++++
2 files changed, 12 insertions(+)

diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
index 88931b53a688..f1c22d09cc8e 100644
index d8c54ec5f88c..3b539382dedd 100644
--- a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
+++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
@@ -282,6 +282,7 @@ LogicalResult spirv::Deserializer::processDecoration(ArrayRef<uint32_t> words) {
@@ -283,6 +283,7 @@ LogicalResult spirv::Deserializer::processDecoration(ArrayRef<uint32_t> words) {
symbol, FPRoundingModeAttr::get(opBuilder.getContext(),
static_cast<FPRoundingMode>(words[2])));
break;
+ case spirv::Decoration::Alignment:
case spirv::Decoration::DescriptorSet:
case spirv::Decoration::Binding:
if (words.size() != 3) {
@@ -343,6 +344,10 @@ LogicalResult spirv::Deserializer::processDecoration(ArrayRef<uint32_t> words) {
case spirv::Decoration::RestrictPointer:
case spirv::Decoration::NoContraction:
@@ -346,6 +347,10 @@ LogicalResult spirv::Deserializer::processDecoration(ArrayRef<uint32_t> words) {
case spirv::Decoration::Constant:
case spirv::Decoration::Invariant:
case spirv::Decoration::Patch:
+ case spirv::Decoration::SingleElementVectorINTEL:
+ case spirv::Decoration::VectorComputeCallableFunctionINTEL:
+ case spirv::Decoration::VectorComputeFunctionINTEL:
+ case spirv::Decoration::VectorComputeVariableINTEL:
if (words.size() != 2) {
return emitError(unknownLoc, "OpDecoration with ")
<< decorationName << "needs a single target <id>";
@@ -351,6 +356,7 @@ LogicalResult spirv::Deserializer::processDecoration(ArrayRef<uint32_t> words) {
@@ -354,6 +359,7 @@ LogicalResult spirv::Deserializer::processDecoration(ArrayRef<uint32_t> words) {
break;
case spirv::Decoration::Location:
case spirv::Decoration::SpecId:
@@ -40,10 +40,10 @@ index 88931b53a688..f1c22d09cc8e 100644
return emitError(unknownLoc, "OpDecoration with ")
<< decorationName << "needs a single integer literal";
diff --git a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp
index 737f29662f64..cd925b02b6a6 100644
index 7c007de31558..3aa26ab923a9 100644
--- a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp
+++ b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp
@@ -283,8 +283,10 @@ LogicalResult Serializer::processDecorationAttr(Location loc, uint32_t resultID,
@@ -302,8 +302,10 @@ LogicalResult Serializer::processDecorationAttr(Location loc, uint32_t resultID,
}
return emitError(loc, "expected FPRoundingModeAttr attribute for ")
<< stringifyDecoration(decoration);
@@ -54,17 +54,16 @@ index 737f29662f64..cd925b02b6a6 100644
case spirv::Decoration::Location:
if (auto intAttr = dyn_cast<IntegerAttr>(attr)) {
args.push_back(intAttr.getValue().getZExtValue());
@@ -318,6 +320,10 @@ LogicalResult Serializer::processDecorationAttr(Location loc, uint32_t resultID,
case spirv::Decoration::RestrictPointer:
case spirv::Decoration::NoContraction:
case spirv::Decoration::Constant:
@@ -340,6 +342,10 @@ LogicalResult Serializer::processDecorationAttr(Location loc, uint32_t resultID,
case spirv::Decoration::Block:
case spirv::Decoration::Invariant:
case spirv::Decoration::Patch:
+ case spirv::Decoration::SingleElementVectorINTEL:
+ case spirv::Decoration::VectorComputeCallableFunctionINTEL:
+ case spirv::Decoration::VectorComputeFunctionINTEL:
+ case spirv::Decoration::VectorComputeVariableINTEL:
case spirv::Decoration::Block:
// For unit attributes and decoration attributes, the args list
// has no values so we do nothing.
--
if (isa<UnitAttr, DecorationAttr>(attr))
--
2.34.1

@@ -14,7 +14,7 @@ index 7f4d4f1381df..ebd4f1a3f66a 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -373,6 +373,7 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
OptionalAttr<UnitAttr>: $packed,
OptionalAttr<DenseI64ArrayAttr>: $transpose,
+ OptionalAttr<I32Attr>: $transpose_bit_width,
@@ -24,7 +24,7 @@ index 7f4d4f1381df..ebd4f1a3f66a 100644
@@ -1147,4 +1148,9 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou
let hasCanonicalizer = 1;
}

+def XeGPU_CompileHintOp : XeGPU_Op<"compile_hint", []> {
+ let summary = "prevents the compiler from scheduling.";
+ let assemblyFormat = [{ attr-dict }];
@@ -68,27 +68,26 @@ index 33450f3fa229..528b9d55ee61 100644
+ kind == CachePolicy::STREAMING ||
kind == CachePolicy::WRITE_BACK || kind == CachePolicy::WRITE_THROUGH;
}

@@ -419,8 +420,8 @@ void LoadNdOp::build(OpBuilder &builder, OperationState &state, Type retType,
xegpu::CachePolicyAttr l3_hint) {

return build(builder, state, retType, tensorDesc, ValueRange(),
- DenseI64ArrayAttr(), packed, transpose, l1_hint, l2_hint,
- l3_hint);
+ DenseI64ArrayAttr(), packed, transpose, nullptr,
+ l1_hint, l2_hint, l3_hint);
}

LogicalResult LoadNdOp::verify() {
@@ -482,7 +483,7 @@ LogicalResult LoadNdOp::verify() {
mlir::emitWarning(getLoc()) << "Invalid transpose attr. It is ignored.";
}

- if (getPacked()) {
+ if (getPacked() || getTransposeBitWidth() == 32) {
if (tdescTy.getRank() == 2) {
const int axis = 0;
auto vnni_factor = valueShape.back();
--
--
2.34.1
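
For context, a rough sketch (not taken from this PR) of how the two additions in this patch might be written in XeGPU IR, assuming the printed form follows the declared assembly formats; the descriptor shape, attribute values, and SSA names are illustrative assumptions only:

// Hypothetical: a transposed load of f16 data at 32-bit granularity, using the
// new optional transpose_bit_width attribute to request the packed layout.
%v = xegpu.load_nd %tdesc <{transpose = array<i64: 1, 0>, transpose_bit_width = 32 : i32}>
       : !xegpu.tensor_desc<16x16xf16> -> vector<8x16x2xf16>
// The new compile_hint op takes no operands and prints as a bare mnemonic.
xegpu.compile_hint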

6 changes: 3 additions & 3 deletions lib/Conversion/XeGPUToVC/LSCPatterns.cpp
@@ -1198,9 +1198,9 @@ class PrefetchPattern : public OpConversionPattern<PrefetchOp> {
// auto l2hint = op.getL2Hint();
auto l3hint = op.getL3Hint();

auto callOp = genPrefetchIntrinsicCall(rewriter, loc, simd_lanes, l1hint,
l3hint, elemTy, chunkSize, scope,
adaptor.getSource());
auto callOp =
genPrefetchIntrinsicCall(rewriter, loc, simd_lanes, l1hint, l3hint,
elemTy, chunkSize, scope, adaptor.getSource());

rewriter.replaceOp(op, callOp);
return success();
7 changes: 3 additions & 4 deletions lib/Conversion/XeTileToXeGPU/XeTileToXeGPU.cpp
@@ -491,10 +491,9 @@ class LoadOpPattern : public OpConversionPattern<xetile::LoadTileOp> {
auto packAttr = UnitAttr();
auto transAttr = DenseI64ArrayAttr();
auto bitWidthAttr = IntegerAttr();
auto ldOp = rewriter.create<xegpu::LoadNdOp>(loc, vecTy, adaptor.getTile(),
ValueRange(), DenseI64ArrayAttr(),
packAttr, transAttr,
bitWidthAttr, L1, L2, L3);
auto ldOp = rewriter.create<xegpu::LoadNdOp>(
loc, vecTy, adaptor.getTile(), ValueRange(), DenseI64ArrayAttr(),
packAttr, transAttr, bitWidthAttr, L1, L2, L3);

llvm::SmallVector<Value> results({ldOp.getResult()});
if (memSpace == xegpu::MemorySpace::SLM) {
12 changes: 6 additions & 6 deletions lib/Dialect/NDArray/Extensions/MeshShardingExtensions.cpp
@@ -100,7 +100,7 @@ static T getBaseShardDimOff(T shard, T numShards, T extend) {
}

static Sharding ShardingFromOption(const ShardingOption &option,
MLIRContext *ctxt) {
MLIRContext *ctxt) {
SmallVector<GridAxesAttr> res;
for (const auto &v : option.shardingArray) {
res.emplace_back(GridAxesAttr::get(ctxt, v));
@@ -141,7 +141,8 @@ getShardingWithShardedDimsOffs(Value ary, OffsetSizeAndStrideOpInterface op) {
ShapedType::isDynamicShape(strides))
return op->emitOpError("Dynamic offsets/sizes/strides are not supported");

auto arySharding = aryShardOp.getSharding().getDefiningOp<shard::ShardingOp>();
auto arySharding =
aryShardOp.getSharding().getDefiningOp<shard::ShardingOp>();
// currently no support for sharding dims sizes on input
if (!arySharding.getStaticShardedDimsOffsets().empty())
return op->emitOpError(
@@ -190,10 +191,9 @@ getShardingWithShardedDimsOffs(Value ary, OffsetSizeAndStrideOpInterface op) {
}
}

return Sharding::get(
arySharding.getGridAttr(), arySharding.getSplitAxes().getAxes(),
{}, // static halo
splitOffs, {}, {});
return Sharding::get(arySharding.getGridAttr(),
arySharding.getSplitAxes().getAxes(), {}, // static halo
splitOffs, {}, {});
}

static std::pair<Value, Value>
4 changes: 2 additions & 2 deletions lib/Dialect/XeTile/Transforms/Blocking.cpp
@@ -1042,8 +1042,8 @@ class RewriteTileReductionOp
for (auto v : intermediates) {
auto resultTy = VectorType::get({1, 1}, elemTy);
for (auto i = 0; i < blkSize[1]; i++) {
auto extractOp =
rewriter.create<vector::ExtractOp>(loc, v, rewriter.getIndexAttr(i));
auto extractOp = rewriter.create<vector::ExtractOp>(
loc, v, rewriter.getIndexAttr(i));
auto splatOp = rewriter.create<vector::SplatOp>(op.getLoc(), resultTy,
extractOp);
newOps.push_back(splatOp);
2 changes: 1 addition & 1 deletion lib/Target/CMakeLists.txt
@@ -1 +1 @@
add_subdirectory(LLVM)
add_subdirectory(LLVM)
24 changes: 12 additions & 12 deletions lib/Transforms/OptimizeTranspose.cpp
@@ -516,10 +516,10 @@ struct LoadNdOpPattern : public OpConversionPattern<xegpu::LoadNdOp> {
op.getType().getElementType());
for (auto source : tdescSources) {
auto loadNdOp = rewriter.create<xegpu::LoadNdOp>(
op.getLoc(), newLoadTy, source,
ValueRange(), DenseI64ArrayAttr(), op.getPackedAttr(),
op.getTransposeAttr(), op.getTransposeBitWidthAttr(),
op.getL1HintAttr(), op.getL2HintAttr(), op.getL3HintAttr());
op.getLoc(), newLoadTy, source, ValueRange(), DenseI64ArrayAttr(),
op.getPackedAttr(), op.getTransposeAttr(),
op.getTransposeBitWidthAttr(), op.getL1HintAttr(), op.getL2HintAttr(),
op.getL3HintAttr());
loadNdOps.push_back(loadNdOp);
}
rewriter.replaceOpWithMultiple(op, {loadNdOps});
@@ -847,10 +847,10 @@ struct TransposeRewritePattern : public OpRewritePattern<vector::TransposeOp> {
rewriter.getIntegerType(32),
32); // need to do a 32 bit transpose to get the packed layout.
auto newLoadOp = rewriter.create<xegpu::LoadNdOp>(
loadOp.getLoc(), newVectorTy, loadOp.getTensorDesc(),
ValueRange(), DenseI64ArrayAttr(), packedAttr,
transposeAttr, transposeBitWidthAttr, loadOp.getL1HintAttr(),
loadOp.getL2HintAttr(), loadOp.getL3HintAttr());
loadOp.getLoc(), newVectorTy, loadOp.getTensorDesc(), ValueRange(),
DenseI64ArrayAttr(), packedAttr, transposeAttr, transposeBitWidthAttr,
loadOp.getL1HintAttr(), loadOp.getL2HintAttr(),
loadOp.getL3HintAttr());
// Replace the uses of the packed layout conversion with new load.
rewriter.replaceAllUsesWith(packedLayoutOps.back()->getResult(0),
newLoadOp.getResult());
@@ -872,10 +872,10 @@ struct TransposeRewritePattern : public OpRewritePattern<vector::TransposeOp> {
auto transposeAttr =
DenseI64ArrayAttr::get(rewriter.getContext(), {1, 0});
auto newLoadOp = rewriter.create<xegpu::LoadNdOp>(
loadOp.getLoc(), newVectorTy, loadOp.getTensorDesc(),
ValueRange(), DenseI64ArrayAttr(), packedAttr,
transposeAttr, IntegerAttr(), loadOp.getL1HintAttr(),
loadOp.getL2HintAttr(), loadOp.getL3HintAttr());
loadOp.getLoc(), newVectorTy, loadOp.getTensorDesc(), ValueRange(),
DenseI64ArrayAttr(), packedAttr, transposeAttr, IntegerAttr(),
loadOp.getL1HintAttr(), loadOp.getL2HintAttr(),
loadOp.getL3HintAttr());
rewriter.replaceAllUsesWith(op.getResult(), newLoadOp.getResult());
}

18 changes: 8 additions & 10 deletions lib/Transforms/RemoveSingleElemVector.cpp
@@ -33,8 +33,7 @@ namespace {

struct VectorExtractOpConversion final
: public mlir::OpConversionPattern<mlir::vector::ExtractOp> {
using mlir::OpConversionPattern<
mlir::vector::ExtractOp>::OpConversionPattern;
using mlir::OpConversionPattern<mlir::vector::ExtractOp>::OpConversionPattern;

mlir::LogicalResult
matchAndRewrite(mlir::vector::ExtractOp extractOp, OpAdaptor adaptor,
@@ -84,8 +83,8 @@ struct VectorExtractStridedSliceConversion final

// We only convert ops extracting a single element from a 1D vector.
if (resType.getNumElements() == 1 && srcVector.getType().getRank() == 1) {
rewriter.replaceOpWithNewOp<mlir::vector::ExtractOp>(
extractOp, srcVector, offsets[0]);
rewriter.replaceOpWithNewOp<mlir::vector::ExtractOp>(extractOp, srcVector,
offsets[0]);
return mlir::success();
}
return mlir::failure();
@@ -122,9 +121,8 @@ struct VectorizableOpPattern final
};

template <typename OpTy>
static mlir::Value
createInsertOps(OpTy op, mlir::ValueRange operands,
mlir::ConversionPatternRewriter &rewriter) {
static mlir::Value createInsertOps(OpTy op, mlir::ValueRange operands,
mlir::ConversionPatternRewriter &rewriter) {
auto loc = op.getLoc();
auto type = op.getType();
auto elemType = type.getElementType();
@@ -139,8 +137,7 @@ createInsertOps(OpTy op, mlir::ValueRange operands,
mlir::Value newOp =
rewriter.create<mlir::arith::ConstantOp>(loc, type, denseAttr);
for (auto [i, opr] : llvm::enumerate(operands)) {
newOp =
rewriter.create<mlir::vector::InsertOp>(loc, opr, newOp, i);
newOp = rewriter.create<mlir::vector::InsertOp>(loc, opr, newOp, i);
}
return newOp;
}
@@ -267,7 +264,8 @@ struct RemoveSingleElemVectorPass final
return mlir::Value();

return builder
.create<mlir::vector::ExtractOp>(loc, inputs[0], builder.getIndexAttr(0))
.create<mlir::vector::ExtractOp>(loc, inputs[0],
builder.getIndexAttr(0))
.getResult();
};

Expand Down
25 changes: 12 additions & 13 deletions test/Conversion/XeTileToXeGPU/sg_scattered_ops.mlir
@@ -64,19 +64,18 @@ gpu.module @test {
//CHECK: %[[cast_1:.*]] = memref.cast %[[arg2]] : memref<*xf32> to memref<?xf32>
//CHECK: %[[block_id_x:.*]] = gpu.block_id x
//CHECK: %[[r0:.*]] = arith.muli %[[block_id_x]], %[[c1024]] : index
//CHECK: %[[r1:.*]] = vector.splat %[[r0]] : vector<1x16xindex>
//CHECK: %[[r2:.*]] = vector.shape_cast %[[r1]] : vector<1x16xindex> to vector<16xindex>
//CHECK: %[[r3:.*]] = xegpu.create_tdesc %[[cast]], %[[r2]] : memref<?xf32>, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
//CHECK: %[[r4:.*]] = xegpu.load %[[r3]], %[[cst]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}> : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> -> vector<16xf32>
//CHECK: %[[r5:.*]] = vector.shape_cast %[[r4]] : vector<16xf32> to vector<1x16xf32>
//CHECK: %[[r6:.*]] = xegpu.create_tdesc %[[cast_0]], %[[r2]] : memref<?xf32>, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
//CHECK: %[[r7:.*]] = xegpu.load %[[r6]], %[[cst]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}> : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> -> vector<16xf32>
//CHECK: %[[r8:.*]] = vector.shape_cast %[[r7]] : vector<16xf32> to vector<1x16xf32>
//CHECK: %[[r9:.*]] = arith.addf %[[r5]], %[[r8]] : vector<1x16xf32>
//CHECK: %[[r10:.*]] = xegpu.create_tdesc %[[cast_1]], %[[r2]] : memref<?xf32>, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
//CHECK: %[[r11:.*]] = vector.shape_cast %[[r9]] : vector<1x16xf32> to vector<16xf32>
//CHECK: xegpu.store %[[r11]], %[[r10]], %[[cst]] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<write_back>, l3_hint = #xegpu.cache_hint<write_back>}> : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>
//CHECK: xegpu.store %[[r11]], %[[r10]], %[[cst]] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<write_back>, l3_hint = #xegpu.cache_hint<write_back>}> : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>
//CHECK: %[[r1:.*]] = vector.broadcast %[[r0]] : index to vector<16xindex>
//CHECK: %[[r2:.*]] = xegpu.create_tdesc %[[cast]], %[[r1]] : memref<?xf32>, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
//CHECK: %[[r3:.*]] = xegpu.load %[[r2]], %[[cst]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}> : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> -> vector<16xf32>
//CHECK: %[[r4:.*]] = vector.shape_cast %[[r3]] : vector<16xf32> to vector<1x16xf32>
//CHECK: %[[r5:.*]] = xegpu.create_tdesc %[[cast_0]], %[[r1]] : memref<?xf32>, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
//CHECK: %[[r6:.*]] = xegpu.load %[[r5]], %[[cst]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}> : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> -> vector<16xf32>
//CHECK: %[[r7:.*]] = vector.shape_cast %[[r6]] : vector<16xf32> to vector<1x16xf32>
//CHECK: %[[r8:.*]] = arith.addf %[[r4]], %[[r7]] : vector<1x16xf32>
//CHECK: %[[r9:.*]] = xegpu.create_tdesc %[[cast_1]], %[[r1]] : memref<?xf32>, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
//CHECK: %[[r10:.*]] = vector.shape_cast %[[r8]] : vector<1x16xf32> to vector<16xf32>
//CHECK: xegpu.store %[[r10]], %[[r9]], %[[cst]] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<write_back>, l3_hint = #xegpu.cache_hint<write_back>}> : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>
//CHECK: xegpu.store %[[r10]], %[[r9]], %[[cst]] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<write_back>, l3_hint = #xegpu.cache_hint<write_back>}> : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>
%c1024 = arith.constant 1024 : index
%cst = arith.constant dense<true> : vector<1x32xi1>
%cast = memref.cast %arg0 : memref<*xf32> to memref<?xf32>
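
The rewritten CHECK lines above replace a vector.splat producing vector<1x16xindex> plus a shape_cast with a single 1-D vector.broadcast of the scalar offset. For reference, a minimal sketch of the two forms (scalar and vector types are illustrative):

// previously expected form
%a = vector.splat %idx : vector<16xindex>
// form now expected by the CHECK lines
%b = vector.broadcast %idx : index to vector<16xindex>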
2 changes: 1 addition & 1 deletion test/Dialect/NDArray/Extensions/lit.local.cfg
@@ -2,4 +2,4 @@
local_excludes = ['mesh-spmdization.mlir']

if(not config.imex_enable_excluded_tests):
config.excludes.update(local_excludes)
config.excludes.update(local_excludes)