Skip to content

Commit b3f2a4a

Browse files
committed
adding anchor layout for load/store/prefetch_nd and dpas
1 parent 5af0398 commit b3f2a4a

File tree

11 files changed

+72
-52
lines changed

11 files changed

+72
-52
lines changed

mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,8 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
268268
OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
269269
OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
270270
OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
271-
OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
271+
OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint,
272+
OptionalAttr<DistributeLayoutAttr>:$anchor_layout);
272273

273274
let extraClassDeclaration = extraBaseClassDeclaration # [{
274275
xegpu::TensorDescType getTensorDescType() {
@@ -360,7 +361,8 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
360361
OptionalAttr<DenseI64ArrayAttr>: $transpose,
361362
OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
362363
OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
363-
OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
364+
OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint,
365+
OptionalAttr<DistributeLayoutAttr>:$anchor_layout);
364366

365367
let results = (outs XeGPU_ValueType: $value);
366368

@@ -454,7 +456,8 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [
454456
OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
455457
OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
456458
OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
457-
OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
459+
OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint,
460+
OptionalAttr<DistributeLayoutAttr>:$anchor_layout);
458461

459462
let extraClassDeclaration = extraBaseClassDeclaration # [{
460463
VectorType getValueType() {
@@ -1046,7 +1049,7 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>]> {
10461049
"xegpu::CachePolicyAttr": $l1_hint,
10471050
"xegpu::CachePolicyAttr": $l2_hint,
10481051
"xegpu::CachePolicyAttr": $l3_hint,
1049-
"xegpu::DistributeLayoutAttr": $layout)>
1052+
"xegpu::DistributeLayoutAttr": $anchor_layout)>
10501053
];
10511054

10521055
let hasVerifier = 1;
@@ -1133,7 +1136,11 @@ def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>]
11331136
let arguments = (ins
11341137
XeGPU_DpasOprType : $lhs,
11351138
XeGPU_DpasOprType : $rhs,
1136-
Optional<XeGPU_DpasResType>: $acc);
1139+
Optional<XeGPU_DpasResType>: $acc,
1140+
OptionalAttr<DistributeLayoutAttr>:$anchor_layout_a,
1141+
OptionalAttr<DistributeLayoutAttr>:$anchor_layout_b,
1142+
OptionalAttr<DistributeLayoutAttr>:$anchor_layout_cd
1143+
);
11371144
let results = (outs XeGPU_DpasResType: $result);
11381145

11391146
let extraClassDeclaration = [{
@@ -1319,7 +1326,7 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>,
13191326
Variadic<Index>: $offsets,
13201327
DenseI64ArrayAttr: $const_offsets,
13211328
OptionalAttr<UnitAttr>:$subgroup_block_io,
1322-
OptionalAttr<DistributeLayoutAttr>:$layout
1329+
OptionalAttr<DistributeLayoutAttr>:$anchor_layout
13231330
);
13241331
let results = (outs AnyTypeOf<[XeGPU_ValueType, XeGPU_ScalarType]>:$res);
13251332
let assemblyFormat = [{
@@ -1338,7 +1345,7 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>,
13381345
- `subgroup_block_io`: [optional] An attribute indicating that the operation can be
13391346
lowered to a subgroup block load. When this attribute is present,
13401347
the offsets are subgroup-uniform across all lanes.
1341-
- `layout`: [optional] An attribute for guiding distributions among
1348+
- `anchor_layout`: [optional] An attribute for guiding distributions among
13421349
subgroups and/or work-items. It currently can accept either
13431350
LayoutAttr or SliceAttr.
13441351
Results:
@@ -1347,7 +1354,7 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>,
13471354

13481355
let builders = [
13491356
OpBuilder<(ins "Type":$res, "TypedValue<MemDescType>": $mem_desc,
1350-
"llvm::ArrayRef<OpFoldResult>": $offsets, "DistributeLayoutAttr": $layout)>,
1357+
"llvm::ArrayRef<OpFoldResult>": $offsets, "DistributeLayoutAttr": $anchor_layout)>,
13511358
];
13521359
let extraClassDeclaration = [{
13531360
SmallVector<OpFoldResult> getMixedOffsets() {
@@ -1373,7 +1380,7 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
13731380
Variadic<Index>: $offsets,
13741381
DenseI64ArrayAttr: $const_offsets,
13751382
OptionalAttr<UnitAttr>:$subgroup_block_io,
1376-
OptionalAttr<DistributeLayoutAttr>:$layout
1383+
OptionalAttr<DistributeLayoutAttr>:$anchor_layout
13771384
);
13781385
let assemblyFormat = [{ $data `,` $mem_desc `` custom<DynamicIndexList>($offsets, $const_offsets)
13791386
prop-dict attr-dict `` `:` type(operands)}];
@@ -1389,13 +1396,13 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
13891396
- `subgroup_block_io`: [optional] An attribute indicating that the operation can be
13901397
lowered to a subgroup block store. When this attribute is present,
13911398
the offsets are subgroup-uniform across all lanes.
1392-
- `layout`: [optional] An attribute for guiding distributions among
1399+
- `anchor_layout`: [optional] An attribute for guiding distributions among
13931400
subgroups and/or work-items. It currently can accept either
13941401
LayoutAttr or SliceAttr.
13951402
}];
13961403
let builders = [
13971404
OpBuilder<(ins "Value" : $data, "TypedValue<MemDescType>": $mem_desc,
1398-
"llvm::ArrayRef<OpFoldResult>": $offsets, "DistributeLayoutAttr": $layout)>,
1405+
"llvm::ArrayRef<OpFoldResult>": $offsets, "DistributeLayoutAttr": $anchor_layout)>,
13991406
];
14001407
let extraClassDeclaration = [{
14011408
SmallVector<OpFoldResult> getMixedOffsets() {

mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ using std::optional;
2222
namespace mlir {
2323
namespace xegpu {
2424

25+
//#include "mlir/Dialect/XeGPU/IR/XeGPUOpInterface.cpp.inc"
26+
2527
void XeGPUDialect::initialize() {
2628
addTypes<
2729
#define GET_TYPEDEF_LIST

mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ void PrefetchNdOp::build(OpBuilder &builder, OperationState &state,
465465
xegpu::CachePolicyAttr l3_hint) {
466466

467467
return build(builder, state, tensorDesc, ValueRange(), DenseI64ArrayAttr(),
468-
l1_hint, l2_hint, l3_hint);
468+
l1_hint, l2_hint, l3_hint, /*anchor_layout=*/nullptr);
469469
}
470470

471471
void PrefetchNdOp::build(OpBuilder &builder, OperationState &state,
@@ -480,7 +480,7 @@ void PrefetchNdOp::build(OpBuilder &builder, OperationState &state,
480480
auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
481481

482482
build(builder, state, tensorDesc, dynamicOffsets, staticOffsetsAttr, l1_hint,
483-
l2_hint, l3_hint);
483+
l2_hint, l3_hint, /*anchor_layout=*/nullptr);
484484
}
485485

486486
LogicalResult PrefetchNdOp::verify() {
@@ -519,7 +519,7 @@ void LoadNdOp::build(OpBuilder &builder, OperationState &state, Type retType,
519519

520520
return build(builder, state, retType, tensorDesc, ValueRange(),
521521
DenseI64ArrayAttr(), packed, transpose, l1_hint, l2_hint,
522-
l3_hint);
522+
l3_hint, /*anchor_layout=*/nullptr);
523523
}
524524

525525
void LoadNdOp::build(OpBuilder &builder, OperationState &state, Type retType,
@@ -535,7 +535,8 @@ void LoadNdOp::build(OpBuilder &builder, OperationState &state, Type retType,
535535
auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
536536

537537
build(builder, state, retType, tensorDesc, dynamicOffsets, staticOffsetsAttr,
538-
packed, transpose, l1_hint, l2_hint, l3_hint);
538+
packed, transpose, l1_hint, l2_hint, l3_hint,
539+
/*anchor_layout=*/nullptr);
539540
}
540541

541542
LogicalResult LoadNdOp::verify() {
@@ -638,7 +639,8 @@ void StoreNdOp::build(OpBuilder &builder, OperationState &state, Value value,
638639
xegpu::CachePolicyAttr l3_hint) {
639640

640641
return build(builder, state, value, tensorDesc, ValueRange(),
641-
DenseI64ArrayAttr(), l1_hint, l2_hint, l3_hint);
642+
DenseI64ArrayAttr(), l1_hint, l2_hint, l3_hint,
643+
/*anchor_layout=*/nullptr);
642644
}
643645

644646
void StoreNdOp::build(OpBuilder &builder, OperationState &state, Value value,
@@ -653,7 +655,7 @@ void StoreNdOp::build(OpBuilder &builder, OperationState &state, Value value,
653655
auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
654656

655657
build(builder, state, value, tensorDesc, dynamicOffsets, staticOffsetsAttr,
656-
l1_hint, l2_hint, l3_hint);
658+
l1_hint, l2_hint, l3_hint, /*anchor_layout=*/nullptr);
657659
}
658660

659661
LogicalResult StoreNdOp::verify() {
@@ -876,7 +878,7 @@ void LoadGatherOp::build(OpBuilder &builder, OperationState &state,
876878
xegpu::CachePolicyAttr l2_hint,
877879
xegpu::CachePolicyAttr l3_hint) {
878880
build(builder, state, valueType, source, Value(), mask, IntegerAttr(),
879-
l1_hint, l2_hint, l3_hint, /*layout=*/nullptr);
881+
l1_hint, l2_hint, l3_hint, /*anchor_layout=*/nullptr);
880882
}
881883

882884
void LoadGatherOp::build(OpBuilder &builder, OperationState &state,
@@ -892,7 +894,7 @@ void LoadGatherOp::build(OpBuilder &builder, OperationState &state,
892894
auto offset = vector::FromElementsOp::create(builder, loc, type, values);
893895

894896
build(builder, state, valueType, source, offset, mask, chunk_size, l1_hint,
895-
l2_hint, l3_hint, /*layout=*/nullptr);
897+
l2_hint, l3_hint, /*anchor_layout=*/nullptr);
896898
}
897899

898900
void LoadGatherOp::build(OpBuilder &builder, OperationState &state,
@@ -960,7 +962,7 @@ void StoreScatterOp::build(OpBuilder &builder, OperationState &state,
960962
xegpu::CachePolicyAttr l2_hint,
961963
xegpu::CachePolicyAttr l3_hint) {
962964
build(builder, state, value, dest, Value(), mask, IntegerAttr(), l1_hint,
963-
l2_hint, l3_hint, /*layout=*/nullptr);
965+
l2_hint, l3_hint, /*anchor_layout=*/nullptr);
964966
}
965967

966968
void StoreScatterOp::build(OpBuilder &builder, OperationState &state,
@@ -978,7 +980,7 @@ void StoreScatterOp::build(OpBuilder &builder, OperationState &state,
978980

979981
// Call the correct builder overload that does not expect result types.
980982
build(builder, state, value, dest, offset, mask, chunk_size, l1_hint, l2_hint,
981-
l3_hint, /*layout=*/nullptr);
983+
l3_hint, /*anchor_layout=*/nullptr);
982984
}
983985

984986
void StoreScatterOp::build(
@@ -1155,7 +1157,8 @@ LogicalResult LoadMatrixOp::verify() {
11551157
MemDescType mdescTy = getMemDesc().getType();
11561158

11571159
return IsValidMatrixOpParams(resTy, mdescTy, subgroup_block_io,
1158-
getLayoutAttr(), [&]() { return emitError(); });
1160+
getAnchorLayoutAttr(),
1161+
[&]() { return emitError(); });
11591162
}
11601163

11611164
//===----------------------------------------------------------------------===//
@@ -1179,7 +1182,8 @@ LogicalResult StoreMatrixOp::verify() {
11791182
UnitAttr subgroup_block_io = getSubgroupBlockIoAttr();
11801183
MemDescType mdescTy = getMemDesc().getType();
11811184
return IsValidMatrixOpParams(dataTy, mdescTy, subgroup_block_io,
1182-
getLayoutAttr(), [&]() { return emitError(); });
1185+
getAnchorLayoutAttr(),
1186+
[&]() { return emitError(); });
11831187
}
11841188

11851189
namespace mlir {

mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -965,7 +965,7 @@ struct LoadMatrixDistribution final : public gpu::WarpDistributionPattern {
965965
SmallVector<Value> offsetsAsValues =
966966
vector::getAsValues(rewriter, matrixOp.getLoc(), offsets);
967967

968-
auto layout = matrixOp.getLayoutAttr();
968+
auto layout = matrixOp.getAnchorLayoutAttr();
969969
if (!layout)
970970
return rewriter.notifyMatchFailure(
971971
matrixOp, "the matrix operation lacks layout attribute");
@@ -1041,7 +1041,7 @@ struct StoreMatrixDistribution final : public gpu::WarpDistributionPattern {
10411041
SmallVector<Value> offsetsAsValues =
10421042
vector::getAsValues(rewriter, matrixOp.getLoc(), offsets);
10431043

1044-
auto layout = matrixOp.getLayoutAttr();
1044+
auto layout = matrixOp.getAnchorLayoutAttr();
10451045
if (!layout)
10461046
return rewriter.notifyMatchFailure(
10471047
matrixOp, "the matrix operation lacks layout attribute");

mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -954,7 +954,7 @@ struct UnrollLoadMatrixOp : public UnrollPattern<xegpu::LoadMatrixOp> {
954954

955955
Type elemTy = valueTy.getElementType();
956956
ArrayRef<int64_t> shape = valueTy.getShape();
957-
auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
957+
auto layout = dyn_cast<xegpu::LayoutAttr>(op.getAnchorLayoutAttr());
958958

959959
VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
960960

@@ -993,7 +993,7 @@ struct UnrollStoreMatrixOp : public UnrollPattern<xegpu::StoreMatrixOp> {
993993
VectorType valueTy = llvm::dyn_cast<VectorType>(op.getData().getType());
994994
assert(valueTy && "the value type must be vector type!");
995995
ArrayRef<int64_t> shape = valueTy.getShape();
996-
auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
996+
auto layout = dyn_cast<xegpu::LayoutAttr>(op.getAnchorLayoutAttr());
997997

998998
SmallVector<Type> convertedValTypes =
999999
getUnrolledTypes(valueTy, *targetShape);

mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,16 @@ genOffsetsList(ConversionPatternRewriter &rewriter, OpType op,
8686
if (origOffsets.empty())
8787
return failure();
8888

89+
// LoadMatrixOp and StoreMatrixOp expose their layout via getAnchorLayoutAttr();
// any other op type (e.g. xegpu::CreateNdDescOp) uses op.getLayoutAttr().
90+
xegpu::DistributeLayoutAttr layout;
91+
if constexpr (std::is_same_v<OpType, xegpu::LoadMatrixOp> ||
92+
std::is_same_v<OpType, xegpu::StoreMatrixOp>) {
93+
layout = op.getAnchorLayoutAttr();
94+
} else {
95+
layout = op.getLayoutAttr();
96+
}
97+
8998
// not applicable to ops without workgroup layout attributes
90-
xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
9199
if (!layout || !layout.isForWorkgroup())
92100
return failure();
93101

@@ -190,7 +198,7 @@ struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
190198
xegpu::TensorDescType tdescTy = op.getType();
191199
ArrayRef<int64_t> wgShape = tdescTy.getShape();
192200
Type elemTy = tdescTy.getElementType();
193-
xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
201+
xegpu::DistributeLayoutAttr layout = tdescTy.getLayoutAttr();
194202
SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
195203
auto newTdescTy =
196204
xegpu::TensorDescType::get(ctx, sgShape, elemTy, tdescTy.getEncoding(),
@@ -999,7 +1007,7 @@ struct WgToSgLoadMatrixOp : public OpConversionPattern<xegpu::LoadMatrixOp> {
9991007
assert(valueTy && "the value type must be vector type!");
10001008
Type elemTy = valueTy.getElementType();
10011009

1002-
xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
1010+
xegpu::DistributeLayoutAttr layout = op.getAnchorLayoutAttr();
10031011
SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
10041012
VectorType newResTy = VectorType::get(sgShape, elemTy);
10051013
SmallVector<Value> newOps;
@@ -1025,7 +1033,7 @@ struct WgToSgStoreMatrixOp : public OpConversionPattern<xegpu::StoreMatrixOp> {
10251033
if (failed(genOffsetsList(rewriter, op, offsetsList)))
10261034
return failure();
10271035

1028-
xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
1036+
xegpu::DistributeLayoutAttr layout = op.getAnchorLayoutAttr();
10291037
for (auto [v, offsets] : llvm::zip(adaptor.getData(), offsetsList))
10301038
xegpu::StoreMatrixOp::create(rewriter, op.getLoc(), v, op.getMemDesc(),
10311039
offsets, layout.dropSgLayoutAndData());
@@ -1409,12 +1417,12 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
14091417

14101418
target.addDynamicallyLegalOp<xegpu::LoadMatrixOp>(
14111419
[=](xegpu::LoadMatrixOp op) -> bool {
1412-
return isLegal(op.getLayoutAttr());
1420+
return isLegal(op.getAnchorLayoutAttr());
14131421
});
14141422

14151423
target.addDynamicallyLegalOp<xegpu::StoreMatrixOp>(
14161424
[=](xegpu::StoreMatrixOp op) -> bool {
1417-
return isLegal(op.getLayoutAttr());
1425+
return isLegal(op.getAnchorLayoutAttr());
14181426
});
14191427

14201428
target.addDynamicallyLegalOp<arith::ConstantOp>(

mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -135,12 +135,11 @@ xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) {
135135

136136
// for LoadMatrixOp, the layout is attached to the property of the op
137137
if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(defOp))
138-
return loadOp.getLayoutAttr();
138+
return loadOp.getAnchorLayoutAttr();
139139

140140
// for StoreMatrixOp, the layout is attached to the property of the op
141141
if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(defOp))
142-
return storeOp.getLayoutAttr();
143-
142+
return storeOp.getAnchorLayoutAttr();
144143
std::string layoutName = getLayoutName(result);
145144
if (defOp->hasAttr(layoutName))
146145
return defOp->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
@@ -168,10 +167,10 @@ xegpu::getDistributeLayoutAttr(const OpOperand &opr) {
168167
Operation *op = opr.getOwner();
169168

170169
if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(op))
171-
return loadOp.getLayoutAttr();
170+
return loadOp.getAnchorLayoutAttr();
172171

173172
if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(op))
174-
return storeOp.getLayoutAttr();
173+
return storeOp.getAnchorLayoutAttr();
175174

176175
std::string layoutName = xegpu::getLayoutName(opr);
177176
if (op->hasAttr(layoutName))

mlir/test/Dialect/XeGPU/invalid.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -894,23 +894,23 @@ func.func @store_mem_desc_invalid_rank(%arg0: !xegpu.mem_desc<64xf16>, %arg1: ve
894894
// -----
895895
func.func @simt_store_matrix_vector_nonlinear(%arg0: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1]>>, %arg1: vector<2x16xf32>) {
896896
// expected-error@+1 {{With subgroup_block_io, accessed data must be contiguous and coalesced}}
897-
xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} :
897+
xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, anchor_layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} :
898898
vector<2x16xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1]>>
899899
return
900900
}
901901

902902
// -----
903903
func.func @simt_store_matrix_vector_noncoalesced(%arg0: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [1, 16]>>, %arg1: vector<16x2xf32>) {
904904
// expected-error@+1 {{With subgroup_block_io, the distributed dimensions must be contiguous}}
905-
xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>} :
905+
xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, anchor_layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>} :
906906
vector<16x2xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [1, 16]>>
907907
return
908908
}
909909

910910
// -----
911911
func.func @simt_store_matrix_vector_noncoalesced(%arg0: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1], block = [1, 17]>>, %arg1: vector<16x2xf32>) {
912912
// expected-error@+1 {{With subgroup_block_io, the block shape must match the lane layout}}
913-
xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} :
913+
xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, anchor_layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} :
914914
vector<16x2xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1], block = [1, 17]>>
915915
return
916916
}

0 commit comments

Comments
 (0)