Commit 205fdfd

Clean up tests
1 parent 10643dc commit 205fdfd

8 files changed (+83 additions, -181 deletions)

mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp

Lines changed: 0 additions & 1 deletion
@@ -288,7 +288,6 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
         llvm::reverse(llvm::seq<int64_t>(0, sgLayoutInt.size())));
   }
 
-  // Validate order
   if (order.size() != sgLayoutInt.size()) {
     return failure();
   }

mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp

Lines changed: 31 additions & 37 deletions
@@ -1235,60 +1235,63 @@ struct WgToSgVectorTransposeOp
     if (!layout || !layout.isForWorkgroup())
       return failure();
 
-    // Get the source layout for validation
     xegpu::DistributeLayoutAttr sourceLayout =
         xegpu::getDistributeLayoutAttr(op.getVector());
     if (!sourceLayout || !sourceLayout.isForWorkgroup())
       return failure();
 
-    // Validate that result layout is transpose of source layout
     SmallVector<int64_t> sourceSgLayout =
         sourceLayout.getEffectiveSgLayoutAsInt();
     SmallVector<int64_t> sourceSgData = sourceLayout.getEffectiveSgDataAsInt();
     SmallVector<int64_t> resultSgLayout = layout.getEffectiveSgLayoutAsInt();
     SmallVector<int64_t> resultSgData = layout.getEffectiveSgDataAsInt();
+    DenseI32ArrayAttr sourceOrder = sourceLayout.getOrder();
+    DenseI32ArrayAttr resultOrder = layout.getOrder();
 
-    ArrayRef<int64_t> permutation = op.getPermutation();
-
-    // Check that sgLayout and sgData are properly transposed
-    if (sourceSgLayout.size() != resultSgLayout.size() ||
-        sourceSgData.size() != resultSgData.size() ||
-        sourceSgLayout.size() != permutation.size()) {
+    if (!sourceOrder || !resultOrder) {
       return rewriter.notifyMatchFailure(
-          op, "Source and result layouts must have same rank as permutation");
+          op, "Both source and result must have order attributes");
     }
 
-    // Validate sgLayout transpose
-    for (size_t i = 0; i < permutation.size(); ++i) {
-      int64_t srcDim = permutation[i];
-      if (srcDim < 0 || srcDim >= static_cast<int64_t>(sourceSgLayout.size())) {
-        return rewriter.notifyMatchFailure(op, "Invalid permutation index");
-      }
-      if (resultSgLayout[i] != sourceSgLayout[srcDim]) {
-        return rewriter.notifyMatchFailure(
-            op, "Result sgLayout is not transpose of source sgLayout according "
-                "to permutation");
-      }
+    SmallVector<int64_t> sourceOrderVec = llvm::to_vector(
+        llvm::map_range(sourceOrder.asArrayRef(),
+                        [](int32_t idx) { return static_cast<int64_t>(idx); }));
+    SmallVector<int64_t> resultOrderVec = llvm::to_vector(
+        llvm::map_range(resultOrder.asArrayRef(),
+                        [](int32_t idx) { return static_cast<int64_t>(idx); }));
+
+    ArrayRef<int64_t> permutation = op.getPermutation();
+    size_t expectedSize = permutation.size();
+    if (sourceSgLayout.size() != expectedSize ||
+        sourceSgData.size() != expectedSize ||
+        resultSgLayout.size() != expectedSize ||
+        resultSgData.size() != expectedSize ||
+        sourceOrderVec.size() != expectedSize ||
+        resultOrderVec.size() != expectedSize) {
+      return rewriter.notifyMatchFailure(
+          op, "All layouts and permutation must have the same rank");
     }
 
-    // Validate sgData transpose
+    // Check that sgLayout, sgData & order are properly transposed for operand
+    // and result
     for (size_t i = 0; i < permutation.size(); ++i) {
       int64_t srcDim = permutation[i];
-      if (resultSgData[i] != sourceSgData[srcDim]) {
+      if (resultSgLayout[i] != sourceSgLayout[srcDim] ||
+          resultSgData[i] != sourceSgData[srcDim] ||
+          resultOrderVec[i] != sourceOrderVec[srcDim]) {
         return rewriter.notifyMatchFailure(
-            op, "Result sgData is not transpose of source sgData according to "
-                "permutation");
+            op, "Result layout is not a valid transpose of source layout "
+                "according to permutation");
       }
     }
 
     SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
     VectorType newResultType =
         VectorType::get(sgShape, resultType.getElementType());
-
     SmallVector<Value> newTransposeOps;
     for (auto src : adaptor.getVector()) {
       auto newTranspose = vector::TransposeOp::create(
-          rewriter, op.getLoc(), newResultType, src, op.getPermutation());
+          rewriter, op.getLoc(), newResultType, src, permutation);
       if (!layout.getEffectiveLaneLayoutAsInt().empty() ||
           !layout.getEffectiveInstDataAsInt().empty())
         xegpu::setDistributeLayoutAttr(newTranspose->getResult(0),

@@ -1444,7 +1447,8 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
       });
 
   target.addDynamicallyLegalOp<vector::ShapeCastOp, vector::StepOp,
-                               vector::TransposeOp, vector::BroadcastOp>(
+                               vector::TransposeOp, vector::BroadcastOp,
+                               vector::MultiDimReductionOp>(
       [=](Operation *op) -> bool {
         // Check for either a SliceAttr or LayoutAttr on the result.
         auto layout = xegpu::getDistributeLayoutAttr(op->getResult(0));

@@ -1463,16 +1467,6 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
         return isLegal(layout);
       });
 
-  target.addDynamicallyLegalOp<vector::BroadcastOp>(
-      [=](vector::BroadcastOp op) -> bool {
-        return isLegal(xegpu::getDistributeLayoutAttr(op.getResult()));
-      });
-
-  target.addDynamicallyLegalOp<vector::MultiDimReductionOp>(
-      [=](vector::MultiDimReductionOp op) -> bool {
-        return isLegal(xegpu::getDistributeLayoutAttr(op.getResult()));
-      });
-
   target.addDynamicallyLegalOp<xegpu::ConvertLayoutOp>(
       [=](xegpu::ConvertLayoutOp op) -> bool {
         return isLegal(op.getInputLayout()) && isLegal(op.getTargetLayout());
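
For reference, here is a minimal MLIR sketch of the invariant the updated pattern enforces (the shapes and layout values are illustrative assumptions, not code from this commit): under a transpose with permutation [1, 0], sg_layout, sg_data, and order must all be permuted together, i.e. result[i] must equal source[permutation[i]] for each of the three fields.

    // Hypothetical fragment; %tdesc is assumed to be defined earlier.
    // Source layout: sg_layout = [4, 8], sg_data = [32, 16], order = [1, 0]
    %src = xegpu.load_nd %tdesc
      : !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 8], sg_data = [32, 16], order = [1, 0]>>
      -> vector<128x128xf32>
    // Result layout = source layout permuted by [1, 0]:
    //   sg_layout = [8, 4], sg_data = [16, 32], order = [0, 1]
    %t = vector.transpose %src, [1, 0]
      {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 32], order = [0, 1]>}
      : vector<128x128xf32> to vector<128x128xf32>

A mismatch in any one of the three fields now fails the pattern with the single "not a valid transpose" diagnostic.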

mlir/test/Dialect/XeGPU/xegpu-attr-interface.mlir

Lines changed: 0 additions & 2 deletions
@@ -10,7 +10,6 @@ gpu.module @test {
   // CHECK-DAG: %[[BASE:.*]] = vector.step : vector<32xindex>
   // CHECK-DAG: %[[CAST:.*]] = vector.broadcast %[[MOD]] : index to vector<32xindex>
   // CHECK-DAG: %[[ADD:.*]] = arith.addi %[[BASE]], %[[CAST]] : vector<32xindex>
-  // CHECK-DAG: %[[RESULT:.*]] = builtin.unrealized_conversion_cast %[[ADD]] : vector<32xindex> to vector<128xindex>
   %step = vector.step {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [4, 8], sg_data = [32, 32]>, dims = [1]>}: vector<128xindex>
   gpu.return %step : vector<128xindex>
 }

@@ -25,7 +24,6 @@ gpu.module @test {
   // CHECK-DAG: %[[BASE:.*]] = vector.step : vector<32xindex>
   // CHECK-DAG: %[[CAST:.*]] = vector.broadcast %[[MOD]] : index to vector<32xindex>
   // CHECK-DAG: %[[ADD:.*]] = arith.addi %[[BASE]], %[[CAST]] : vector<32xindex>
-  // CHECK-DAG: %[[RESULT:.*]] = builtin.unrealized_conversion_cast %[[ADD]] : vector<32xindex> to vector<128xindex>
   %0 = vector.step {layout_result_0 = #xegpu.slice<#xegpu.slice<#xegpu.layout<sg_layout = [4, 8, 1], sg_data = [32, 32, 1]>, dims = [2]>, dims = [1]>} : vector<128xindex>
   gpu.return %0 : vector<128xindex>
 }

mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-elemwise.mlir

Lines changed: 2 additions & 4 deletions
@@ -166,14 +166,12 @@ gpu.module @test_elementwise_ops {
     %load_b = xegpu.load_nd %tdesc_b
       : !xegpu.tensor_desc<24x32xf32, #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>>
       -> vector<24x32xf32>
-    // CHECK-COUNT-12: arith.negf {{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>}
-    // CHECK-SAME-COUNT-12: : vector<2x2xf32>
+    // CHECK-COUNT-12: arith.negf {{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>} : vector<2x2xf32>
     // CHECK-NOT: arith.negf
     %negf = arith.negf %load_a
       {layout_result_0 = #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>}
       : vector<24x32xf32>
-    // CHECK-COUNT-12: math.powf {{.*}}, {{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>}
-    // CHECK-SAME-COUNT-12: : vector<2x2xf32>
+    // CHECK-COUNT-12: math.powf {{.*}}, {{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>} : vector<2x2xf32>
     // CHECK-NOT: math.powf
     %powf = math.powf %load_a, %load_b
       {layout_result_0 = #xegpu.layout<sg_layout = [4, 4], sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>}

mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-rr.mlir

Lines changed: 9 additions & 27 deletions
@@ -4,14 +4,7 @@ gpu.module @test_round_robin_assignment {
   // CHECK-LABEL: create_nd_tdesc
   // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
   gpu.func @create_nd_tdesc(%src: memref<256x128xf32>) {
-    // CHECK: %[[SGID:.*]] = gpu.subgroup_id : index
-    // CHECK: %[[C4:.*]] = arith.constant 4 : index
-    // CHECK: %[[IDX:.*]] = index.remu %[[SGID]], %[[C4]]
-    // CHECK: %[[IDY_DIV:.*]] = index.divu %[[SGID]], %[[C4]]
-    // CHECK: %[[C8:.*]] = arith.constant 8 : index
-    // CHECK: %[[IDY:.*]] = index.remu %[[IDY_DIV]], %[[C8]]
-    // CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<256x128xf32>
-    // CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+    // CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<256x128xf32> -> !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     // CHECK-NOT: xegpu.create_nd_tdesc
     %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<256x128xf32>
       -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 16], lane_layout = [1, 16], lane_data = [1, 1]>>

@@ -46,9 +39,7 @@ gpu.module @test_round_robin_assignment {
   gpu.func @load_nd_tdesc(%src: memref<256x128xf32>) {
     %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<256x128xf32>
       -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 16], lane_layout = [1, 16], lane_data = [1, 1]>>
-    // CHECK-COUNT-4: xegpu.load_nd %{{.*}}
-    // CHECK-SAME-COUNT-4: : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
-    // CHECK-SAME-COUNT-4: -> vector<16x16xf32>
+    // CHECK-COUNT-4: xegpu.load_nd %{{.*}} : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<16x16xf32>
     // CHECK-NOT: xegpu.load_nd
     %load = xegpu.load_nd %tdesc
       : !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 16], lane_layout = [1, 16], lane_data = [1, 1]>>

@@ -61,8 +52,7 @@ gpu.module @test_round_robin_assignment {
   gpu.func @store_nd(%src: memref<256x128xf32>) {
     %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<256x128xf32>
       -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 16], lane_layout = [1, 16], lane_data = [1, 1]>>
-    // CHECK-COUNT-4: xegpu.store_nd %{{.*}}, %{{.*}}
-    // CHECK-SAME-COUNT-4: : vector<16x16xf32>, !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+    // CHECK-COUNT-4: xegpu.store_nd %{{.*}}, %{{.*}} : vector<16x16xf32>, !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     // CHECK-NOT: xegpu.store_nd
     %load = xegpu.load_nd %tdesc
       : !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 16], lane_layout = [1, 16], lane_data = [1, 1]>>

@@ -77,8 +67,7 @@ gpu.module @test_round_robin_assignment {
   gpu.func @update_nd(%src: memref<256x128xf32>){
     %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<256x128xf32>
       -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 16], lane_layout = [1, 16], lane_data = [1, 1]>>
-    // CHECK-COUNT-4: xegpu.update_nd_offset %{{.*}}, [0, 16]
-    // CHECK-SAME-COUNT-4: : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+    // CHECK-COUNT-4: xegpu.update_nd_offset %{{.*}}, [0, 16] : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     // CHECK-NOT: xegpu.update_nd_offset
     %update = xegpu.update_nd_offset %tdesc, [0, 16]
      : !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 16], lane_layout = [1, 16], lane_data = [1, 1]>>

@@ -88,13 +77,9 @@ gpu.module @test_round_robin_assignment {
   // CHECK-LABEL: dpas
   // CHECK-SAME: (%[[ARG_0:.*]]: memref<256x128xf16>, %[[ARG_1:.*]]: memref<128x256xf16>)
   gpu.func @dpas(%a: memref<256x128xf16>, %b: memref<128x256xf16>) {
-    // CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<256x128xf16>
-    // CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
-    // CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_1]][%{{.*}}, %{{.*}}] : memref<128x256xf16>
-    // CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>>
-    // CHECK-COUNT-16: xegpu.dpas %{{.*}}, %{{.*}}
-    // CHECK-SAME-COUNT-16: {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
-    // CHECK-SAME-COUNT-16: : vector<16x16xf16>, vector<16x16xf16> -> vector<16x16xf32>
+    // CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<256x128xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+    // CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_1]][%{{.*}}, %{{.*}}] : memref<128x256xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>>
+    // CHECK-COUNT-16: xegpu.dpas %{{.*}}, %{{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<16x16xf16>, vector<16x16xf16> -> vector<16x16xf32>
     // CHECK-NOT: xegpu.dpas
     %tdesc_a = xegpu.create_nd_tdesc %a[0, 0] : memref<256x128xf16>
       -> !xegpu.tensor_desc<256x128xf16, #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 16], lane_layout = [1, 16], lane_data = [1, 1]>>

@@ -115,8 +100,7 @@ gpu.module @test_round_robin_assignment {
   // CHECK-LABEL: prefetch_nd_tdesc
   // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
   gpu.func @prefetch_nd_tdesc(%src: memref<256x128xf32>) {
-    // CHECK-COUNT-4: xegpu.prefetch_nd %{{.*}}
-    // CHECK-SAME-COUNT-4: !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+    // CHECK-COUNT-4: xegpu.prefetch_nd %{{.*}} : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     // CHECK-NOT: xegpu.prefetch_nd
     %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<256x128xf32>
       -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 16], lane_layout = [1, 16], lane_data = [1, 1]>>

@@ -133,9 +117,7 @@ gpu.module @test_round_robin_assignment {
     %load = xegpu.load_nd %tdesc
       : !xegpu.tensor_desc<128x1xf32, #xegpu.layout<sg_layout = [4, 1], sg_data = [16, 1], lane_layout = [8, 1], lane_data = [1, 1]>>
       -> vector<128x1xf32>
-    // CHECK-COUNT-2: vector.broadcast {{.*}}
-    // CHECK-SAME-COUNT-2: {layout_result_0 = #xegpu.layout<lane_layout = [8, 1], lane_data = [1, 1]>}
-    // CHECK-SAME-COUNT-2: : vector<16x1xf32> to vector<16x32xf32>
+    // CHECK-COUNT-2: vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<lane_layout = [8, 1], lane_data = [1, 1]>} : vector<16x1xf32> to vector<16x32xf32>
     // CHECK-NOT: vector.broadcast
     %broadcast = vector.broadcast %load
       {layout_result_0 = #xegpu.layout<sg_layout = [4, 1], sg_data = [16, 32], lane_layout = [8, 1], lane_data = [1, 1]>}
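
A note on the consolidated CHECK lines in the test updates above: FileCheck defines CHECK-COUNT-<n> for matching a pattern a fixed number of times, but it has no CHECK-SAME-COUNT-<n> directive, so (as far as FileCheck's documented directive set goes) lines spelled that way are treated as plain text and never enforced. Keeping the whole pattern on the single CHECK-COUNT line makes the type and attribute portions load-bearing. A hypothetical illustration:

    // Continuation spelled this way is not a recognized directive; the
    // result type on the second line was never actually checked:
    // CHECK-COUNT-4: xegpu.load_nd %{{.*}}
    // CHECK-SAME-COUNT-4: -> vector<16x16xf32>

    // Single-line form; the full pattern, including the result type, is enforced:
    // CHECK-COUNT-4: xegpu.load_nd %{{.*}} -> vector<16x16xf32>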
