Skip to content

Commit d5d2713

Browse files
committed
save work
1 parent d06477e commit d5d2713

File tree

2 files changed

+21
-19
lines changed

2 files changed

+21
-19
lines changed

mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1475,11 +1475,12 @@ struct UpdateNdOffsetDistribution final : public gpu::WarpDistributionPattern {
14751475
subgroupOp, "warp result is not a xegpu::UpdateNdOffset op");
14761476
auto updateOp = operand->get().getDefiningOp<xegpu::UpdateNdOffsetOp>();
14771477
unsigned operandIdx = operand->getOperandNumber();
1478-
auto newTensorDescTy = dropLayouts(updateOp.getTensorDescType());
1478+
xegpu::TensorDescType newTensorDescTy =
1479+
dropLayouts(updateOp.getTensorDescType());
14791480

14801481
SmallVector<Value, 3> newYieldValues;
14811482
SmallVector<Type, 3> newYieldTypes;
1482-
for (auto operand : updateOp->getOperands()) {
1483+
for (Value operand : updateOp->getOperands()) {
14831484
newYieldValues.push_back(operand);
14841485
if (isa<xegpu::TensorDescType>(operand.getType())) {
14851486
newYieldTypes.push_back(newTensorDescTy);
@@ -1492,7 +1493,7 @@ struct UpdateNdOffsetDistribution final : public gpu::WarpDistributionPattern {
14921493
rewriter, subgroupOp, newYieldValues, newYieldTypes, newRetIndices);
14931494
rewriter.setInsertionPointAfter(newWarpOp);
14941495
SmallVector<Value> newUpdateOperands;
1495-
for (auto i : newRetIndices) {
1496+
for (size_t i : newRetIndices) {
14961497
if (isa<xegpu::TensorDescType>(newWarpOp.getResult(i).getType())) {
14971498
newUpdateOperands.push_back(resolveDistributedTy(
14981499
newWarpOp.getResult(i), newTensorDescTy, rewriter));
@@ -1519,7 +1520,7 @@ struct PrefetchNdDistribution final : public gpu::WarpDistributionPattern {
15191520
auto prefetchOp = dyn_cast_or_null<xegpu::PrefetchNdOp>(lastNode);
15201521
if (!prefetchOp)
15211522
return failure();
1522-
auto layout = prefetchOp.getTensorDescType().getLayoutAttr();
1523+
xegpu::LayoutAttr layout = prefetchOp.getTensorDescType().getLayoutAttr();
15231524
if (!layout)
15241525
return rewriter.notifyMatchFailure(
15251526
prefetchOp, "the source tensor descriptor lacks layout attribute");
@@ -1530,7 +1531,8 @@ struct PrefetchNdDistribution final : public gpu::WarpDistributionPattern {
15301531
gpu::WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndAppendReturns(
15311532
rewriter, subgroupOp, newYieldValues, newYieldTypes, newRetIndices);
15321533

1533-
auto newTensorDescTy = dropLayouts(prefetchOp.getTensorDescType());
1534+
xegpu::TensorDescType newTensorDescTy =
1535+
dropLayouts(prefetchOp.getTensorDescType());
15341536
rewriter.setInsertionPointAfter(newWarpOp);
15351537
SmallVector<Value> newPrefetchOperands = {resolveDistributedTy(
15361538
newWarpOp.getResult(newRetIndices[0]), newTensorDescTy, rewriter)};
@@ -1570,12 +1572,12 @@ struct GpuIndexOpDistribution final : public gpu::WarpDistributionPattern {
15701572
using gpu::WarpDistributionPattern::WarpDistributionPattern;
15711573
LogicalResult matchAndRewrite(gpu::WarpExecuteOnLane0Op subgroupOp,
15721574
PatternRewriter &rewriter) const override {
1573-
auto operand = getWarpResult(subgroupOp, llvm::IsaPred<IndexOp>);
1575+
OpOperand *operand = getWarpResult(subgroupOp, llvm::IsaPred<IndexOp>);
15741576
if (!operand)
15751577
return rewriter.notifyMatchFailure(subgroupOp,
15761578
"warp result is not a gpu index op");
1577-
auto indexOp = operand->template get().template getDefiningOp<IndexOp>();
1578-
unsigned operandIdx = operand->template getOperandNumber();
1579+
auto indexOp = operand->get().getDefiningOp<IndexOp>();
1580+
unsigned operandIdx = operand->getOperandNumber();
15791581
SmallVector<Value, 3> newYieldValues;
15801582
SmallVector<Type, 3> newYieldTypes;
15811583
for (auto operand : indexOp->template getOperands()) {
@@ -1587,7 +1589,7 @@ struct GpuIndexOpDistribution final : public gpu::WarpDistributionPattern {
15871589
rewriter, subgroupOp, newYieldValues, newYieldTypes, newRetIndices);
15881590
rewriter.setInsertionPointAfter(newWarpOp);
15891591
SmallVector<Value> newIndexOperands;
1590-
for (auto i : newRetIndices) {
1592+
for (size_t i : newRetIndices) {
15911593
newIndexOperands.push_back(newWarpOp.getResult(i));
15921594
}
15931595
auto newIndexOp = rewriter.create<IndexOp>(

mlir/test/Dialect/XeGPU/subgroup-distribution.mlir

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -162,14 +162,14 @@ gpu.func @create_nd_tdesc_non_memref(%arg0: ui64, %arg1: ui64,
162162
}
163163

164164
// -----
165-
// CHECK-LABEL: gpu.func @test_update_nd_offset_1d(
165+
// CHECK-LABEL: gpu.func @update_nd_offset_1d(
166166
// CHECK: %[[ARG0:[0-9a-zA-Z]+]]: memref<256xf32>) {
167167
// CHECK: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<1xf32>
168168
// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}] : memref<256xf32> -> !xegpu.tensor_desc<16xf32>
169169
// CHECK: %[[T1:.*]] = xegpu.update_nd_offset %[[T0]], [%c32] : !xegpu.tensor_desc<16xf32>
170170
// CHECK: xegpu.store_nd %[[CST]], %[[T1]] : vector<1xf32>, !xegpu.tensor_desc<16xf32>
171171
gpu.module @test {
172-
gpu.func @test_update_nd_offset_1d(%arg0: memref<256xf32>){
172+
gpu.func @update_nd_offset_1d(%arg0: memref<256xf32>){
173173
%c0 = arith.constant 0 : index
174174
%c32 = arith.constant 32 : index
175175
%1 = arith.constant dense<1.000000e+00> : vector<16xf32>
@@ -181,14 +181,14 @@ gpu.func @test_update_nd_offset_1d(%arg0: memref<256xf32>){
181181
}
182182

183183
// -----
184-
// CHECK-LABEL: gpu.func @test_update_nd_offset_2d
184+
// CHECK-LABEL: gpu.func @update_nd_offset_2d
185185
// CHECK: %[[ARG0:[0-9a-zA-Z]+]]: memref<256x256xf32>) {
186186
// CHECK: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<16xf32>
187187
// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}] : memref<256x256xf32> -> !xegpu.tensor_desc<16x16xf32>
188188
// CHECK: %[[T1:.*]] = xegpu.update_nd_offset %[[T0]], [%c32, %c32] : !xegpu.tensor_desc<16x16xf32>
189189
// CHECK: xegpu.store_nd %[[CST]], %[[T1]] : vector<16xf32>, !xegpu.tensor_desc<16x16xf32>
190190
gpu.module @test {
191-
gpu.func @test_update_nd_offset_2d(%arg0: memref<256x256xf32>){
191+
gpu.func @update_nd_offset_2d(%arg0: memref<256x256xf32>){
192192
%c0 = arith.constant 0 : index
193193
%c32 = arith.constant 32 : index
194194
%1 = arith.constant dense<1.000000e+00> : vector<16x16xf32>
@@ -200,12 +200,12 @@ gpu.func @test_update_nd_offset_2d(%arg0: memref<256x256xf32>){
200200
}
201201

202202
// -----
203-
// CHECK-LABEL: gpu.func @test_prefetch_2d
203+
// CHECK-LABEL: gpu.func @prefetch_2d
204204
// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<256x256xf16>) {
205205
// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}] : memref<256x256xf16> -> !xegpu.tensor_desc<16x16xf16>
206206
// CHECK: xegpu.prefetch_nd %[[T0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16>
207207
gpu.module @test {
208-
gpu.func @test_prefetch_2d(%arg0: memref<256x256xf16>){
208+
gpu.func @prefetch_2d(%arg0: memref<256x256xf16>){
209209
%c0 = arith.constant 0 : index
210210
%0 = xegpu.create_nd_tdesc %arg0[%c0, %c0] : memref<256x256xf16> -> !xegpu.tensor_desc<16x16xf16>
211211
xegpu.prefetch_nd %0 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>: !xegpu.tensor_desc<16x16xf16>
@@ -214,12 +214,12 @@ gpu.func @test_prefetch_2d(%arg0: memref<256x256xf16>){
214214
}
215215

216216
// -----
217-
// CHECK-LABEL: gpu.func @test_prefetch_1d
217+
// CHECK-LABEL: gpu.func @prefetch_1d
218218
// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<256xf16>) {
219219
// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][%{{.*}}] : memref<256xf16> -> !xegpu.tensor_desc<16xf16>
220220
// CHECK: xegpu.prefetch_nd %[[T0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16xf16>
221221
gpu.module @test {
222-
gpu.func @test_prefetch_1d(%arg0: memref<256xf16>){
222+
gpu.func @prefetch_1d(%arg0: memref<256xf16>){
223223
%c0 = arith.constant 0 : index
224224
%0 = xegpu.create_nd_tdesc %arg0[%c0] : memref<256xf16> -> !xegpu.tensor_desc<16xf16>
225225
xegpu.prefetch_nd %0 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>: !xegpu.tensor_desc<16xf16>
@@ -229,7 +229,7 @@ gpu.func @test_prefetch_1d(%arg0: memref<256xf16>){
229229

230230

231231
// -----
232-
// CHECK-LABEL: gpu.func @test_gemm_loop
232+
// CHECK-LABEL: gpu.func @gemm_loop
233233
// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<1024x1024xbf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<1024x1024xbf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<1024x1024xf32>) {
234234
// CHECK: %[[BLOCK_ID_Y:.*]] = gpu.block_id y
235235
// CHECK: %[[Y_COORD:.*]] = arith.muli %[[BLOCK_ID_Y]], %c16 : index
@@ -252,7 +252,7 @@ gpu.func @test_prefetch_1d(%arg0: memref<256xf16>){
252252
// CHECK: %[[T9:.*]] = vector.shape_cast %[[T5]] : vector<8x1xf32> to vector<8xf32>
253253
// CHECK: xegpu.store_nd %[[T9]], %[[T8]] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32>
254254
gpu.module @test {
255-
gpu.func @test_gemm_loop(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>, %arg2: memref<1024x1024xf32>){
255+
gpu.func @gemm_loop(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>, %arg2: memref<1024x1024xf32>){
256256
%c0 = arith.constant 0 : index
257257
%c16 = arith.constant 16 : index
258258
%c8 = arith.constant 8 : index

0 commit comments

Comments
 (0)