Skip to content

Commit a46810d

Browse files
authored
Improve the logic of checking tile usage with getEffectiveUsers. (#710)
1 parent d62c043 commit a46810d

File tree

9 files changed

+75
-56
lines changed

9 files changed

+75
-56
lines changed

include/imex/Utils/XeCommon.h

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ namespace imex {
3030
// xetile.TileType. They are not supported yet.
3131
bool isSupportedModule(mlir::gpu::GPUModuleOp mod);
3232

33+
int getOperandIndex(mlir::Operation *op, mlir::Value operand);
34+
mlir::BlockArgument getArgForOperand(mlir::scf::ForOp &op, mlir::Value operand);
35+
3336
mlir::ValueRange buildUnrealizedCast(mlir::OpBuilder &builder,
3437
mlir::TypeRange resultTypes,
3538
mlir::ValueRange inputs);
@@ -194,22 +197,6 @@ class TileUsageAnalysis {
194197
OTHER = 64
195198
};
196199

197-
int getOperandIndex(mlir::Operation *op, mlir::Value operand) {
198-
for (auto [i, value] : llvm::enumerate(op->getOperands())) {
199-
if (operand == value)
200-
return i;
201-
}
202-
return -1;
203-
};
204-
205-
mlir::BlockArgument getArgForOperand(mlir::scf::ForOp &op,
206-
mlir::Value operand) {
207-
auto idx = getOperandIndex(op, operand);
208-
auto numControls = op.getNumControlOperands();
209-
assert(idx >= (int)numControls);
210-
return op.getRegionIterArg(idx - numControls);
211-
};
212-
213200
llvm::DenseMap<mlir::Operation *, uint> Usage;
214201
};
215202

lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ class SgTileUnpackPackOpPattern
117117
mlir::LogicalResult
118118
matchAndRewrite(xetile::TileUnpackOp op, OpAdaptor adaptor,
119119
XeGPUOneToNPatterRewriter &rewriter) const override {
120-
121120
using funcTy = VectorTypedValue(mlir::Value, mlir::Value, mlir::Location,
122121
mlir::PatternRewriter &);
123122

lib/Dialect/XeTile/Transforms/Blocking.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include <llvm/ADT/SetVector.h>
3535
#include <llvm/Support/Debug.h>
3636

37+
#include <algorithm>
3738
#include <optional>
3839

3940
#include "imex/Dialect/XeTile/Transforms/Blocking.h"
@@ -171,6 +172,23 @@ getInnerBlockSizes(mlir::Operation *operation, mlir::Type elemTy, int height,
171172
return {};
172173
}
173174

175+
// Works similarly to getUsers. If a user is an scf::ForOp,
176+
// it returns the users of the corresponding scf::ForOp region iter argument instead.
177+
// TODO: generalize it to handle composite ops, e.g.,
178+
// scf::ForOp, scf::WhileOp, etc.
179+
static llvm::SmallVector<mlir::Operation *> getEffectiveUsers(mlir::Value val) {
180+
llvm::SmallVector<mlir::Operation *> users;
181+
for (auto user : val.getUsers()) {
182+
if (auto forOp = llvm::dyn_cast<mlir::scf::ForOp>(user)) {
183+
auto arg = getArgForOperand(forOp, val);
184+
users.append(arg.user_begin(), arg.user_end());
185+
} else {
186+
users.push_back(user);
187+
}
188+
}
189+
return users;
190+
}
191+
174192
// it blocks a constant dense value if it is used by XeTile operators,
175193
// e.g., tile_mma and store_tile. It currently extends a 2D vector into
176194
// 4D vector with the last 2 dim corresponding to block size.
@@ -470,17 +488,16 @@ struct InitTileOpPattern : public XeTileConversion<xetile::InitTileOp> {
470488
if (order[0] == 0 && order[1] == 1)
471489
transpose = true;
472490

473-
for (auto user : op->getUsers()) {
474-
if (llvm::dyn_cast<xetile::LoadTileOp>(user)) {
475-
auto loadTileOp = llvm::dyn_cast<xetile::LoadTileOp>(user);
491+
for (auto user : getEffectiveUsers(op)) {
492+
if (auto loadTileOp = llvm::dyn_cast<xetile::LoadTileOp>(user)) {
476493
if (isForDPASB(loadTileOp) && elementSize < 32) {
477494
vnni = true;
495+
break;
478496
}
479-
break;
480497
}
481498
}
482-
if (vnni && transpose && elementSize < 32) {
483499

500+
if (vnni && transpose && elementSize < 32) {
484501
int factor = 32 / elementSize;
485502
vnni = false;
486503
llvm::SmallVector<int64_t, 2> innerBlock = getInnerBlockSizes<Load>(

lib/Utils/XeCommon.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,22 @@
2424

2525
namespace imex {
2626

27+
int getOperandIndex(mlir::Operation *op, mlir::Value operand) {
28+
for (auto [i, value] : llvm::enumerate(op->getOperands())) {
29+
if (operand == value)
30+
return i;
31+
}
32+
return -1;
33+
};
34+
35+
mlir::BlockArgument getArgForOperand(mlir::scf::ForOp &op,
36+
mlir::Value operand) {
37+
auto idx = getOperandIndex(op, operand);
38+
auto numControls = op.getNumControlOperands();
39+
assert(idx >= (int)numControls);
40+
return op.getRegionIterArg(idx - numControls);
41+
};
42+
2743
bool isSupportedModule(mlir::gpu::GPUModuleOp mod) {
2844
bool hasTileTyInFuncTy = false;
2945
mod.walk<mlir::WalkOrder::PreOrder>([&](mlir::gpu::GPUFuncOp op) {

test/Dialect/XeTile/Transforms/blocking.mlir

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,28 +73,28 @@ gpu.module @test_kernel {
7373
//CHECK: %[[R1:.*]] = xetile.init_tile %[[arg0]][%[[c0]], %[[c0]]] : memref<32x128xf16> -> !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>
7474
%1 = xetile.init_tile %a[%c0, %c0] : memref<32x128xf16> -> !xetile.tile<32x32xf16>
7575

76-
//CHECK: %[[R2:.*]] = xetile.init_tile %[[arg1]][%[[c0]], %[[c0]]] : memref<128x32xf16> -> !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>
76+
//CHECK: %[[R2:.*]] = xetile.init_tile %[[arg1]][%[[c0]], %[[c0]]] : memref<128x32xf16> -> !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 16]>>
7777
%2 = xetile.init_tile %b[%c0, %c0] : memref<128x32xf16> -> !xetile.tile<32x32xf16>
7878

7979
//CHECK: %[[R3:.*]] = xetile.tile_pack %[[R0]] { inner_blocks = [32, 16] } : vector<32x32xf32> -> vector<1x2x32x16xf32>
8080
//CHECK: %[[R4:.*]]:3 = scf.for %[[arg3:.*]] = %[[c0]] to %[[c128]] step %[[c32]]
8181
//CHECK-SAME: iter_args(%[[arg4:.*]] = %[[R1]], %[[arg5:.*]] = %[[R2]], %[[arg6:.*]] = %[[R3]])
8282
//CHECK-SAME: !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>,
83-
//CHECK-SAME: !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>, vector<1x2x32x16xf32>
83+
//CHECK-SAME: !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 16]>>, vector<1x2x32x16xf32>
8484
%out:3 = scf.for %k = %c0 to %c128 step %c32 iter_args(%a_tile = %1, %b_tile = %2, %c_value = %cst)
8585
-> (!xetile.tile<32x32xf16>, !xetile.tile<32x32xf16>, vector<32x32xf32>) {
8686
//CHECK: %[[R8:.*]] = xetile.load_tile %[[arg4]] { padding = 0.000000e+00 : f32 } : !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>> -> vector<1x1x32x32xf16>
8787
//CHECK: %[[R9:.*]] = xetile.tile_unpack %[[R8]] { inner_blocks = [32, 32] } : vector<1x1x32x32xf16> -> vector<32x32xf16>
8888
%3 = xetile.load_tile %a_tile : !xetile.tile<32x32xf16> -> vector<32x32xf16>
8989

90-
//CHECK: %[[R10:.*]] = xetile.load_tile %[[arg5]] { padding = 0.000000e+00 : f32 } : !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>> -> vector<1x1x32x32xf16>
91-
//CHECK: %[[R11:.*]] = xetile.tile_unpack %[[R10]] { inner_blocks = [32, 32] } : vector<1x1x32x32xf16> -> vector<32x32xf16>
90+
//CHECK: %[[R10:.*]] = xetile.load_tile %[[arg5]] { padding = 0.000000e+00 : f32 } : !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 16]>> -> vector<1x2x32x16xf16>
91+
//CHECK: %[[R11:.*]] = xetile.tile_unpack %[[R10]] { inner_blocks = [32, 16] } : vector<1x2x32x16xf16> -> vector<32x32xf16>
9292
%4 = xetile.load_tile %b_tile : !xetile.tile<32x32xf16> -> vector<32x32xf16>
9393

9494
//CHECK: %[[R12:.*]] = xetile.update_tile_offset %[[arg4]], [%[[c0]], %[[c32]]] : !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>, index, index -> !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>
9595
%a_next_tile = xetile.update_tile_offset %a_tile, [%c0, %c32]: !xetile.tile<32x32xf16>, index, index -> !xetile.tile<32x32xf16>
9696

97-
//CHECK: %[[R13:.*]] = xetile.update_tile_offset %[[arg5]], [%[[c32]], %[[c0]]] : !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>, index, index -> !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>
97+
//CHECK: %[[R13:.*]] = xetile.update_tile_offset %[[arg5]], [%[[c32]], %[[c0]]] : !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 16]>>, index, index -> !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 16]>>
9898
%b_next_tile = xetile.update_tile_offset %b_tile, [%c32, %c0]: !xetile.tile<32x32xf16>, index, index -> !xetile.tile<32x32xf16>
9999

100100
//CHECK: %[[R14:.*]] = xetile.tile_pack %[[R9]] { inner_blocks = [8, 16] } : vector<32x32xf16> -> vector<4x2x8x16xf16>
@@ -107,7 +107,7 @@ gpu.module @test_kernel {
107107

108108
//CHECK: %[[R19:.*]] = xetile.tile_unpack %[[R18]] { inner_blocks = [8, 16] } : vector<4x2x8x16xf32> -> vector<32x32xf32>
109109
//CHECK: %[[R20:.*]] = xetile.tile_pack %[[R19]] { inner_blocks = [32, 16] } : vector<32x32xf32> -> vector<1x2x32x16xf32>
110-
//CHECK: scf.yield %[[R12]], %[[R13]], %[[R20]] : !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>, !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>, vector<1x2x32x16xf32>
110+
//CHECK: scf.yield %[[R12]], %[[R13]], %[[R20]] : !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>, !xetile.tile<32x32xf16, #xetile.tile_attr<inner_blocks = [32, 16]>>, vector<1x2x32x16xf32>
111111
scf.yield %a_next_tile, %b_next_tile, %c_new_value : !xetile.tile<32x32xf16>, !xetile.tile<32x32xf16>, vector<32x32xf32>
112112
}
113113
//CHECK: %[[REG5:.*]] = xetile.tile_unpack %[[R4]]#2 { inner_blocks = [32, 16] } : vector<1x2x32x16xf32> -> vector<32x32xf32>

test/Dialect/XeTile/Transforms/sg_gemm_1k_1k_1k_f16_f32.mlir

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,15 @@ gpu.module @test_kernel {
4848
%a_init_tile = xetile.init_tile %A[%m, %c0] : memref<1024x1024xf16> -> !xetile.tile<64x64xf16>
4949

5050
//CHECK: %[[R9:.*]] = xetile.init_tile %[[B]][%[[C0]], %[[R3]]]
51-
//CHECK-SAME: memref<1024x1024xf16> -> !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>
51+
//CHECK-SAME: memref<1024x1024xf16> -> !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 16]>>
5252
%b_init_tile = xetile.init_tile %B[%c0, %n] : memref<1024x1024xf16> -> !xetile.tile<64x64xf16>
5353

5454
// compute the value of C tile by iterating over tiles in k-dimension and doing dpas
5555
//CHECK: %[[R10:.*]] = xetile.tile_pack %[[R7]] { inner_blocks = [32, 16] } : vector<64x64xf32> -> vector<2x4x32x16xf32>
5656
//CHECK: %[[R11:.*]]:3 = scf.for %[[arg3:.*]] = %[[C0]] to %[[C2]] step %[[C1]]
5757
//CHECK-SAME: iter_args(%[[arg4:.*]] = %[[R8]], %[[arg5:.*]] = %[[R9]], %[[arg6:.*]] = %[[R10]])
5858
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>
59-
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>, vector<2x4x32x16xf32>
59+
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 16]>>, vector<2x4x32x16xf32>
6060
%out:3 = scf.for %k = %c0 to %c1024 step %c64
6161
iter_args(%a_tile = %a_init_tile, %b_tile = %b_init_tile, %c_value = %c_init_value)
6262
-> (!xetile.tile<64x64xf16>, !xetile.tile<64x64xf16>, vector<64x64xf32>) {
@@ -67,8 +67,8 @@ gpu.module @test_kernel {
6767
%a_value = xetile.load_tile %a_tile : !xetile.tile<64x64xf16> -> vector<64x64xf16>
6868

6969
//CHECK: %[[R16:.*]] = xetile.load_tile %[[arg5]] { padding = 0.000000e+00 : f32 }
70-
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 32]>> -> vector<2x2x32x32xf16>
71-
//CHECK: %[[R17:.*]] = xetile.tile_unpack %[[R16]] { inner_blocks = [32, 32] } : vector<2x2x32x32xf16> -> vector<64x64xf16>
70+
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 16]>> -> vector<2x4x32x16xf16>
71+
//CHECK: %[[R17:.*]] = xetile.tile_unpack %[[R16]] { inner_blocks = [32, 16] } : vector<2x4x32x16xf16> -> vector<64x64xf16>
7272
%b_value = xetile.load_tile %b_tile : !xetile.tile<64x64xf16> -> vector<64x64xf16>
7373

7474
// perform dpas and accumulate
@@ -87,14 +87,14 @@ gpu.module @test_kernel {
8787
%a_next_tile = xetile.update_tile_offset %a_tile, [%c0, %c64] : !xetile.tile<64x64xf16>, index, index -> !xetile.tile<64x64xf16>
8888

8989
//CHECK: %[[R25:.*]] = xetile.update_tile_offset %[[arg5]], [%[[C1]], %[[C0]]]
90-
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>, index, index
91-
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>
90+
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 16]>>, index, index
91+
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 16]>>
9292
%b_next_tile = xetile.update_tile_offset %b_tile, [%c64, %c0] : !xetile.tile<64x64xf16>, index, index -> !xetile.tile<64x64xf16>
9393

9494
//CHECK: %[[R26:.*]] = xetile.tile_pack %[[R23]] { inner_blocks = [32, 16] } : vector<64x64xf32> -> vector<2x4x32x16xf32>
9595
//CHECK: scf.yield %[[R24]], %[[R25]], %[[R26]]
9696
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>
97-
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 32]>>, vector<2x4x32x16xf32>
97+
//CHECK-SAME: !xetile.tile<64x64xf16, #xetile.tile_attr<inner_blocks = [32, 16]>>, vector<2x4x32x16xf32>
9898
scf.yield %a_next_tile, %b_next_tile, %c_new_value
9999
: !xetile.tile<64x64xf16>, !xetile.tile<64x64xf16>, vector<64x64xf32>
100100
}

test/Dialect/XeTile/Transforms/sg_gemm_1k_1k_1k_i8_i32.mlir

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,20 +44,20 @@ gpu.module @test_kernel {
4444
%a_init_tile = xetile.init_tile %A[%m, %c0] : memref<1024x1024xi8> -> !xetile.tile<64x64xi8>
4545

4646
//CHECK: %[[R9:.*]] = xetile.init_tile %[[arg1]][%[[c0]], %[[R3]]]
47-
//CHECK-SAME: memref<1024x1024xi8> -> !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 64]>>
47+
//CHECK-SAME: memref<1024x1024xi8> -> !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 16]>>
4848
%b_init_tile = xetile.init_tile %B[%c0, %n] : memref<1024x1024xi8> -> !xetile.tile<64x64xi8>
4949

5050
//CHECK: %[[R10:.*]] = xetile.tile_pack %[[R7]] { inner_blocks = [32, 16] } : vector<64x64xi32> -> vector<2x4x32x16xi32>
5151
//CHECK: %[[R11:.*]]:3 = scf.for %[[arg3:.*]] = %[[c0]] to %[[c1024]] step %[[c64]] iter_args(%[[arg4:.*]] = %[[R8]], %[[arg5:.*]] = %[[R9]], %[[arg6:.*]] = %[[R10]])
52-
//CHECK-SAME: !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 64]>>, !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 64]>>, vector<2x4x32x16xi32>
52+
//CHECK-SAME: !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 64]>>, !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 16]>>, vector<2x4x32x16xi32>
5353
%out:3 = scf.for %k = %c0 to %c1024 step %c64 iter_args(%a_tile = %a_init_tile, %b_tile = %b_init_tile, %c_value = %c_init_value)
5454
-> (!xetile.tile<64x64xi8>, !xetile.tile<64x64xi8>, vector<64x64xi32>) {
5555
//CHECK: %[[R14:.*]] = xetile.load_tile %[[arg4]] { padding = 0 : i32 } : !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 64]>> -> vector<2x1x32x64xi8>
5656
//CHECK: %[[R15:.*]] = xetile.tile_unpack %[[R14]] { inner_blocks = [32, 64] } : vector<2x1x32x64xi8> -> vector<64x64xi8>
5757
%a_value = xetile.load_tile %a_tile : !xetile.tile<64x64xi8> -> vector<64x64xi8>
5858

59-
//CHECK: %[[R16:.*]] = xetile.load_tile %[[arg5]] { padding = 0 : i32 } : !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 64]>> -> vector<2x1x32x64xi8>
60-
//CHECK: %[[R17:.*]] = xetile.tile_unpack %[[R16]] { inner_blocks = [32, 64] } : vector<2x1x32x64xi8> -> vector<64x64xi8>
59+
//CHECK: %[[R16:.*]] = xetile.load_tile %[[arg5]] { padding = 0 : i32 } : !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 16]>> -> vector<2x4x32x16xi8>
60+
//CHECK: %[[R17:.*]] = xetile.tile_unpack %[[R16]] { inner_blocks = [32, 16] } : vector<2x4x32x16xi8> -> vector<64x64xi8>
6161
%b_value = xetile.load_tile %b_tile : !xetile.tile<64x64xi8> -> vector<64x64xi8>
6262

6363
//CHECK: %[[R18:.*]] = xetile.tile_pack %[[R15]] { inner_blocks = [8, 8] } : vector<64x64xi8> -> vector<8x8x8x8xi8>
@@ -74,14 +74,14 @@ gpu.module @test_kernel {
7474
%a_next_tile = xetile.update_tile_offset %a_tile, [%c0, %c64] : !xetile.tile<64x64xi8>, index, index -> !xetile.tile<64x64xi8>
7575

7676
//CHECK: %[[R25:.*]] = xetile.update_tile_offset %[[arg5]], [%[[c64]], %[[c0]]]
77-
//CHECK-SAME: !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 64]>>, index, index
78-
//CHECK-SAME: !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 64]>>
77+
//CHECK-SAME: !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 16]>>, index, index
78+
//CHECK-SAME: !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 16]>>
7979
%b_next_tile = xetile.update_tile_offset %b_tile, [%c64, %c0] : !xetile.tile<64x64xi8>, index, index -> !xetile.tile<64x64xi8>
8080

8181
//CHECK: %[[R26:.*]] = xetile.tile_pack %[[R23]] { inner_blocks = [32, 16] } : vector<64x64xi32> -> vector<2x4x32x16xi32>
8282
//CHECK: scf.yield %[[R24]], %[[R25]], %[[R26]]
8383
//CHECK-SAME: !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 64]>>
84-
//CHECK-SAME: !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 64]>>, vector<2x4x32x16xi32>
84+
//CHECK-SAME: !xetile.tile<64x64xi8, #xetile.tile_attr<inner_blocks = [32, 16]>>, vector<2x4x32x16xi32>
8585
scf.yield %a_next_tile, %b_next_tile, %c_new_value : !xetile.tile<64x64xi8>, !xetile.tile<64x64xi8>, vector<64x64xi32>
8686
}
8787
//CHECK: %[[R12:.*]] = xetile.tile_unpack %[[R11]]#2 { inner_blocks = [32, 16] } : vector<2x4x32x16xi32> -> vector<64x64xi32>

0 commit comments

Comments
 (0)