
Commit b005ec2

Bump IREE to 055ce1f (#1124)
The main change is to update `tensor.pack/unpack` to `linalg.pack/unpack`, following the upstream change llvm/llvm-project#123902.
1 parent 5cfa07a commit b005ec2

15 files changed: +186 −186 lines
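For context: upstream MLIR moved the pack/unpack ops from the tensor dialect into the linalg dialect, so the changes below are largely a mechanical spelling update. A minimal sketch of what that means for downstream C++ (header paths and the helper are illustrative assumptions, not code from this commit):

#include "mlir/Dialect/Linalg/IR/Linalg.h"  // now declares PackOp/UnPackOp
#include "mlir/IR/Operation.h"

using namespace mlir;

// Previously: isa<tensor::PackOp, tensor::UnPackOp>(op), with the classes
// coming from "mlir/Dialect/Tensor/IR/Tensor.h". Behavior is unchanged;
// only the owning dialect (and thus namespace/header) moved.
static bool isPackOrUnPack(Operation *op) {
  return isa<linalg::PackOp, linalg::UnPackOp>(op);
}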

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEBufferizeToAllocation.cpp

Lines changed: 1 addition & 1 deletion
@@ -76,7 +76,7 @@ static FailureOr<SmallVector<Value>> getPackOrCopyOperands(
   uint32_t currentLevel{0};
   Operation *currentOp = input.value().getDefiningOp();
   while (currentLevel < depthLevel && currentOp != nullptr) {
-    if (dyn_cast<tensor::PackOp>(currentOp)) {
+    if (dyn_cast<linalg::PackOp>(currentOp)) {
       currentLevel++;
       if (currentLevel == depthLevel) break;
     } else if (dyn_cast<linalg::CopyOp>(currentOp)) {
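The hunk only shows the match site; below is a hedged, self-contained sketch of the def-chain walk it belongs to. The advance step and includes are assumptions for illustration, not code from this commit:

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"

using namespace mlir;

// Walk producer-to-producer from `input` until `depthLevel` pack ops have
// been crossed; only pack/copy ops may appear on the chain (hypothetical
// stand-alone version of the loop above).
static Operation *walkToPackDepth(Value input, uint32_t depthLevel) {
  uint32_t currentLevel = 0;
  Operation *currentOp = input.getDefiningOp();
  while (currentOp && currentLevel < depthLevel) {
    if (isa<linalg::PackOp>(currentOp)) {
      if (++currentLevel == depthLevel) break;  // found the target pack
    } else if (!isa<linalg::CopyOp>(currentOp)) {
      return nullptr;  // anything else terminates the search
    }
    currentOp = currentOp->getOperand(0).getDefiningOp();  // assumed step
  }
  return currentOp;
}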

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEFuseProducerIntoLoop.cpp

Lines changed: 6 additions & 6 deletions
@@ -21,7 +21,7 @@ namespace {
 
 /// A utility function specific to this pass which, given a value `operand`,
 /// traverses the def-chain till it finds a tensor.extract_slice. Currently,
-/// the two producer ops that are allowed in the def-chain are tensor.pack and
+/// the two producer ops that are allowed in the def-chain are linalg.pack and
 /// linalg.copy ops. The 2 cases where it successfully finds and returns an
 /// extract_slice (SLICE) are:
 ///
@@ -39,7 +39,7 @@ namespace {
 static FailureOr<tensor::ExtractSliceOp> getTensorExtractSliceDefiningOp(
     Value operand) {
   // Roll back through all the pack or copy ops immediately preceding `operand`.
-  while (isa_and_present<tensor::PackOp, linalg::CopyOp>(
+  while (isa_and_present<linalg::PackOp, linalg::CopyOp>(
       operand.getDefiningOp())) {
     operand = operand.getDefiningOp()->getOperand(0);
   }
@@ -49,7 +49,7 @@ static FailureOr<tensor::ExtractSliceOp> getTensorExtractSliceDefiningOp(
   if (!sliceOp) return failure();
 
   // Case 1 outlined above.
-  if (isa_and_present<tensor::PackOp, linalg::CopyOp>(
+  if (isa_and_present<linalg::PackOp, linalg::CopyOp>(
          sliceOp.getSource().getDefiningOp())) {
     return sliceOp;
   }
@@ -60,7 +60,7 @@ static FailureOr<tensor::ExtractSliceOp> getTensorExtractSliceDefiningOp(
     LoopLikeOpInterface loop = dyn_cast<LoopLikeOpInterface>(parent);
     if (!loop) return failure();
     Operation *operandParent = loop.getTiedLoopInit(blkArg)->getOwner();
-    if (isa_and_present<tensor::PackOp, linalg::CopyOp>(operandParent))
+    if (isa_and_present<linalg::PackOp, linalg::CopyOp>(operandParent))
       return sliceOp;
   }
 
@@ -110,7 +110,7 @@ void AMDAIEFuseProducerIntoLoopPass::runOnOperation() {
   LoopLikeOpInterface loops = cast<LoopLikeOpInterface>(scfLoopOp);
 
   // Based on the `fuseDepth`, we would greedily fuse the producers of a linalg
-  // computation op. Currently, we are limiting the producers to tensor.pack or
+  // computation op. Currently, we are limiting the producers to linalg.pack or
   // linalg.copy ops.
   for (unsigned depth = 1; depth <= fuseDepth; depth++) {
     // Search the last compute op in the loop and its producer slices.
@@ -153,7 +153,7 @@ void AMDAIEFuseProducerIntoLoopPass::runOnOperation() {
 
     // Case where operand of a generic op is a pack/copy op which is in a
     // different block than the generic's block.
-    else if (isa_and_present<tensor::PackOp, linalg::CopyOp>(
+    else if (isa_and_present<linalg::PackOp, linalg::CopyOp>(
                  operand.getDefiningOp())) {
       Operation *parent = operand.getDefiningOp();
       Block *genericBlock = genericOp->getBlock();
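A note on the helper seen throughout this file: `isa_and_present` is the null-tolerant form of `isa`; it returns false for a null pointer instead of asserting. That matters here because `Value::getDefiningOp()` returns null for block arguments. A small stand-alone illustration (hypothetical helper, not from the pass):

#include "llvm/Support/Casting.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/IR/Value.h"

using namespace mlir;

static bool producedByPackOrCopy(Value v) {
  // Null-safe: block arguments have no defining op, so plain isa would
  // assert here while isa_and_present simply returns false.
  return llvm::isa_and_present<linalg::PackOp, linalg::CopyOp>(
      v.getDefiningOp());
}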

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEPackAndTranspose.cpp

Lines changed: 3 additions & 3 deletions
@@ -106,9 +106,9 @@ void AMDAIEPackAndTransposePass::runOnOperation() {
   }
 
   // Step 3. Pack Transpose
-  SmallVector<tensor::PackOp> packOps = packResult->packOps;
+  SmallVector<linalg::PackOp> packOps = packResult->packOps;
   linalg::LinalgOp packedOp = packResult->packedLinalgOp;
-  SmallVector<tensor::UnPackOp> unpackOps = packResult->unPackOps;
+  SmallVector<linalg::UnPackOp> unpackOps = packResult->unPackOps;
 
   if (packOps.size() != 3 || !packedOp || unpackOps.empty()) {
     funcOp->emitOpError("failed to get correct pack and unpack ops");
@@ -122,7 +122,7 @@ void AMDAIEPackAndTransposePass::runOnOperation() {
 
   for (auto [index, unpackEmpty, innerPerm, outerPerm] :
        llvm::zip(packIndices, unpackArr, innerPermArr, outerPermArr)) {
-    tensor::UnPackOp unpackOp;
+    linalg::UnPackOp unpackOp;
     if (unpackEmpty) {
       unpackOp = unpackOps.back();
     }
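The update loop above walks several parallel arrays at once via `llvm::zip` and C++17 structured bindings. A toy, runnable version of the idiom (names and values are illustrative only):

#include <cstdio>

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

int main() {
  llvm::SmallVector<int> packIndices = {0, 1, 2};
  llvm::SmallVector<bool> unpackArr = {false, false, true};
  // zip stops at the shortest range and yields one tuple per step.
  for (auto [index, unpackEmpty] : llvm::zip(packIndices, unpackArr))
    std::printf("pack %d: has matching unpack? %s\n", index,
                unpackEmpty ? "yes" : "no");
  return 0;
}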

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIETileAndFuse.cpp

Lines changed: 4 additions & 4 deletions
@@ -178,8 +178,8 @@ static bool isTilingReductionDimension(TilingInterface consumerOp,
 }
 
 static bool consumerToSkip(TilingInterface op) {
-  if (isa<linalg::CopyOp>(op) || isa<tensor::PackOp>(op) ||
-      isa<tensor::UnPackOp>(op))
+  if (isa<linalg::CopyOp>(op) || isa<linalg::PackOp>(op) ||
+      isa<linalg::UnPackOp>(op))
     return true;
   return false;
 }
@@ -279,7 +279,7 @@ void AMDAIETileAndFusePass::runOnOperation() {
   TilingInterface consumerOp;
   funcOp->walk<WalkOrder::PostOrder, ReverseIterator>([&](TilingInterface op) {
     // Find the next consumer op if it does not have loops OR it is from
-    // the skip ops list which currently contains linalg.copy and tensor.unpack.
+    // the skip ops list which currently contains linalg.copy and linalg.unpack.
     if (op.getLoopIteratorTypes().empty() || consumerToSkip(op))
       return WalkResult::advance();
 
@@ -356,7 +356,7 @@ void AMDAIETileAndFusePass::runOnOperation() {
   bool fusableOp =
       TypeSwitch<Operation *, bool>(originalProducer.getOwner())
           // List ops that shouldnt be fused.
-          .Case<tensor::PackOp, tensor::PadOp, linalg::CopyOp,
+          .Case<linalg::PackOp, tensor::PadOp, linalg::CopyOp,
                 memref::CopyOp>([](Operation *) { return false; })
          // Fuse all Linalg ops (can be generalized later)
          .Default([&](Operation *op) {
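The fusability filter dispatches on the concrete op type with `llvm::TypeSwitch`. A stripped-down sketch of the pattern (hypothetical helper, not the pass itself):

#include "llvm/ADT/TypeSwitch.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"

using namespace mlir;

static bool isFusableProducer(Operation *op) {
  return llvm::TypeSwitch<Operation *, bool>(op)
      // Data-movement style ops stay out of the fused loop nest.
      .Case<linalg::PackOp, tensor::PadOp, linalg::CopyOp, memref::CopyOp>(
          [](Operation *) { return false; })
      // Everything else counts as fusable in this sketch.
      .Default([](Operation *) { return true; });
}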

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp

Lines changed: 1 addition & 1 deletion
@@ -794,7 +794,7 @@ void addMLIRAIELoweringPasses(OpPassManager &pm) {
   pm.addPass(createCanonicalizerPass());
   pm.addPass(createConvertLinalgToLoopsPass());
   pm.addPass(createLowerAffinePass());
-  pm.addPass(createConvertSCFToCFPass());
+  pm.addPass(createSCFToControlFlowPass());
 
   {
     OpPassManager &devicePM = pm.nest<xilinx::AIE::DeviceOp>();
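Only the factory name changed here (`createConvertSCFToCFPass` to `createSCFToControlFlowPass`); the pass still lowers structured control flow (`scf.for`, `scf.while`, `scf.if`) into `cf`-dialect branches. A minimal usage sketch, with the header path assumed:

#include "mlir/Conversion/Passes.h"
#include "mlir/Pass/PassManager.h"

static void addControlFlowLowering(mlir::OpPassManager &pm) {
  // Structured control flow -> cf.br / cf.cond_br based blocks.
  pm.addPass(mlir::createSCFToControlFlowPass());
}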

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td

Lines changed: 1 addition & 1 deletion
@@ -354,7 +354,7 @@ def AMDAIEFuseProducerIntoLoop :
   let description = [{
     Greedily fuse the producers of a linalg computation op based on the `fuseDepth`.
     Currently, the two producer ops that are allowed in the defining op chain are
-    tensor.pack and linalg.copy ops.
+    linalg.pack and linalg.copy ops.
   }];
   let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIEFuseProducerIntoLoopPass()";
   let options = [

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/bufferize_to_allocation.mlir

Lines changed: 42 additions & 42 deletions
@@ -14,14 +14,14 @@ func.func @matmul_static(%arg0 : tensor<1024x2048xi32>, %arg1 : tensor<2048x512x
   %c0 = arith.constant 0 : index
   %5 = tensor.empty() : tensor<1024x512xi32>
   %6 = tensor.empty() : tensor<16x32x64x64xi32>
-  %pack = tensor.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %6 : tensor<1024x2048xi32> -> tensor<16x32x64x64xi32>
+  %pack = linalg.pack %arg0 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %6 : tensor<1024x2048xi32> -> tensor<16x32x64x64xi32>
   %7 = tensor.empty() : tensor<32x8x64x64xi32>
-  %pack_0 = tensor.pack %arg1 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %7 : tensor<2048x512xi32> -> tensor<32x8x64x64xi32>
+  %pack_0 = linalg.pack %arg1 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %7 : tensor<2048x512xi32> -> tensor<32x8x64x64xi32>
   %8 = tensor.empty() : tensor<16x8x64x64xi32>
   %9 = tensor.empty() : tensor<16x32x16x8x4x8xi32>
-  %pack_1 = tensor.pack %pack inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %9 : tensor<16x32x64x64xi32> -> tensor<16x32x16x8x4x8xi32>
+  %pack_1 = linalg.pack %pack inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %9 : tensor<16x32x64x64xi32> -> tensor<16x32x16x8x4x8xi32>
   %10 = tensor.empty() : tensor<32x8x8x8x8x8xi32>
-  %pack_2 = tensor.pack %pack_0 inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %10 : tensor<32x8x64x64xi32> -> tensor<32x8x8x8x8x8xi32>
+  %pack_2 = linalg.pack %pack_0 inner_dims_pos = [3, 2] inner_tiles = [8, 8] into %10 : tensor<32x8x64x64xi32> -> tensor<32x8x8x8x8x8xi32>
   %11 = tensor.empty() : tensor<16x8x16x8x4x8xi32>
   %12 = linalg.fill ins(%c0_i32 : i32) outs(%11 : tensor<16x8x16x8x4x8xi32>) -> tensor<16x8x16x8x4x8xi32>
   %13 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]} ins(%pack_1, %pack_2 : tensor<16x32x16x8x4x8xi32>, tensor<32x8x8x8x8x8xi32>) outs(%12 : tensor<16x8x16x8x4x8xi32>) {
@@ -30,63 +30,63 @@ func.func @matmul_static(%arg0 : tensor<1024x2048xi32>, %arg1 : tensor<2048x512x
     %15 = arith.addi %out, %14 : i32
     linalg.yield %15 : i32
   } -> tensor<16x8x16x8x4x8xi32>
-  %unpack = tensor.unpack %13 inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %8 : tensor<16x8x16x8x4x8xi32> -> tensor<16x8x64x64xi32>
-  %unpack_3 = tensor.unpack %unpack inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %5 : tensor<16x8x64x64xi32> -> tensor<1024x512xi32>
+  %unpack = linalg.unpack %13 inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %8 : tensor<16x8x16x8x4x8xi32> -> tensor<16x8x64x64xi32>
+  %unpack_3 = linalg.unpack %unpack inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %5 : tensor<16x8x64x64xi32> -> tensor<1024x512xi32>
   return %unpack_3 : tensor<1024x512xi32>
 }
 
 // LINALG-INPUT-OUTPUT-NOT: memref.alloc
-// LINALG-INPUT-OUTPUT: tensor.pack
+// LINALG-INPUT-OUTPUT: linalg.pack
 // LINALG-INPUT-OUTPUT-NOT: memref.alloc
-// LINALG-INPUT-OUTPUT: tensor.pack
+// LINALG-INPUT-OUTPUT: linalg.pack
 // LINALG-INPUT-OUTPUT: memref.alloc() : memref<16x32x16x8x4x8xi32, 2 : i32>
 // LINALG-INPUT-OUTPUT: bufferization.to_tensor
-// LINALG-INPUT-OUTPUT: tensor.pack
+// LINALG-INPUT-OUTPUT: linalg.pack
 // LINALG-INPUT-OUTPUT: memref.alloc() : memref<32x8x8x8x8x8xi32, 2 : i32>
 // LINALG-INPUT-OUTPUT: bufferization.to_tensor
-// LINALG-INPUT-OUTPUT: tensor.pack
+// LINALG-INPUT-OUTPUT: linalg.pack
 // LINALG-INPUT-OUTPUT: memref.alloc() : memref<16x8x16x8x4x8xi32, 2 : i32>
 // LINALG-INPUT-OUTPUT: bufferization.to_tensor
 // LINALG-INPUT-OUTPUT: linalg.fill
 // LINALG-INPUT-OUTPUT: linalg.generic
 
 // LINALG-INPUT-NOT: memref.alloc
-// LINALG-INPUT: tensor.pack
+// LINALG-INPUT: linalg.pack
 // LINALG-INPUT-NOT: memref.alloc
-// LINALG-INPUT: tensor.pack
+// LINALG-INPUT: linalg.pack
 // LINALG-INPUT: memref.alloc() : memref<16x32x16x8x4x8xi32, 2 : i32>
 // LINALG-INPUT: bufferization.to_tensor
-// LINALG-INPUT: tensor.pack
+// LINALG-INPUT: linalg.pack
 // LINALG-INPUT: memref.alloc() : memref<32x8x8x8x8x8xi32, 2 : i32>
 // LINALG-INPUT: bufferization.to_tensor
-// LINALG-INPUT: tensor.pack
+// LINALG-INPUT: linalg.pack
 // LINALG-INPUT-NOT: memref.alloc
 // LINALG-INPUT: linalg.fill
 // LINALG-INPUT: linalg.generic
 
 // LINALG-OUTPUT-NOT: memref.alloc
-// LINALG-OUTPUT: tensor.pack
+// LINALG-OUTPUT: linalg.pack
 // LINALG-OUTPUT-NOT: memref.alloc
-// LINALG-OUTPUT: tensor.pack
+// LINALG-OUTPUT: linalg.pack
 // LINALG-OUTPUT-NOT: memref.alloc
-// LINALG-OUTPUT: tensor.pack
+// LINALG-OUTPUT: linalg.pack
 // LINALG-OUTPUT-NOT: memref.alloc
-// LINALG-OUTPUT: tensor.pack
+// LINALG-OUTPUT: linalg.pack
 // LINALG-OUTPUT: memref.alloc() : memref<16x8x16x8x4x8xi32, 2 : i32>
 // LINALG-OUTPUT: bufferization.to_tensor
 // LINALG-OUTPUT: linalg.fill
 // LINALG-OUTPUT: linalg.generic
 
 // PACK-INPUT: memref.alloc() : memref<16x32x64x64xi32, 1 : i32>
 // PACK-INPUT: bufferization.to_tensor
-// PACK-INPUT: tensor.pack
+// PACK-INPUT: linalg.pack
 // PACK-INPUT: memref.alloc() : memref<32x8x64x64xi32, 1 : i32>
 // PACK-INPUT: bufferization.to_tensor
-// PACK-INPUT: tensor.pack
+// PACK-INPUT: linalg.pack
 // PACK-INPUT-NOT: memref.alloc
-// PACK-INPUT: tensor.pack
+// PACK-INPUT: linalg.pack
 // PACK-INPUT-NOT: memref.alloc
-// PACK-INPUT: tensor.pack
+// PACK-INPUT: linalg.pack
 // PACK-INPUT-NOT: memref.alloc
 // PACK-INPUT: linalg.fill
 // PACK-INPUT: linalg.generic
@@ -105,14 +105,14 @@ func.func @matmul_elementwise(%arg0: tensor<1024x512xi8>, %arg1: tensor<512x1024
     %extracted_slice_0 = tensor.extract_slice %arg1[0, %arg4] [512, 64] [1, 1] : tensor<512x1024xi8> to tensor<512x64xi8>
     %extracted_slice_1 = tensor.extract_slice %0[%arg3, %arg4] [64, 64] [1, 1] : tensor<1024x1024xi32> to tensor<64x64xi32>
     %2 = tensor.empty() : tensor<1x16x64x32xi8>
-    %pack = tensor.pack %extracted_slice inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %2 : tensor<64x512xi8> -> tensor<1x16x64x32xi8>
+    %pack = linalg.pack %extracted_slice inner_dims_pos = [0, 1] inner_tiles = [64, 32] into %2 : tensor<64x512xi8> -> tensor<1x16x64x32xi8>
     %3 = tensor.empty() : tensor<16x1x32x64xi8>
-    %pack_2 = tensor.pack %extracted_slice_0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %3 : tensor<512x64xi8> -> tensor<16x1x32x64xi8>
+    %pack_2 = linalg.pack %extracted_slice_0 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64] into %3 : tensor<512x64xi8> -> tensor<16x1x32x64xi8>
     %4 = tensor.empty() : tensor<1x1x64x64xi32>
     %5 = tensor.empty() : tensor<1x16x4x16x4x8xi8>
-    %pack_3 = tensor.pack %pack outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %5 : tensor<1x16x64x32xi8> -> tensor<1x16x4x16x4x8xi8>
+    %pack_3 = linalg.pack %pack outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %5 : tensor<1x16x64x32xi8> -> tensor<1x16x4x16x4x8xi8>
     %6 = tensor.empty() : tensor<16x1x8x4x8x8xi8>
-    %pack_4 = tensor.pack %pack_2 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [8, 8] into %6 : tensor<16x1x32x64xi8> -> tensor<16x1x8x4x8x8xi8>
+    %pack_4 = linalg.pack %pack_2 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [8, 8] into %6 : tensor<16x1x32x64xi8> -> tensor<16x1x8x4x8x8xi8>
     %7 = tensor.empty() : tensor<1x1x8x16x4x8xi32>
     %8 = linalg.fill ins(%c0_i32 : i32) outs(%7 : tensor<1x1x8x16x4x8xi32>) -> tensor<1x1x8x16x4x8xi32>
     %9 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]} ins(%pack_3, %pack_4 : tensor<1x16x4x16x4x8xi8>, tensor<16x1x8x4x8x8xi8>) outs(%8 : tensor<1x1x8x16x4x8xi32>) {
@@ -125,49 +125,49 @@ func.func @matmul_elementwise(%arg0: tensor<1024x512xi8>, %arg1: tensor<512x1024
     } -> tensor<1x1x8x16x4x8xi32>
     %extracted_slice_5 = tensor.extract_slice %arg2[%arg3, %arg4] [64, 64] [1, 1] : tensor<1024x1024xi32> to tensor<64x64xi32>
     %extracted_slice_6 = tensor.extract_slice %arg5[%arg3, %arg4] [64, 64] [1, 1] : tensor<1024x1024xi32> to tensor<64x64xi32>
-    %pack_7 = tensor.pack %extracted_slice_6 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %4 : tensor<64x64xi32> -> tensor<1x1x64x64xi32>
-    %pack_8 = tensor.pack %extracted_slice_5 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %4 : tensor<64x64xi32> -> tensor<1x1x64x64xi32>
-    %pack_9 = tensor.pack %pack_7 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %7 : tensor<1x1x64x64xi32> -> tensor<1x1x8x16x4x8xi32>
-    %pack_10 = tensor.pack %pack_8 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %7 : tensor<1x1x64x64xi32> -> tensor<1x1x8x16x4x8xi32>
+    %pack_7 = linalg.pack %extracted_slice_6 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %4 : tensor<64x64xi32> -> tensor<1x1x64x64xi32>
+    %pack_8 = linalg.pack %extracted_slice_5 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %4 : tensor<64x64xi32> -> tensor<1x1x64x64xi32>
+    %pack_9 = linalg.pack %pack_7 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %7 : tensor<1x1x64x64xi32> -> tensor<1x1x8x16x4x8xi32>
+    %pack_10 = linalg.pack %pack_8 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %7 : tensor<1x1x64x64xi32> -> tensor<1x1x8x16x4x8xi32>
     %10 = linalg.generic {indexing_maps = [#map3, #map3, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%9, %pack_10 : tensor<1x1x8x16x4x8xi32>, tensor<1x1x8x16x4x8xi32>) outs(%pack_9 : tensor<1x1x8x16x4x8xi32>) {
     ^bb0(%in: i32, %in_12: i32, %out: i32):
       %11 = arith.addi %in, %in_12 : i32
       linalg.yield %11 : i32
     } -> tensor<1x1x8x16x4x8xi32>
-    %unpack = tensor.unpack %10 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %4 : tensor<1x1x8x16x4x8xi32> -> tensor<1x1x64x64xi32>
-    %unpack_11 = tensor.unpack %unpack inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %extracted_slice_1 : tensor<1x1x64x64xi32> -> tensor<64x64xi32>
+    %unpack = linalg.unpack %10 outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %4 : tensor<1x1x8x16x4x8xi32> -> tensor<1x1x64x64xi32>
+    %unpack_11 = linalg.unpack %unpack inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %extracted_slice_1 : tensor<1x1x64x64xi32> -> tensor<64x64xi32>
     scf.forall.in_parallel {
       tensor.parallel_insert_slice %unpack_11 into %arg5[%arg3, %arg4] [64, 64] [1, 1] : tensor<64x64xi32> into tensor<1024x1024xi32>
     }
   } {mapping = [#gpu.block<y>, #gpu.block<x>]}
   return %1 : tensor<1024x1024xi32>
 }
 
-// ELEMENTWISE-INPUT-COUNT-4: tensor.pack
+// ELEMENTWISE-INPUT-COUNT-4: linalg.pack
 // ELEMENTWISE-INPUT: linalg.fill
 // ELEMENTWISE-INPUT: linalg.generic
 // ELEMENTWISE-INPUT-NOT: memref.alloc
-// ELEMENTWISE-INPUT: tensor.pack
+// ELEMENTWISE-INPUT: linalg.pack
 // ELEMENTWISE-INPUT-NOT: memref.alloc
-// ELEMENTWISE-INPUT: tensor.pack
+// ELEMENTWISE-INPUT: linalg.pack
 // ELEMENTWISE-INPUT-NOT: memref.alloc
-// ELEMENTWISE-INPUT: tensor.pack
+// ELEMENTWISE-INPUT: linalg.pack
 // ELEMENTWISE-INPUT: memref.alloc() : memref<1x1x8x16x4x8xi32, 2 : i32>
 // ELEMENTWISE-INPUT: bufferization.to_tensor
-// ELEMENTWISE-INPUT: tensor.pack
+// ELEMENTWISE-INPUT: linalg.pack
 // ELEMENTWISE-INPUT: linalg.generic
 
-// ELEMENTWISE-INPUT-OUTPUT-COUNT-4: tensor.pack
+// ELEMENTWISE-INPUT-OUTPUT-COUNT-4: linalg.pack
 // ELEMENTWISE-INPUT-OUTPUT: linalg.fill
 // ELEMENTWISE-INPUT-OUTPUT: linalg.generic
 // ELEMENTWISE-INPUT-OUTPUT-NOT: memref.alloc
-// ELEMENTWISE-INPUT-OUTPUT: tensor.pack
+// ELEMENTWISE-INPUT-OUTPUT: linalg.pack
 // ELEMENTWISE-INPUT-OUTPUT-NOT: memref.alloc
-// ELEMENTWISE-INPUT-OUTPUT: tensor.pack
+// ELEMENTWISE-INPUT-OUTPUT: linalg.pack
 // ELEMENTWISE-INPUT-OUTPUT: memref.alloc() : memref<1x1x8x16x4x8xi32, 2 : i32>
 // ELEMENTWISE-INPUT-OUTPUT: bufferization.to_tensor
-// ELEMENTWISE-INPUT-OUTPUT: tensor.pack
+// ELEMENTWISE-INPUT-OUTPUT: linalg.pack
 // ELEMENTWISE-INPUT-OUTPUT: memref.alloc() : memref<1x1x8x16x4x8xi32, 2 : i32>
 // ELEMENTWISE-INPUT-OUTPUT: bufferization.to_tensor
-// ELEMENTWISE-INPUT-OUTPUT: tensor.pack
+// ELEMENTWISE-INPUT-OUTPUT: linalg.pack
 // ELEMENTWISE-INPUT-OUTPUT: linalg.generic