Skip to content

Commit b1da7a8

Browse files
Groverkss, giacs-epic
authored and committed
[VectorDistribution] Remove signatures after distribution (iree-org#19319)
This patch removes the discardable attributes set for vector distribution once the pass has run. Otherwise, these attributes keep propagating and make the output IR harder to read. Signed-off-by: Giacomo Serafini <[email protected]>
1 parent 4b11a2e commit b1da7a8

File tree

5 files changed

+16
-21
lines changed

5 files changed

+16
-21
lines changed

compiler/src/iree/compiler/Codegen/Common/GPU/GPUVectorDistribution.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,11 @@ LogicalResult distributeVectorOps(Operation *root,
314314
return failure();
315315
}
316316

317+
// Remove the discardable signature attribute from every op after distribution.
318+
root->walk([](Operation *op) {
319+
op->removeDiscardableAttr(kVectorLayoutFetcherStorageAttrName);
320+
});
321+
317322
if (options.verifyConversion()) {
318323
WalkResult hasConversionOp = root->walk([](Operation *op) {
319324
if (isa<IREE::VectorExt::ToSIMDOp, IREE::VectorExt::ToSIMTOp>(op)) {

compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_contract_amdgpu.mlir

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ builtin.module attributes { transform.with_named_sequence } {
8383
// CHECK: %[[R_CAST:.+]] = vector.shape_cast %[[MFMA]] : vector<16xf32> to vector<4x1x4x1xf32>
8484
// CHECK: %[[B_OUT:.*]] = vector.broadcast %[[R_CAST]] : vector<4x1x4x1xf32> to vector<1x1x4x1x4x1xf32>
8585
// CHECK: %[[R_SIMD:.+]] = iree_vector_ext.to_simd %[[B_OUT]] : vector<1x1x4x1x4x1xf32> -> vector<32x32xf32>
86-
// CHECK: return {{.*}} %[[R_SIMD]]
86+
// CHECK: return %[[R_SIMD]]
8787

8888
// -----
8989

@@ -161,7 +161,7 @@ builtin.module attributes { transform.with_named_sequence } {
161161
// CHECK: %[[R_CAST:.+]] = vector.shape_cast %[[MFMA]] : vector<4xf32> to vector<1x1x4x1xf32>
162162
// CHECK: %[[B_OUT:.*]] = vector.broadcast %[[R_CAST]] : vector<1x1x4x1xf32> to vector<1x1x1x1x4x1xf32>
163163
// CHECK: %[[R_SIMD:.+]] = iree_vector_ext.to_simd %[[B_OUT]] : vector<1x1x1x1x4x1xf32> -> vector<16x16xf32>
164-
// CHECK: return {{.*}} %[[R_SIMD]]
164+
// CHECK: return %[[R_SIMD]]
165165

166166
// -----
167167

@@ -250,7 +250,7 @@ builtin.module attributes { transform.with_named_sequence } {
250250
// CHECK: %[[R1_CAST:.+]] = vector.shape_cast %[[MFMA1]] : vector<16xf32> to vector<4x1x4x1xf32>
251251
// CHECK: %[[C1_INS:.+]] = vector.insert %[[R1_CAST]], %[[C0_INS]] [1, 0] : vector<4x1x4x1xf32> into vector<2x1x4x1x4x1xf32>
252252
// CHECK: %[[R:.+]] = iree_vector_ext.to_simd %[[C1_INS]] : vector<2x1x4x1x4x1xf32> -> vector<64x32xf32>
253-
// CHECK: return {{.*}}} %[[R]]
253+
// CHECK: return %[[R]]
254254

255255
// -----
256256

@@ -589,7 +589,7 @@ builtin.module attributes { transform.with_named_sequence } {
589589
// CHECK: %[[R_CAST:.+]] = vector.shape_cast %[[WMMA]] : vector<8xf32> to vector<8x1x1x1xf32>
590590
// CHECK: %[[B_OUT:.*]] = vector.broadcast %[[R_CAST]] : vector<8x1x1x1xf32> to vector<1x1x8x1x1x1xf32>
591591
// CHECK: %[[R_SIMD:.+]] = iree_vector_ext.to_simd %[[B_OUT]] : vector<1x1x8x1x1x1xf32> -> vector<16x16xf32>
592-
// CHECK: return {{.*}} %[[R_SIMD]]
592+
// CHECK: return %[[R_SIMD]]
593593

594594
// -----
595595

@@ -682,4 +682,4 @@ builtin.module attributes { transform.with_named_sequence } {
682682
// CHECK: %[[R_CAST:.+]] = vector.shape_cast %[[MFMA_1]] : vector<16xf32> to vector<4x1x4x1xf32>
683683
// CHECK: %[[B_OUT:.*]] = vector.broadcast %[[R_CAST]] : vector<4x1x4x1xf32> to vector<1x1x4x1x4x1xf32>
684684
// CHECK: %[[R_SIMD:.+]] = iree_vector_ext.to_simd %[[B_OUT]] : vector<1x1x4x1x4x1xf32> -> vector<32x32xf32>
685-
// CHECK: return {{.*}} %[[R_SIMD]]
685+
// CHECK: return %[[R_SIMD]]

compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_nested_layout_vector_distribution.mlir

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -955,21 +955,11 @@ builtin.module attributes { transform.with_named_sequence } {
955955
}
956956
}
957957

958-
// CHECK: #[[$LAYOUT:.+]] = #iree_vector_ext.nested_layout
959-
// CHECK-SAME: subgroup_tile = [2, 2],
960-
// CHECK-SAME: batch_tile = [4, 2]
961-
// CHECK-SAME: outer_tile = [1, 2]
962-
// CHECK-SAME: thread_tile = [16, 4]
963-
// CHECK-SAME: element_tile = [2, 2]
964-
// CHECK-SAME: subgroup_strides = [1, 2],
965-
// CHECK-SAME: thread_strides = [1, 16]
966-
967958
// CHECK-LABEL: func @transpose
968959
// CHECK: iree_vector_ext.to_simt %{{.*}} : vector<64x256xf16> -> vector<2x4x2x1x2x2xf16>
969960
// CHECK: vector.transpose %{{.*}}, [1, 0, 3, 2, 5, 4] : vector<2x4x2x1x2x2xf16> to vector<4x2x1x2x2x2xf16>
970961
// CHECK: math.sqrt %{{.*}} : vector<4x2x1x2x2x2xf16>
971962
// CHECK: iree_vector_ext.to_simd %{{.*}} : vector<4x2x1x2x2x2xf16> -> vector<256x64xf16>
972-
// CHECK: return {{.*}}#[[$LAYOUT]]
973963

974964
// -----
975965

compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_vector_distribution.mlir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ func.func @distribute_elementwise_nested_layout_f16(%a: vector<128x128x128xf16>,
1919
%root = arith.constant dense<0.0> : vector<128x128x128xf16>
2020
%rootl = iree_vector_ext.to_layout %root to layout(#nested) : vector<128x128x128xf16>
2121
// CHECK-DAG: %[[B:.*]] = iree_vector_ext.to_simt %{{.*}} : vector<128x128x128xf16> -> vector<8x2x4x1x4x4x1x8x2xf16>
22-
// CHECK-DAG: %[[C:.*]] = arith.mulf %[[B]], %[[ROOT]] {{.*}} : vector<8x2x4x1x4x4x1x8x2xf16>
22+
// CHECK-DAG: %[[C:.*]] = arith.mulf %[[B]], %[[ROOT]] : vector<8x2x4x1x4x4x1x8x2xf16>
2323
%c = arith.mulf %rootl, %b : vector<128x128x128xf16>
2424
// CHECK-DAG: %[[A:.*]] = iree_vector_ext.to_simt %{{.*}} : vector<128x128x128xf16> -> vector<8x2x4x1x4x4x1x8x2xf16>
25-
// CHECK-DAG: %[[D:.*]] = arith.addf %[[C]], %[[A]] fastmath<reassoc,nnan> {{.*}} : vector<8x2x4x1x4x4x1x8x2xf16>
25+
// CHECK-DAG: %[[D:.*]] = arith.addf %[[C]], %[[A]] fastmath<reassoc,nnan> : vector<8x2x4x1x4x4x1x8x2xf16>
2626
%d = arith.addf %c, %a fastmath<reassoc,nnan> : vector<128x128x128xf16>
2727
// CHECK: iree_vector_ext.to_simd %[[D]] : vector<8x2x4x1x4x4x1x8x2xf16> -> vector<128x128x128xf16>
2828
return %d : vector<128x128x128xf16>
@@ -51,10 +51,10 @@ func.func @distribute_scf_for(%a: vector<16x16xi32>, %b: vector<16x16xi32>) -> v
5151
// CHECK: iter_args(%[[ARG0:.*]] = %[[ROOT]]) -> (vector<1x1x1x1x16x16xi32>)
5252
%out = scf.for %i = %c0 to %c128 step %c1 iter_args(%arg0 = %rootl) -> (vector<16x16xi32>) {
5353
// CHECK-DAG: %[[B:.*]] = iree_vector_ext.to_simt %{{.*}} : vector<16x16xi32> -> vector<1x1x1x1x16x16xi32>
54-
// CHECK-DAG: %[[C:.*]] = arith.muli %[[ARG0]], %[[B]] {{.*}} : vector<1x1x1x1x16x16xi32>
54+
// CHECK-DAG: %[[C:.*]] = arith.muli %[[ARG0]], %[[B]] : vector<1x1x1x1x16x16xi32>
5555
%c = arith.muli %arg0, %b : vector<16x16xi32>
5656
// CHECK-DAG: %[[A:.*]] = iree_vector_ext.to_simt %{{.*}} : vector<16x16xi32> -> vector<1x1x1x1x16x16xi32>
57-
// CHECK-DAG: %[[D:.*]] = arith.addi %[[C]], %[[A]] {{.*}} : vector<1x1x1x1x16x16xi32>
57+
// CHECK-DAG: %[[D:.*]] = arith.addi %[[C]], %[[A]] : vector<1x1x1x1x16x16xi32>
5858
%d = arith.addi %c, %a : vector<16x16xi32>
5959
// CHECK: scf.yield %[[D]] : vector<1x1x1x1x16x16xi32>
6060
scf.yield %d : vector<16x16xi32>

compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_vector_distribute_gfx942.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
9090

9191
// CHECK-LABEL: func.func @matmul_256x256x256_f16_f16()
9292
// CHECK: scf.for {{.*}} = %c0 to %c256 step %c128 iter_args(%[[ARG:.+]] = {{.*}}) -> (vector<2x2x1x1x4x1xf16>)
93-
// CHECK: arith.extf %[[ARG]] {{.*}} : vector<2x2x1x1x4x1xf16> to vector<2x2x1x1x4x1xf32>
93+
// CHECK: arith.extf %[[ARG]] : vector<2x2x1x1x4x1xf16> to vector<2x2x1x1x4x1xf32>
9494
// CHECK-COUNT-32: amdgpu.mfma {{.*}} {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
9595
// CHECK: %[[TRUNC:.+]] = arith.truncf %{{.*}} : vector<2x2x1x1x4x1xf32> to vector<2x2x1x1x4x1xf16>
9696
// CHECK: scf.yield %[[TRUNC]] : vector<2x2x1x1x4x1xf16>
@@ -157,7 +157,7 @@ hal.executable.variant @rocm target(<"rocm", "rocm-hsaco-fb">) {
157157
// This has more than 2 iterations, so we have prefetching enabled for this case. Due to
158158
// prefetching, we have one iteration peeled off, so the upper bound is 2048 - 128 = 1920.
159159
// CHECK: scf.for {{.*}} = %c0 to %c1920 step %c128 iter_args(%[[ARG:.+]] = {{.*}}) -> (vector<4x1x1x1x4x1xf16>)
160-
// CHECK: arith.extf %[[ARG]] {{.*}} : vector<4x1x1x1x4x1xf16> to vector<4x1x1x1x4x1xf32>
160+
// CHECK: arith.extf %[[ARG]] : vector<4x1x1x1x4x1xf16> to vector<4x1x1x1x4x1xf32>
161161
// CHECK-COUNT-32: amdgpu.mfma {{.*}} {blocks = 1 : i32, k = 16 : i32, m = 16 : i32, n = 16 : i32} blgp = none : vector<4xf16>, vector<4xf16>, vector<4xf32>
162162
// CHECK: %[[TRUNC:.+]] = arith.truncf %{{.*}} : vector<4x1x1x1x4x1xf32> to vector<4x1x1x1x4x1xf16>
163163
// CHECK: scf.yield %[[TRUNC]] : vector<4x1x1x1x4x1xf16>

0 commit comments

Comments
 (0)