Commit 1f322ce

[Codegen] Test Cleanup 2/8: Common GPU tests (#22745)
Result of a scan over all tests in Codegen to clean up common issues. A summary of the results and a preamble approximating the issues to look for can be found here: https://gist.github.com/qedawkins/40f9e604fd83745bf1ac20fd63a7a61f
1 parent: acefc23
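One recurring issue from that list is FileCheck capture hygiene: FileCheck allows a variable such as %[[SELECT0]] to be redefined, and later uses resolve to the most recent binding, so accidentally reusing a capture name can silently check the wrong value. A minimal sketch of the pitfall, mirroring the fix applied in gpu_apply_padding_partial_reduction.mlir below:

// Pitfall: both CHECK-DAG lines bind the name SELECT0; the second one
// redefines the first, and neither binding is ever used afterwards.
// CHECK-DAG: %[[SELECT0:.+]] = arith.select {{.*}} %[[NAN0]] : f32
// CHECK-DAG: %[[SELECT0:.+]] = arith.select {{.*}} %[[NAN1]] : f32

// Fix: since the selected values are never referenced again, drop the
// captures and match the operands directly.
// CHECK-DAG: {{.+}} = arith.select {{.+}}, {{.+}}, %[[NAN0]] : f32
// CHECK-DAG: {{.+}} = arith.select {{.+}}, {{.+}}, %[[NAN1]] : f32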

43 files changed: 320 additions, 428 deletions


compiler/src/iree/compiler/Codegen/Common/GPU/test/decompose_horizontally_fused_gemms.mlir

Lines changed: 9 additions & 15 deletions
@@ -4,11 +4,10 @@ func.func @fused_contraction_1(%arg0: tensor<2x4096x640xf16>,
     %arg1: tensor<10x64x640xf16>, %arg2: tensor<10x64x640xf16>,
     %arg3: tensor<10x64x640xf16>)
     -> (tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>) {
-  %0 = tensor.empty() : tensor<2x10x4096x64xf16>
-  %1 = tensor.empty() : tensor<2x10x4096x64xf32>
+  %0 = tensor.empty() : tensor<2x10x4096x64xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<2x10x4096x64xf32>) -> tensor<2x10x4096x64xf32>
-  %3:3 = linalg.generic {
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<2x10x4096x64xf32>) -> tensor<2x10x4096x64xf32>
+  %2:3 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d2, d4)>,
                      affine_map<(d0, d1, d2, d3, d4) -> (d1, d3, d4)>,
                      affine_map<(d0, d1, d2, d3, d4) -> (d1, d3, d4)>,
@@ -18,7 +17,7 @@ func.func @fused_contraction_1(%arg0: tensor<2x4096x640xf16>,
                      affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3)>],
     iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction"]}
     ins(%arg0, %arg1, %arg2, %arg3 : tensor<2x4096x640xf16>, tensor<10x64x640xf16>, tensor<10x64x640xf16>, tensor<10x64x640xf16>)
-    outs(%2, %2, %2 : tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>)
+    outs(%1, %1, %1 : tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>)
     attrs = {
       lowering_config = #iree_gpu.lowering_config<{
         mma_kind = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>, promote_operands = [0, 1, 2, 3],
@@ -36,7 +35,7 @@ func.func @fused_contraction_1(%arg0: tensor<2x4096x640xf16>,
     %16 = arith.addf %out_4, %15 : f32
     linalg.yield %10, %13, %16 : f32, f32, f32
   } -> (tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>)
-  return %3#0, %3#1, %3#2 : tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>
+  return %2#0, %2#1, %2#2 : tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>, tensor<2x10x4096x64xf32>
 }
 // CHECK-LABEL: func @fused_contraction_1
 // CHECK-SAME:    %[[ARG0:[a-zA-Z0-9]+]]: tensor<2x4096x640xf16>
@@ -106,10 +105,9 @@ func.func @fused_contraction_2(%arg0: tensor<4096x640xf32>,
     %arg1: tensor<640x640xf32>, %arg2: tensor<640x640xf32>, %arg3: tensor<640x640xf32>)
     -> (tensor<4096x640xf32>, tensor<4096x640xf32>, tensor<4096x640xf32>) {
   %0 = tensor.empty() : tensor<4096x640xf32>
-  %1 = tensor.empty() : tensor<4096x640xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4096x640xf32>) -> tensor<4096x640xf32>
-  %3:3 = linalg.generic {
+  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4096x640xf32>) -> tensor<4096x640xf32>
+  %2:3 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
                      affine_map<(d0, d1, d2) -> (d2, d1)>,
                      affine_map<(d0, d1, d2) -> (d2, d1)>,
@@ -119,7 +117,7 @@ func.func @fused_contraction_2(%arg0: tensor<4096x640xf32>,
                      affine_map<(d0, d1, d2) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel", "reduction"]}
     ins(%arg0, %arg1, %arg2, %arg3 : tensor<4096x640xf32>, tensor<640x640xf32>, tensor<640x640xf32>, tensor<640x640xf32>)
-    outs(%2, %2, %2 : tensor<4096x640xf32>, tensor<4096x640xf32>, tensor<4096x640xf32>)
+    outs(%1, %1, %1 : tensor<4096x640xf32>, tensor<4096x640xf32>, tensor<4096x640xf32>)
     attrs = {
       lowering_config = #iree_gpu.lowering_config<{
         mma_kind = #iree_gpu.mma_layout<MFMA_F32_16x16x4_F32>, promote_operands = [0, 1, 2, 3],
@@ -133,13 +131,9 @@ func.func @fused_contraction_2(%arg0: tensor<4096x640xf32>,
     %9 = arith.addf %out_4, %8 : f32
     linalg.yield %5, %7, %9 : f32, f32, f32
   } -> (tensor<4096x640xf32>, tensor<4096x640xf32>, tensor<4096x640xf32>)
-  return %3#0, %3#1, %3#2 : tensor<4096x640xf32>, tensor<4096x640xf32>, tensor<4096x640xf32>
+  return %2#0, %2#1, %2#2 : tensor<4096x640xf32>, tensor<4096x640xf32>, tensor<4096x640xf32>
 }
 // CHECK-LABEL: func @fused_contraction_2
-// CHECK-SAME:    %[[ARG0:[a-zA-Z0-9]+]]: tensor<4096x640xf32>
-// CHECK-SAME:    %[[ARG1:[a-zA-Z0-9]+]]: tensor<640x640xf32>
-// CHECK-SAME:    %[[ARG2:[a-zA-Z0-9]+]]: tensor<640x640xf32>
-// CHECK-SAME:    %[[ARG3:[a-zA-Z0-9]+]]: tensor<640x640xf32>
 // CHECK:         %[[FILL:.+]] = linalg.fill
 // CHECK:         %[[GENERIC0:.+]] = linalg.generic
 // CHECK:         ^bb0(%[[B0_0:[a-zA-Z0-9_]+]]: f32, %[[B1_0:[a-zA-Z0-9_]+]]: f32, %[[B2_0:[a-zA-Z0-9_]+]]: f32

compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_alloc_private_memory_for_dps_ops.mlir

Lines changed: 2 additions & 3 deletions
@@ -19,7 +19,6 @@ func.func @unused_result_copied(%arg0: !iree_tensor_ext.dispatch.tensor<readonly
 // CHECK-LABEL: func.func @big_result_not_copied
 // CHECK-NOT:     bufferization.alloc_tensor()
 func.func @big_result_not_copied(%arg0: !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x33xf32>>, %arg1: tensor<1x33xi64>) -> tensor<1x33xi64> {
-  %cst = arith.constant dense<1.000000e+00> : tensor<1x33xf32>
   %2 = iree_tensor_ext.dispatch.tensor.load %arg0, offsets = [0, 0], sizes = [1, 33], strides = [1, 1] : !iree_tensor_ext.dispatch.tensor<readonly:tensor<1x33xf32>> -> tensor<1x33xf32>
   %3:2 = iree_linalg_ext.sort dimension(1) outs(%2, %arg1 : tensor<1x33xf32>, tensor<1x33xi64>) {
   ^bb0(%arg4: f32, %arg5: f32, %arg6: i64, %arg7: i64):
@@ -45,9 +44,9 @@ func.func @used_result_not_copied(%arg0: !iree_tensor_ext.dispatch.tensor<readon

 // -----

-// CHECK-LABEL: func @memref_semantics(
+// CHECK-LABEL: func.func @memref_semantics(
 // CHECK-SAME:    %[[DEST:.+]]: memref<?x?xf32>
-// CHECK:         linalg.fill {{.*}} outs(%[[DEST]]
+// CHECK:         linalg.fill ins(%{{.+}}) outs(%[[DEST]]
 func.func @memref_semantics(%dest: memref<?x?xf32>) {
   %cst = arith.constant 0.000000e+00 : f32
   linalg.fill ins(%cst : f32) outs(%dest : memref<?x?xf32>)
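A note on the CHECK-LABEL change above, which recurs throughout this commit: a CHECK-LABEL pattern is meant to uniquely identify one block of the input, and a bare name can also appear in comments, call sites, or longer symbol names. Anchoring on the declaration syntax avoids accidental matches; a minimal illustrative sketch:

// Loose: matches any line containing the substring "memref_semantics".
// CHECK-LABEL: memref_semantics
// Anchored: matches only the function declaration itself.
// CHECK-LABEL: func.func @memref_semantics(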

compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_derived_thread_config.mlir

Lines changed: 19 additions & 25 deletions
@@ -5,12 +5,12 @@
 module {
   func.func @inferred_add_tensor(%3: tensor<64x256xf32>, %4: tensor<64x256xf32>, %5: tensor<64x256xf32>) -> tensor<64x256xf32>
     attributes {
-      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [16, 32, 1] subgroup_size = 64, {}>
+      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [16, 32, 1] subgroup_size = 64>
   } {
     %6 = linalg.generic {
       indexing_maps = [#map, #map, #map],
       iterator_types = ["parallel", "parallel"]
-    } ins(%3, %4 : tensor<64x256xf32>, tensor<64x256xf32>) outs(%5 : tensor<64x256xf32>) attrs = {lowering_config = #config} {
+    } ins(%3, %4 : tensor<64x256xf32>, tensor<64x256xf32>) outs(%5 : tensor<64x256xf32>) attrs = {lowering_config = #config} {
     ^bb0(%in: f32, %in_0: f32, %out: f32):
       %7 = arith.addf %in, %in_0 : f32
       linalg.yield %7 : f32
@@ -32,12 +32,12 @@ module {
 module {
   func.func @inferred_dynamic(%3: tensor<?x?xf32>, %4: tensor<?x?xf32>, %5: tensor<?x?xf32>) -> tensor<?x?xf32>
     attributes {
-      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [16, 32, 1] subgroup_size = 64, {}>
+      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [16, 32, 1] subgroup_size = 64>
   } {
     %6 = linalg.generic {
       indexing_maps = [#map, #map, #map],
       iterator_types = ["parallel", "parallel"]
-    } ins(%3, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%5 : tensor<?x?xf32>) attrs = {lowering_config = #config} {
+    } ins(%3, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%5 : tensor<?x?xf32>) attrs = {lowering_config = #config} {
     ^bb0(%in: f32, %in_0: f32, %out: f32):
       %7 = arith.addf %in, %in_0 : f32
       linalg.yield %7 : f32
@@ -62,12 +62,12 @@ module {
 module {
   func.func @inferred_small_inner_dim(%3: tensor<8x2xf32>, %4: tensor<8x2xf32>, %5: tensor<8x2xf32>) -> tensor<8x2xf32>
     attributes {
-      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [16, 32, 1] subgroup_size = 64, {}>
+      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [16, 32, 1] subgroup_size = 64>
   } {
     %6 = linalg.generic {
      indexing_maps = [#map, #map, #map],
      iterator_types = ["parallel", "parallel"]
-    } ins(%3, %4 : tensor<8x2xf32>, tensor<8x2xf32>) outs(%5 : tensor<8x2xf32>) attrs = {lowering_config = #config} {
+    } ins(%3, %4 : tensor<8x2xf32>, tensor<8x2xf32>) outs(%5 : tensor<8x2xf32>) attrs = {lowering_config = #config} {
     ^bb0(%in: f32, %in_0: f32, %out: f32):
       %7 = arith.addf %in, %in_0 : f32
       linalg.yield %7 : f32
@@ -84,11 +84,10 @@ module {

 // -----

-#map = affine_map<(d0, d1) -> (d0, d1)>
 module {
   func.func @inferred_small_inner_dim_fill_vector_sizes(%0: tensor<4x16x8x4x16x2x4xf16>, %1: tensor<4x16x8x4x16x2x4xf16>) -> tensor<4x16x8x4x16x2x4xf16>
     attributes {
-      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [256, 1, 1] subgroup_size = 64, {}>
+      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [256, 1, 1] subgroup_size = 64>
   } {
     %2 = linalg.copy {lowering_config = #iree_gpu.derived_thread_config}
       ins(%0 : tensor<4x16x8x4x16x2x4xf16>)
@@ -105,12 +104,11 @@ module {

 // -----

-#map = affine_map<(d0, d1) -> (d0, d1)>
 module {
   func.func @inferred_small_inner_dim_dont_fill_non_contiguous(
       %0: tensor<4x16x4x4xf16>, %1: tensor<4x16x4x4xf16>) -> tensor<4x16x4x4xf16>
     attributes {
-      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [64, 1, 1] subgroup_size = 64, {}>
+      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [64, 1, 1] subgroup_size = 64>
   } {
     %2 = linalg.copy {lowering_config = #iree_gpu.derived_thread_config}
       ins(%0 : tensor<4x16x4x4xf16>)
@@ -127,11 +125,10 @@ module {

 // -----

-#map = affine_map<(d0, d1) -> (d0, d1)>
 module {
   func.func @inferred_unaligned(%0: tensor<70xf16>, %1: tensor<70xf16>) -> tensor<70xf16>
     attributes {
-      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [64, 1, 1] subgroup_size = 64, {}>
+      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [64, 1, 1] subgroup_size = 64>
   } {
     %2 = linalg.copy {lowering_config = #iree_gpu.derived_thread_config}
       ins(%0 : tensor<70xf16>)
@@ -148,11 +145,10 @@ module {

 // -----

-#map = affine_map<(d0, d1) -> (d0, d1)>
 module {
   func.func @inferred_smaller_load(%0: tensor<128xf16>, %1: tensor<128xf16>) -> tensor<128xf16>
     attributes {
-      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [64, 1, 1] subgroup_size = 64, {}>
+      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [64, 1, 1] subgroup_size = 64>
   } {
     %2 = linalg.copy {lowering_config = #iree_gpu.derived_thread_config}
       ins(%0 : tensor<128xf16>)
@@ -173,7 +169,7 @@ module {
 module {
   func.func @inferred_im2col(%2: tensor<2x34x34x128xf16>, %3: tensor<2x128x8xf16>) -> tensor<2x128x8xf16>
     attributes {
-      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [16, 32, 1] subgroup_size = 64, {}>
+      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [16, 32, 1] subgroup_size = 64>
   } {
     %4 = iree_linalg_ext.im2col {lowering_config = #config}
       strides = [1, 1] dilations = [1, 1] kernel_size = [3, 3]
@@ -198,7 +194,7 @@ module {
 module {
   func.func @inferred_im2col_batch_last(%2: tensor<16x26x18x32xbf16>, %3: tensor<32x1x1x32xbf16>) -> tensor<32x1x1x32xbf16>
     attributes {
-      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [256, 1, 1] subgroup_size = 64, {}>
+      translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [256, 1, 1] subgroup_size = 64>
   } {
     %4 = iree_linalg_ext.im2col {lowering_config = #config}
       strides = [1, 1] dilations = [1, 1] kernel_size = [24, 16]
@@ -220,31 +216,29 @@ module {
 // -----

 #config = #iree_gpu.derived_thread_config
-func.func @scatter(%arg0: tensor<3x32x16xf32>, %arg1: tensor<3x1xi32>) -> tensor<3x32x16xf32>
+func.func @scatter(%arg0: tensor<3x32x16xf32>, %arg1: tensor<3x1xi32>, %arg2: tensor<3x32x16xf32>) -> tensor<3x32x16xf32>
   attributes {
-    translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [64, 1, 1] subgroup_size = 64, {}>
+    translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [64, 1, 1] subgroup_size = 64>
 } {
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = tensor.empty() : tensor<3x32x16xf32>
   %1 = iree_linalg_ext.scatter {lowering_config = #config} dimension_map = [0] unique_indices(true)
-    ins(%arg0, %arg1 : tensor<3x32x16xf32>, tensor<3x1xi32>) outs(%0 : tensor<3x32x16xf32>) {
-  ^bb0(%arg2: f32, %arg3: f32):
-    iree_linalg_ext.yield %arg2 : f32
+    ins(%arg0, %arg1 : tensor<3x32x16xf32>, tensor<3x1xi32>) outs(%arg2 : tensor<3x32x16xf32>) {
+  ^bb0(%in: f32, %out: f32):
+    iree_linalg_ext.yield %in : f32
   } -> tensor<3x32x16xf32>
   return %1 : tensor<3x32x16xf32>
 }

 // CHECK-LABEL: func.func @scatter
 // CHECK:         scf.forall ({{.*}}) = (0, 0, 0) to (3, 32, 16) step (1, 1, 4)
-// CHECK:           linalg_ext.scatter
+// CHECK:           iree_linalg_ext.scatter
 // CHECK:         scf.forall.in_parallel

 // -----

 #config = #iree_gpu.derived_thread_config
 func.func @map_scatter(%arg0: tensor<2x32xf32>, %arg1: tensor<64x256xf32>) -> tensor<64x256xf32>
   attributes {
-    translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [16, 32] subgroup_size = 64, {}>
+    translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [16, 32] subgroup_size = 64>
 } {
   %true = arith.constant true
   %1 = iree_linalg_ext.map_scatter {lowering_config = #config} %arg0 into %arg1 {
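The recurring translation_info edit in this file drops a trailing ", {}". Reading the attribute syntax, that brace pair is an empty per-pipeline configuration dictionary, which appears to be the default when omitted, so both spellings below should denote the same attribute (a sketch of the equivalence, not verified against the parser):

// With an explicit empty configuration dictionary:
translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [64, 1, 1] subgroup_size = 64, {}>
// Equivalent, relying on the default:
translation_info = #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [64, 1, 1] subgroup_size = 64>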

compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_padding_online_attention.mlir

Lines changed: 2 additions & 2 deletions
@@ -69,8 +69,8 @@ func.func @online_attention_tile_then_pad(%query: tensor<192x1024x64xf32>, %key:

 // CHECK:         arith.constant 0xFF800000 : f32
 // CHECK-COUNT-3: tensor.pad
-// CHECK:         iree_linalg_ext.online_attention {{.*}} ins(%{{[0-9a-z_]*}}, %{{[0-9a-z_]*}}, %{{[0-9a-z_]*}}, %{{[0-9a-z_]*}}, %{{[0-9a-z_]*}}
-// CHECK-SAME:      : tensor<192x1024x64xf32>, tensor<192x32x64xf32>, tensor<192x32x64xf32>, f32, tensor<192x1024x32xf32>
+// CHECK:         iree_linalg_ext.online_attention {{.*}} ins(%{{[A-Za-z0-9_]+}}, %{{[A-Za-z0-9_]+}}, %{{[A-Za-z0-9_]+}}, %{{[A-Za-z0-9_]+}}, %{{[A-Za-z0-9_]+}}
+// CHECK-SAME:      : tensor<192x1024x64xf32>, tensor<192x32x64xf32>, tensor<192x32x64xf32>, f32, tensor<192x1024x32xf32>)
 %out:3 = iree_linalg_ext.online_attention
   {
     indexing_maps = [#mapQ, #mapK, #mapV, #mapS, #mapM, #mapO, #mapR, #mapR],

compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_padding_partial_reduction.mlir

Lines changed: 11 additions & 11 deletions
@@ -58,7 +58,7 @@ func.func @sum_exp_sub_reduction(%arg0: tensor<1x?xf32>, %arg1: tensor<1xf32>, %
 // So we check that the selected value in the padded region is one of the
 // NaN values.

-// CHECK-LABEL: max_reduction
+// CHECK-LABEL: func.func @max_reduction
 // CHECK-DAG:     %[[NANVAL:.+]] = arith.constant 0xFFC00000 : f32
 // CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 // CHECK-DAG:     %[[DIMARG0:.+]] = tensor.dim %arg0, %[[C1]] : tensor<1x?xf32>
@@ -86,7 +86,7 @@ func.func @max_reduction(%arg0: tensor<1x?xf32>, %arg1: tensor<1xf32>) -> tensor

 // -----

-// CHECK-LABEL: min_reduction
+// CHECK-LABEL: func.func @min_reduction
 // CHECK-DAG:     %[[NANVAL:.+]] = arith.constant 0x7FC00000 : f32
 // CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 // CHECK-DAG:     %[[DIMARG0:.+]] = tensor.dim %arg0, %[[C1]] : tensor<1x?xf32>
@@ -117,7 +117,7 @@ func.func @min_reduction(%arg0: tensor<1x?xf32>, %arg1: tensor<1xf32>) -> tensor

 // This reduction corresponds to a standard inner product.

-// CHECK-LABEL: standard_inner_product
+// CHECK-LABEL: func.func @standard_inner_product
 // CHECK-DAG:     %[[ZERO:.+]] = arith.constant 0.000000e+00 : f16
 // CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 // CHECK-DAG:     %[[DIMARG0:.+]] = tensor.dim %arg0, %[[C1]] : tensor<1x?xf16>
@@ -150,7 +150,7 @@ func.func @standard_inner_product(%arg0 : tensor<1x?xf16>, %arg1 : tensor<1x?xf1
 // Inner product where the accumulation (add) is in f16 but the multiplication is in f32
 // Check for an f16 zero as the reduction identity.

-// CHECK-LABEL: standard_inner_product_with_trunc
+// CHECK-LABEL: func.func @standard_inner_product_with_trunc
 // CHECK-DAG:     %[[ZERO:.+]] = arith.constant 0.000000e+00 : f16
 // CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 // CHECK-DAG:     %[[DIMARG0:.+]] = tensor.dim %arg0, %[[C1]] : tensor<1x?xf32>
@@ -189,7 +189,7 @@ func.func @standard_inner_product_with_trunc(%arg0 : tensor<1x?xf32>, %arg1 : te
 // In this example, the reduction type is multiplicative, so we check that
 // the value selected in the padded part of the iteration space is 1, the multiplicative identity.

-// CHECK-LABEL: product_of_sum_reduction
+// CHECK-LABEL: func.func @product_of_sum_reduction
 // CHECK:         %[[ONE:.+]] = arith.constant 1.000000e+00 : f16
 // CHECK:         %[[CMP:.+]] = arith.cmpi
 // CHECK:         %[[ADD:.+]] = arith.addf
@@ -215,7 +215,7 @@ func.func @product_of_sum_reduction(%arg0 : tensor<1x?xf16>, %arg1 : tensor<1x?x
 // Reductions in multiple dimensions have a 2-D region to check for padding.
 // Check for 2 compare ops, and an 'and' to combine them

-// CHECK-LABEL: multi_dim_reduction
+// CHECK-LABEL: func.func @multi_dim_reduction
 // CHECK-SAME:    (%[[ARG0:[0-9a-zA-Z]+]]: tensor<?x?xf16>, %
 // CHECK-DAG:     %[[ZEROF16:.+]] = arith.constant 0.000000e+00 : f16
 // CHECK-DAG:     %[[C0:.+]] = arith.constant 0 : index
@@ -253,11 +253,11 @@ func.func @multi_dim_reduction(%arg0 : tensor<?x?xf16>, %arg1 : tensor<?x?xf16>,

 // Multiple reductions in parallel in a linalg.generic op.

-// CHECK-LABEL: minmax_reduction
+// CHECK-LABEL: func.func @minmax_reduction
 // CHECK-DAG:     %[[NAN0:.+]] = arith.constant 0xFFC00000 : f32
 // CHECK-DAG:     %[[NAN1:.+]] = arith.constant 0x7FC00000 : f32
-// CHECK-DAG:     %[[SELECT0:.+]] = arith.select {{.*}} %[[NAN0]] : f32
-// CHECK-DAG:     %[[SELECT0:.+]] = arith.select {{.*}} %[[NAN1]] : f32
+// CHECK-DAG:     {{.+}} = arith.select {{.+}}, {{.+}}, %[[NAN0]] : f32
+// CHECK-DAG:     {{.+}} = arith.select {{.+}}, {{.+}}, %[[NAN1]] : f32
 #map = affine_map<(d0, d1) -> (d0, d1)>
 #map1 = affine_map<(d0, d1) -> (d0)>
 func.func @minmax_reduction(%arg0: tensor<1x?xf32>, %arg1: tensor<1xf32>, %arg2 : tensor<1xf32>) -> (tensor<1xf32>, tensor<1xf32>) {
@@ -280,7 +280,7 @@ func.func @minmax_reduction(%arg0: tensor<1x?xf32>, %arg1: tensor<1xf32>, %arg2
 // The reduction dimension is perfectly tiled by the partial reduction (1024 % 128 == 0).
 // We confirm that we directly optimize this case, where the arith.select condition is always true.

-// CHECK-LABEL: reduction_static_complete_tile
+// CHECK-LABEL: func.func @reduction_static_complete_tile
 // CHECK-NOT:     arith.select
 #map = affine_map<(d0, d1) -> (d0, d1)>
 #map1 = affine_map<(d0, d1) -> (d0)>
@@ -301,7 +301,7 @@ func.func @reduction_static_complete_tile(%arg0: tensor<1x1024xf32>, %arg1: tens

 // The reduction dimension is not perfectly tiled by the partial reduction (1024 % 100 != 0).

-// CHECK-LABEL: reduction_static_incomplete_tile
+// CHECK-LABEL: func.func @reduction_static_incomplete_tile
 // CHECK:         arith.select
 #map = affine_map<(d0, d1) -> (d0, d1)>
 #map1 = affine_map<(d0, d1) -> (d0)>
