 #map1 = affine_map<(d0, d1, d2, d3, d4) -> (d1, d3, d4)>
 #map2 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3)>
 func.func @expanded_matmul_transpose_b(%lhs: tensor<2x64x2048xf16>, %rhs: tensor<10x64x2048xf16>) -> tensor<2x10x64x64xf32> {
-  %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f32
   %5 = tensor.empty() : tensor<2x10x64x64xf32>
   %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<2x10x64x64xf32>) -> tensor<2x10x64x64xf32>
@@ -49,7 +48,6 @@ func.func @expanded_matmul_transpose_b(%lhs: tensor<2x64x2048xf16>, %rhs: tensor
 #map1 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d3, d4, d5)>
 #map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
 func.func @multi_dim_mma_schedule(%lhs: tensor<10x32x128x16xf16>, %rhs: tensor<4x32x128x16xf16>) -> tensor<10x4x32x32xf32> {
-  %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f32
   %5 = tensor.empty() : tensor<10x4x32x32xf32>
   %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<10x4x32x32xf32>) -> tensor<10x4x32x32xf32>
@@ -119,7 +117,6 @@ func.func @dynamic_multi_dim_mma_schedule(%lhs: tensor<?x6x16x?x16xf16>, %rhs: t

 func.func @mfma_matmul_1024x1024x1024(%lhs: tensor<1024x1024xf16>, %rhs: tensor<1024x1024xf16>) -> tensor<1024x1024xf32> {
   %cst = arith.constant 0.000000e+00 : f32
-  %c0 = arith.constant 0 : index
   %5 = tensor.empty() : tensor<1024x1024xf32>
   %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
   %7 = linalg.matmul ins(%lhs, %rhs : tensor<1024x1024xf16>, tensor<1024x1024xf16>) outs(%6 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
@@ -212,7 +209,6 @@ func.func @mfma_matmul_m_aligned_intrinsic(%lhs: tensor<176x1024xi8>, %rhs: tens

 module {
   func.func @conv_nhwc(%3: tensor<2x258x514x768xf16>, %4: tensor<3x3x768x256xf16>) -> tensor<2x256x512x256xf32> {
-    %c0 = arith.constant 0 : index
     %cst = arith.constant 0.000000e+00 : f32
     %5 = tensor.empty() : tensor<2x256x512x256xf32>
     %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<2x256x512x256xf32>) -> tensor<2x256x512x256xf32>
@@ -249,7 +245,6 @@ func.func @matmul_dynamic_M(%arg0: tensor<?x256xf32>, %arg1: tensor<256x256xf32>
 module {
   func.func @elementwise_dynamic_dim(%11: tensor<?x256xf16>, %12: tensor<?x256xf16>) -> tensor<?x256xf16> {
     %c0 = arith.constant 0 : index
-    %cst = arith.constant 0.000000e+00 : f32
     %8 = tensor.dim %11, %c0 : tensor<?x256xf16>
     %13 = tensor.empty(%8) : tensor<?x256xf16>
     %15 = linalg.add ins(%11, %12 : tensor<?x256xf16>, tensor<?x256xf16>) outs(%13 : tensor<?x256xf16>) -> tensor<?x256xf16>
@@ -266,7 +261,6 @@ module {
 // -----

 func.func @elementwise_unaligned(%11: tensor<180x180xf16>, %12: tensor<180x180xf16>) -> tensor<180x180xf16> {
-  %cst = arith.constant 0.000000e+00 : f32
   %13 = tensor.empty() : tensor<180x180xf16>
   %15 = linalg.add ins(%11, %12 : tensor<180x180xf16>, tensor<180x180xf16>) outs(%13 : tensor<180x180xf16>) -> tensor<180x180xf16>
   return %15 : tensor<180x180xf16>
@@ -278,7 +272,6 @@ func.func @elementwise_unaligned(%11: tensor<180x180xf16>, %12: tensor<180x180xf
 // -----

 func.func @elementwise_large_rank(%11: tensor<3x5x7x11x13x17x19x23xf16>, %12: tensor<3x5x7x11x13x17x19x23xf16>) -> tensor<3x5x7x11x13x17x19x23xf16> {
-  %cst = arith.constant 0.000000e+00 : f32
   %13 = tensor.empty() : tensor<3x5x7x11x13x17x19x23xf16>
   %15 = linalg.add ins(%11, %12 : tensor<3x5x7x11x13x17x19x23xf16>, tensor<3x5x7x11x13x17x19x23xf16>) outs(%13 : tensor<3x5x7x11x13x17x19x23xf16>) -> tensor<3x5x7x11x13x17x19x23xf16>
   return %15 : tensor<3x5x7x11x13x17x19x23xf16>
@@ -293,9 +286,6 @@ func.func @elementwise_large_rank(%11: tensor<3x5x7x11x13x17x19x23xf16>, %12: te

 func.func @multi_mma_data_tiled_unrolled_MFMA_F32_16x16x4_F32(
     %3: tensor<1x8x8x4x16x4xf32>, %4: tensor<1x8x4x2x4x16x4xf32>, %5: tensor<1x1x4x8x2x4x16x4xf32>) -> tensor<1x1x4x8x2x4x16x4xf32> {
-  %c0 = arith.constant 0 : index
-  %c65536 = arith.constant 65536 : index
-  %c131072 = arith.constant 131072 : index
   %6 = iree_codegen.inner_tiled ins(%3, %4) outs(%5) {
     indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
                      affine_map<(d0, d1, d2) -> (d1, d2)>,
@@ -323,9 +313,9 @@ func.func @multi_mma_data_tiled_unrolled_MFMA_F32_16x16x4_F32(
 // -----

 func.func @unaligned_to_intrinsic_batched_matmul(%lhs : tensor<12x8x577xf32>, %rhs : tensor<12x577x577xf32>) -> tensor<12x8x577xf32> {
-  %c0 = arith.constant 0.0 : f32
+  %cst = arith.constant 0.0 : f32
   %empty = tensor.empty() : tensor<12x8x577xf32>
-  %fill = linalg.fill ins(%c0 : f32) outs(%empty : tensor<12x8x577xf32>) -> tensor<12x8x577xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%empty : tensor<12x8x577xf32>) -> tensor<12x8x577xf32>
   %mm = linalg.batch_matmul ins(%lhs, %rhs : tensor<12x8x577xf32>, tensor<12x577x577xf32>) outs(%fill : tensor<12x8x577xf32>) -> tensor<12x8x577xf32>
   return %mm : tensor<12x8x577xf32>
 }
@@ -413,9 +403,9 @@ func.func @unaligned_dynamic_matmul_with_two_reduce_dim(%arg0: tensor<196x?x4xf3
 // -----

 func.func @unaligned_to_intrinsic_batched_matmul_tiling_check(%lhs : tensor<12x577x577xf32>, %rhs : tensor<12x577x1024xf32>) -> tensor<12x577x1024xf32> {
-  %c0 = arith.constant 0.0 : f32
+  %cst = arith.constant 0.0 : f32
   %empty = tensor.empty() : tensor<12x577x1024xf32>
-  %fill = linalg.fill ins(%c0 : f32) outs(%empty : tensor<12x577x1024xf32>) -> tensor<12x577x1024xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%empty : tensor<12x577x1024xf32>) -> tensor<12x577x1024xf32>
   %mm = linalg.batch_matmul ins(%lhs, %rhs : tensor<12x577x577xf32>, tensor<12x577x1024xf32>) outs(%fill : tensor<12x577x1024xf32>) -> tensor<12x577x1024xf32>
   return %mm : tensor<12x577x1024xf32>
 }