11// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=PRODUCER-CONSUMER
22// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{fusion-maximal mode=sibling}))' -split-input-file | FileCheck %s --check-prefix=SIBLING-MAXIMAL
3+ // All fusion: producer-consumer and sibling.
4+ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion))' -split-input-file | FileCheck %s --check-prefix=ALL
35// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(spirv.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=SPIRV
46
57// Part I of fusion tests in mlir/test/Transforms/loop-fusion.mlir.
@@ -108,6 +110,7 @@ func.func @check_src_dst_step(%m : memref<100xf32>,
108110func.func @reduce_add_non_maximal_f32_f32 (%arg0: memref <64 x64 xf32 , 1 >, %arg1 : memref <1 x64 xf32 , 1 >, %arg2 : memref <1 x64 xf32 , 1 >) {
109111 %cst_0 = arith.constant 0.000000e+00 : f32
110112 %cst_1 = arith.constant 1.000000e+00 : f32
113+ // This nest writes to %arg1 but can be eliminated post sibling fusion.
111114 affine.for %arg3 = 0 to 1 {
112115 affine.for %arg4 = 0 to 64 {
113116 %accum = affine.for %arg5 = 0 to 64 iter_args (%prevAccum = %cst_0 ) -> f32 {
@@ -137,11 +140,11 @@ func.func @reduce_add_non_maximal_f32_f32(%arg0: memref<64x64xf32, 1>, %arg1 : m
137140// since the destination loop and source loop trip counts do not
138141// match.
139142// SIBLING-MAXIMAL: %[[cst_0:.*]] = arith.constant 0.000000e+00 : f32
140- // SIBLING-MAXIMAL-NEXT: %[[cst_1:.*]] = arith.constant 1.000000e+00 : f32
141- // SIBLING-MAXIMAL-NEXT: affine.for %[[idx_0:.*]] = 0 to 1 {
142- // SIBLING-MAXIMAL-NEXT: affine.for %[[idx_1:.*]] = 0 to 64 {
143- // SIBLING-MAXIMAL-NEXT: %[[result_1:.*]] = affine.for %[[idx_2:.*]] = 0 to 32 iter_args(%[[iter_0:.*]] = %[[cst_1]]) -> (f32) {
144- // SIBLING-MAXIMAL-NEXT: %[[result_0:.*]] = affine.for %[[idx_3:.*]] = 0 to 64 iter_args(%[[iter_1:.*]] = %[[cst_0]]) -> (f32) {
143+ // SIBLING-MAXIMAL-NEXT: %[[cst_1:.*]] = arith.constant 1.000000e+00 : f32
144+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 1 {
145+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 64 {
146+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 32 iter_args(%{{.*}} = %[[cst_1]]) -> (f32) {
147+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 64 iter_args(%{{.*}} = %[[cst_0]]) -> (f32) {
145148
146149// -----
147150
@@ -315,11 +318,16 @@ func.func @same_memref_load_store(%producer : memref<32xf32>, %consumer: memref<
315318 return
316319}
317320
321+ // -----
322+
318323// PRODUCER-CONSUMER-LABEL: func @same_memref_load_multiple_stores
324+ // ALL-LABEL: func @same_memref_load_multiple_stores
319325func.func @same_memref_load_multiple_stores (%producer : memref <32 xf32 >, %producer_2 : memref <32 xf32 >, %consumer: memref <16 xf32 >){
320326 %cst = arith.constant 2.000000e+00 : f32
321- // Source isn't removed.
327+ // Ensure that the source isn't removed during either producer-consumer
328+ // fusion or sibling fusion.
322329 // PRODUCER-CONSUMER: affine.for %{{.*}} = 0 to 32
330+ // ALL: affine.for %{{.*}} = 0 to 32
323331 affine.for %arg3 = 0 to 32 {
324332 %0 = affine.load %producer [%arg3 ] : memref <32 xf32 >
325333 %2 = arith.mulf %0 , %cst : f32
@@ -343,5 +351,8 @@ func.func @same_memref_load_multiple_stores(%producer : memref<32xf32>, %produce
343351 // PRODUCER-CONSUMER-NEXT: arith.addf
344352 // PRODUCER-CONSUMER-NEXT: affine.store
345353 // PRODUCER-CONSUMER-NEXT: }
354+ // ALL: affine.for %{{.*}} = 0 to 16
355+ // ALL: mulf
356+ // ALL: addf
346357 return
347358}
0 commit comments