11// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=PRODUCER-CONSUMER
22// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{mode=producer fusion-maximal}))' -split-input-file | FileCheck %s --check-prefix=PRODUCER-CONSUMER-MAXIMAL
33// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{fusion-maximal mode=sibling}))' -split-input-file | FileCheck %s --check-prefix=SIBLING-MAXIMAL
4+ // All fusion: producer-consumer and sibling.
5+ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion))' -split-input-file | FileCheck %s --check-prefix=ALL
46// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(spirv.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=SPIRV
57
68// Part I of fusion tests in mlir/test/Transforms/loop-fusion.mlir.
@@ -109,6 +111,7 @@ func.func @check_src_dst_step(%m : memref<100xf32>,
109111func.func @reduce_add_non_maximal_f32_f32 (%arg0: memref <64 x64 xf32 , 1 >, %arg1 : memref <1 x64 xf32 , 1 >, %arg2 : memref <1 x64 xf32 , 1 >) {
110112 %cst_0 = arith.constant 0.000000e+00 : f32
111113 %cst_1 = arith.constant 1.000000e+00 : f32
114+ // This nest writes to %arg1 but can be eliminated after sibling fusion.
112115 affine.for %arg3 = 0 to 1 {
113116 affine.for %arg4 = 0 to 64 {
114117 %accum = affine.for %arg5 = 0 to 64 iter_args (%prevAccum = %cst_0 ) -> f32 {
@@ -138,11 +141,11 @@ func.func @reduce_add_non_maximal_f32_f32(%arg0: memref<64x64xf32, 1>, %arg1 : m
138141// since the destination loop and source loop trip counts do not
139142// match.
140143// SIBLING-MAXIMAL: %[[cst_0:.*]] = arith.constant 0.000000e+00 : f32
141- // SIBLING-MAXIMAL-NEXT: %[[cst_1:.*]] = arith.constant 1.000000e+00 : f32
142- // SIBLING-MAXIMAL-NEXT: affine.for %[[idx_0:.*]] = 0 to 1 {
143- // SIBLING-MAXIMAL-NEXT: affine.for %[[idx_1:.*]] = 0 to 64 {
144- // SIBLING-MAXIMAL-NEXT: %[[result_1:.*]] = affine.for %[[idx_2:.*]] = 0 to 32 iter_args(%[[iter_0:.*]] = %[[cst_1]]) -> (f32) {
145- // SIBLING-MAXIMAL-NEXT: %[[result_0:.*]] = affine.for %[[idx_3:.*]] = 0 to 64 iter_args(%[[iter_1:.*]] = %[[cst_0]]) -> (f32) {
144+ // SIBLING-MAXIMAL-NEXT: %[[cst_1:.*]] = arith.constant 1.000000e+00 : f32
145+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 1 {
146+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 64 {
147+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 32 iter_args(%{{.*}} = %[[cst_1]]) -> (f32) {
148+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 64 iter_args(%{{.*}} = %[[cst_0]]) -> (f32) {
146149
147150// -----
148151
@@ -316,11 +319,16 @@ func.func @same_memref_load_store(%producer : memref<32xf32>, %consumer: memref<
316319 return
317320}
318321
322+ // -----
323+
319324// PRODUCER-CONSUMER-LABEL: func @same_memref_load_multiple_stores
325+ // ALL-LABEL: func @same_memref_load_multiple_stores
320326func.func @same_memref_load_multiple_stores (%producer : memref <32 xf32 >, %producer_2 : memref <32 xf32 >, %consumer: memref <16 xf32 >){
321327 %cst = arith.constant 2.000000e+00 : f32
322- // Source isn't removed.
328+ // Ensure that the source nest isn't removed by either producer-consumer
329+ // fusion or sibling fusion.
323330 // PRODUCER-CONSUMER: affine.for %{{.*}} = 0 to 32
331+ // ALL: affine.for %{{.*}} = 0 to 32
324332 affine.for %arg3 = 0 to 32 {
325333 %0 = affine.load %producer [%arg3 ] : memref <32 xf32 >
326334 %2 = arith.mulf %0 , %cst : f32
@@ -344,6 +352,9 @@ func.func @same_memref_load_multiple_stores(%producer : memref<32xf32>, %produce
344352 // PRODUCER-CONSUMER-NEXT: arith.addf
345353 // PRODUCER-CONSUMER-NEXT: affine.store
346354 // PRODUCER-CONSUMER-NEXT: }
355+ // ALL: affine.for %{{.*}} = 0 to 16
356+ // ALL: mulf
357+ // ALL: addf
347358 return
348359}
349360
0 commit comments