@@ -765,10 +765,10 @@ transform.sequence failures(propagate) {
765765
766766// CHECK-LABEL:  func.func @no_hoisting_collapse_shape 
767767//       CHECK:    scf.for {{.*}} { 
768- //       CHECK:      vector.transfer_write 
769- //       CHECK:      vector.transfer_read 
770- //       CHECK:      vector.transfer_write 
771- //       CHECK:    } 
768+ //       CHECK:      vector.transfer_write {{.*}} : vector<4xi32>, memref<4xi32>  
769+ //       CHECK-NEXT:      vector.transfer_read {{.*}} : memref<1x4x1xi32>, vector<1x4x1xi32>
770+ //       CHECK-NEXT:      vector.transfer_write {{.*}} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
771+ //       CHECK-NEXT:    }
772772
773773func.func  @no_hoisting_collapse_shape (%in_0:  memref <1 x20 x1 xi32 >, %1:  memref <9 x1 xi32 >, %vec:  vector <4 xi32 >) {
774774  %c0_i32  = arith.constant  0  : i32 
@@ -827,3 +827,48 @@ transform.sequence failures(propagate) {
827827  transform.structured.hoist_redundant_vector_transfers  %0 
828828    : (!transform.any_op ) -> !transform.any_op 
829829}
830+ 
831+ // ----- 
832+ 
833+ // Regression test - hoisting the following `vector.transfer_{read|write}` pair 
834+ // would not be safe: 
835+ //    %lhs = vector.transfer_read %collapsed_1[%c0] 
836+ //    vector.transfer_write %op, %collapsed_1[%c0] 
837+ // That's because the following `vector.transfer_read` reads from the same 
838+ // memory (i.e. `%collapsed_1` and `%collapsed_2` alias): 
839+ //    %acc = vector.transfer_read %collapsed_2[%c0] 
840+ 
841+ // CHECK-LABEL:  func.func @no_hoisting_write_to_memref 
842+ //       CHECK:    scf.for {{.*}} { 
843+ //       CHECK:      vector.transfer_read {{.*}} :  memref<2xi32>, vector<1xi32> 
844+ //       CHECK-NEXT:      vector.transfer_read {{.*}} :  memref<2xi32>, vector<1xi32> 
845+ //       CHECK-NEXT:      vector.outerproduct {{.*}} : vector<1xi32>, i32 
846+ //       CHECK-NEXT:      vector.transfer_write {{.*}} : vector<1xi32>, memref<2xi32> 
847+ //       CHECK-NEXT:    } 
848+ 
849+ func.func  @no_hoisting_write_to_memref (%rhs:  i32 , %arg1:  vector <1 xi32 >) {  // NOTE(review): %arg1 is not referenced in the body - confirm it is intentional.
850+   %c0_i32  = arith.constant  0  : i32   // Operand of both vector.transfer_read ops below.
851+   %c0  = arith.constant  0  : index   // Loop lower bound; also the index used by every transfer op below.
852+   %c1  = arith.constant  1  : index   // NOTE(review): not referenced anywhere below.
853+   %c4  = arith.constant  4  : index   // Loop step.
854+   %c20  = arith.constant  20  : index   // Loop upper bound.
855+   %alloca  = memref.alloca () {alignment  = 64  : i64 } : memref <1 x1 x2 xi32 >
856+   %cast  = memref.cast  %alloca  : memref <1 x1 x2 xi32 > to  memref <1 x1 x2 xi32 >  // NOTE(review): source and result types are identical and %cast is not referenced below.
857+   %collapsed_1  = memref.collapse_shape  %alloca  [[0 , 1 , 2 ]] : memref <1 x1 x2 xi32 > into  memref <2 xi32 >  // First collapsed view of %alloca, created outside the loop.
858+   scf.for  %_  = %c0  to  %c20  step  %c4  {
859+     %collapsed_2  = memref.collapse_shape  %alloca  [[0 , 1 , 2 ]] : memref <1 x1 x2 xi32 > into  memref <2 xi32 >  // Second collapsed view of the same %alloca, created inside the loop.
860+     %lhs  = vector.transfer_read  %collapsed_1 [%c0 ], %c0_i32  {in_bounds  = [true ]} : memref <2 xi32 >, vector <1 xi32 >  // Read through %collapsed_1 at %c0.
861+     %acc  = vector.transfer_read  %collapsed_2 [%c0 ], %c0_i32  {in_bounds  = [true ]} : memref <2 xi32 >, vector <1 xi32 >  // Read through %collapsed_2 at the same index %c0.
862+     %op  = vector.outerproduct  %lhs , %rhs , %acc  {kind  = #vector.kind <add >} : vector <1 xi32 >, i32 
863+     vector.transfer_write  %op , %collapsed_1 [%c0 ] {in_bounds  = [true ]} : vector <1 xi32 >, memref <2 xi32 >  // Write through %collapsed_1 at %c0; the test expects this to stay inside the loop.
864+   }
865+   return 
866+ }
867+ 
868+ transform.sequence  failures (propagate ) {  // Transform script: match every func.func under %arg1 and hoist redundant vector transfers in it.
869+ ^bb1 (%arg1:  !transform.any_op ):
870+   %0  = transform.structured.match  ops {["func.func"]} in  %arg1  // The op name must be exact; padding inside the string would make the match select nothing.
871+     : (!transform.any_op ) -> !transform.any_op 
872+   transform.structured.hoist_redundant_vector_transfers  %0  // Applies the transform under test to the matched functions.
873+     : (!transform.any_op ) -> !transform.any_op 
874+ }
0 commit comments