@@ -946,3 +946,38 @@ func.func @index_switch(%pred: index, %b: tensor<5xf32>, %c: tensor<5xf32>) -> t
946946 // CHECK: return %[[r]]
947947 return %0 : tensor <5 xf32 >
948948}
949+
// -----

// See Issue https://github.com/llvm/llvm-project/issues/133964 . Checks that
// tensor.parallel_insert_slice dest operand does not have read semantics.
//
// The scf.forall below has a single iteration (upper bound %c1) and takes
// %arg2, which is marked `bufferization.writable = true`, as its only shared
// output. Inside the body the shared output %arg4 is used twice: as the `outs`
// operand of the linalg.generic and as the destination of the
// tensor.parallel_insert_slice that writes the generic's result back. The
// CHECK-NOT directive requires that no memref.alloc appears in the output
// after the function label.
func.func @check_scfforall_inplace_bufferizer(%arg0 : tensor<?x?xf32>,
    %arg1 : tensor<?x?xf32>,
    %arg2 : tensor<?xf32> {bufferization.writable = true}) -> tensor<?xf32> {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // Slice extents: %d0 is the length of the 1-D output %arg2, %d1 is the
  // second dimension of %arg1.
  %d0 = tensor.dim %arg2, %c0 : tensor<?xf32>
  %d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
  %0 = scf.forall (%arg3) in (%c1) shared_outs(%arg4 = %arg2) -> (tensor<?xf32>) {
    %1 = tensor.extract_slice %arg0[0, 0][%d0, %d1][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
    %2 = tensor.extract_slice %arg1[0, 0][%d0, %d1][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
    // Row-wise multiply-accumulate: d1 is the reduction dimension and the
    // result is indexed by d0 only. The accumulator (%b2) comes from %arg4.
    %3 = linalg.generic {
        indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                         affine_map<(d0, d1) -> (d0, d1)>,
                         affine_map<(d0, d1) -> (d0)>],
        iterator_types = ["parallel", "reduction"]}
        ins(%1, %2 : tensor<?x?xf32>, tensor<?x?xf32>)
        outs(%arg4 : tensor<?xf32>) {
      ^bb0(%b0 : f32, %b1: f32, %b2 : f32):
        %4 = arith.mulf %b0, %b1 : f32
        %5 = arith.addf %4, %b2 : f32
        linalg.yield %5 : f32
    } -> tensor<?xf32>
    // Write the reduction result back into the same shared output it was
    // accumulated from, covering [0, %d0) with unit stride.
    scf.forall.in_parallel {
      tensor.parallel_insert_slice %3 into %arg4[0] [%d0] [1] : tensor<?xf32> into tensor<?xf32>
    }
  }
  return %0 : tensor<?xf32>
}
// CHECK-LABEL: func @check_scfforall_inplace_bufferizer
// CHECK-NOT: memref.alloc
0 commit comments