@@ -266,3 +266,50 @@ func.func @tile_linalg_matmul(
266266 -> tensor <128 x128 xf32 >
267267 return %0 : tensor <128 x128 xf32 >
268268}
269+
270+ // -----
271+
272+ #map = affine_map <(d0 ) -> (d0 )>
273+
// Element-wise i16 addition over a dynamically sized 1-D tensor that has
// already been split by hand into two linalg.generic ops:
//   * a main part covering [0, %2), where %2 = (dim / 80) * 80, and
//   * a remainder covering [%2, %2 + %3), where %3 = dim % 80.
// The expectations below require a loop for the first generic with no
// affine.min in it, and no loop at all after the second generic.
// NOTE(review): presumably the first holds because the main part's size is a
// multiple of the tile size 80 -- confirm against the tiling implementation.
// CHECK-LABEL: splited_dynamic_linalg_generic
func.func @splited_dynamic_linalg_generic(%arg0: tensor<?xi16>, %arg1: tensor<?xi16>) -> tensor<?xi16> {
  // These are the only two arith.constant ops in the function, in this order;
  // the transform sequence in this file matches on arith.constant.
  %c80 = arith.constant 80 : index
  %c0 = arith.constant 0 : index
  %dim = tensor.dim %arg1, %c0 : tensor<?xi16>
  %0 = tensor.empty(%dim) : tensor<?xi16>
  // %2 = size of the main part (largest multiple of 80 not exceeding %dim),
  // %3 = size of the remainder.
  %1 = arith.divui %dim, %c80 : index
  %2 = arith.muli %1, %c80 : index
  %3 = arith.remui %dim, %c80 : index

  // Main part: operands and init are all taken from offset 0 with size %2.
  %extracted_slice = tensor.extract_slice %arg0[0] [%2] [1] : tensor<?xi16> to tensor<?xi16>
  %extracted_slice_0 = tensor.extract_slice %arg1[0] [%2] [1] : tensor<?xi16> to tensor<?xi16>
  %extracted_slice_1 = tensor.extract_slice %0[0] [%2] [1] : tensor<?xi16> to tensor<?xi16>
  // CHECK: scf.for
  // CHECK-NOT: affine.min
  %4 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%extracted_slice, %extracted_slice_0 : tensor<?xi16>, tensor<?xi16>) outs(%extracted_slice_1 : tensor<?xi16>) {
  ^bb0(%in_1: i16, %in_2: i16, %out: i16):
    %6 = arith.addi %in_1, %in_2 : i16
    linalg.yield %6 : i16
  } -> tensor<?xi16>
  // Write the main-part result back where its operands came from: offset 0,
  // size %2. (An offset of %2 here would cover [%2, 2 * %2), overlapping the
  // remainder region and possibly exceeding the destination's size.)
  %inserted_slice = tensor.insert_slice %4 into %0[0] [%2] [1] : tensor<?xi16> into tensor<?xi16>

  // Remainder: operands and init are taken from offset %2 with size %3.
  %extracted_slice_2 = tensor.extract_slice %arg0[%2] [%3] [1] : tensor<?xi16> to tensor<?xi16>
  %extracted_slice_3 = tensor.extract_slice %arg1[%2] [%3] [1] : tensor<?xi16> to tensor<?xi16>
  %extracted_slice_4 = tensor.extract_slice %0[%2] [%3] [1] : tensor<?xi16> to tensor<?xi16>
  // CHECK-NOT: scf.for
  %5 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%extracted_slice_2, %extracted_slice_3 : tensor<?xi16>, tensor<?xi16>) outs(%extracted_slice_4 : tensor<?xi16>) {
  ^bb0(%in_1: i16, %in_2: i16, %out: i16):
    %7 = arith.addi %in_1, %in_2 : i16
    linalg.yield %7 : i16
  } -> tensor<?xi16>
  // The remainder result goes after the main part, at offset %2 with size %3.
  %inserted_slice_0 = tensor.insert_slice %5 into %inserted_slice[%2] [%3] [1] : tensor<?xi16> into tensor<?xi16>
  return %inserted_slice_0 : tensor<?xi16>
}
306+
307+
// Transform script for the function above: tiles every matched linalg.generic
// with scf.for loops, taking the tile sizes from the payload's arith.constant
// ops instead of static integers.
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    // Handle to all linalg.generic ops under the payload root.
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    // Handle to all arith.constant ops under the payload root; it is used
    // below as the (dynamic) tile-size operand.
    // NOTE(review): the payload has two generics and two constants (80, then
    // 0); presumably the sizes are paired with the targets one-to-one, so the
    // first generic is tiled by 80 and the second by 0 (i.e. left untiled) --
    // confirm against the tile_using_for documentation.
    %const = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    // Both results (tiled op handle and loop handle) are unused; only the
    // rewritten payload IR is inspected by the test.
    %1, %loop = transform.structured.tile_using_for %0 tile_sizes [%const] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
    transform.yield
  }
}
0 commit comments