@@ -177,9 +177,14 @@ func.func @non_monotonic_affine_expr(%arg0 : tensor<7xf32>) -> tensor<7xf32> {
177177 %0 = tensor.dim %arg0, %c0 : tensor<7xf32>
178178 %empty = tensor.empty() : tensor<7xf32>
179179
180- // CHECK: %[[OUT:.*]] = tensor.empty() : tensor<7xf32>
181- // CHECK: scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[OUT]]) -> (tensor<7xf32>) {
182- // CHECK: tensor.extract_slice %[[TC0]][0] [7] [1] : tensor<7xf32> to tensor<7xf32>
180+ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
181+ // CHECK-DAG: %[[OUT:.*]] = tensor.empty() : tensor<7xf32>
182+ // CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
183+ // CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index
184+ // CHECK-DAG: %[[C7_1:.*]] = arith.constant 7 : index
185+ // CHECK: scf.for %[[IV0:.+]] = %[[C0]] to %[[C7]] step %[[C7_1]] iter_args(%[[TC0:.*]] = %[[OUT]]) -> (tensor<7xf32>) {
186+ // CHECK: tensor.extract_slice %[[ARG0]][0] [7] [1] : tensor<7xf32> to tensor<7xf32>
187+ // CHECK: tensor.extract_slice %[[TC0]][%[[IV0]]] [7] [1] : tensor<7xf32> to tensor<7xf32>
183188 %generic = linalg.generic
184189 {indexing_maps = [affine_map<(d0) -> (d0 mod 4)>,
185190 affine_map<(d0) -> (d0)>],
@@ -199,3 +204,44 @@ module attributes {transform.with_named_sequence} {
199204 transform.yield
200205 }
201206}
207+
208+ // -----
209+
210+ #identity = affine_map<(d0, d1) -> (d0, d1)>
211+ #identity1 = affine_map<(d0, d1) -> (d0 mod 3, d1)>
212+
// Tiling with tile_sizes [4, 5]: the output map #identity1 applies `d0 mod 3`,
// which is non-monotonic in d0, so the output slice keeps the full first
// dimension (offset 0, size 4) while the monotonic d1 is tiled by 5; the input
// (identity map) is sliced at [%IV0, %IV1] as usual.
213+ // CHECK-LABEL: func @tile_monotonic_outer_dim
214+ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<4x10xf32>
215+ func.func @tile_monotonic_outer_dim(%in: tensor<4x10xf32>) -> tensor<4x10xf32> {
216+ %empty = tensor.empty() : tensor<4x10xf32>
217+ %1 = linalg.generic {indexing_maps = [#identity, #identity1], iterator_types = ["parallel", "parallel"]}
218+ ins(%in : tensor<4x10xf32>) outs(%empty : tensor<4x10xf32>) {
219+ ^bb1(%a: f32, %b: f32):
220+ linalg.yield %a : f32
221+ } -> tensor<4x10xf32>
222+
223+ // CHECK: %[[C4:.+]] = arith.constant 4 : index
224+ // CHECK: %[[C4_1:.+]] = arith.constant 4 : index
225+ // CHECK: %[[C5:.+]] = arith.constant 5 : index
226+ // CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %[[C4]] step %[[C4_1]] iter_args(%[[ARG1:.+]] = %[[OUT:.+]]) -> (tensor<4x10xf32>) {
227+ // CHECK: scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C5]] iter_args(%[[ARG2:.+]] = %[[ARG1]]) -> (tensor<4x10xf32>) {
228+ // CHECK: %[[INSLICE:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], %[[IV1]]] [4, 5] [1, 1] : tensor<4x10xf32> to tensor<4x5xf32>
229+ // CHECK: %[[OUTSLICE:.+]] = tensor.extract_slice %[[ARG2]][0, %[[IV1]]] [4, 5] [1, 1] : tensor<4x10xf32> to tensor<4x5xf32>
230+ // CHECK: %[[RES:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%[[INSLICE]] : tensor<4x5xf32>) outs(%[[OUTSLICE]] : tensor<4x5xf32>) {
231+ // CHECK: ^bb0(%in: f32, %out: f32):
232+ // CHECK: linalg.yield %in : f32
233+ // CHECK: } -> tensor<4x5xf32>
234+ // CHECK: %[[INSERT_SLICE:.+]] = tensor.insert_slice %[[RES]] into %[[ARG2]][0, %[[IV1]]] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<4x10xf32>
235+ // CHECK: scf.yield %[[INSERT_SLICE]] : tensor<4x10xf32>
236+ // CHECK: }
237+
238+ return %1 : tensor<4x10xf32>
239+ }
240+
// Transform script driving the test above: matches the linalg.generic and
// tiles it with tile_sizes [4, 5], yielding the tiled op and the two
// generated scf.for loop handles.
241+ module attributes {transform.with_named_sequence} {
242+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
243+ %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
244+ %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [4, 5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
245+ transform.yield
246+ }
247+ }
0 commit comments