@@ -658,33 +658,20 @@ module attributes {transform.with_named_sequence} {
658658 }
659659}
660660
661- // CHECK: func.func private @tile_one_consumer_using_tile_and_fuse(%[[VAL_0:.*]]: tensor<16x128x48x96xf32>, %[[VAL_1:.*]]: tensor<16x96x48x128xf32>) -> tensor<16x96x48x128xf32> {
662- // CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
663- // CHECK: %[[VAL_3:.*]] = arith.constant 16 : index
664- // CHECK: %[[VAL_4:.*]] = arith.constant 128 : index
665- // CHECK: %[[VAL_5:.*]] = arith.constant 48 : index
666- // CHECK: %[[VAL_6:.*]] = arith.constant 96 : index
667- // CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
668- // CHECK: %[[VAL_8:.*]] = scf.for %[[VAL_9:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_7]] iter_args(%[[VAL_10:.*]] = %[[VAL_1]]) -> (tensor<16x96x48x128xf32>) {
669- // CHECK: %[[VAL_11:.*]] = scf.for %[[VAL_12:.*]] = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_3]] iter_args(%[[VAL_13:.*]] = %[[VAL_10]]) -> (tensor<16x96x48x128xf32>) {
670- // CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (tensor<16x96x48x128xf32>) {
671- // CHECK: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_6]] step %[[VAL_3]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (tensor<16x96x48x128xf32>) {
672- // CHECK: %[[VAL_20:.*]] = tensor.extract_slice %[[VAL_0]]{{\[}}%[[VAL_9]], %[[VAL_12]], %[[VAL_15]], %[[VAL_18]]] [1, 16, 16, 16] [1, 1, 1, 1] : tensor<16x128x48x96xf32> to tensor<1x16x16x16xf32>
673- // CHECK: %[[VAL_21:.*]] = tensor.extract_slice %[[VAL_19]]{{\[}}%[[VAL_9]], %[[VAL_18]], %[[VAL_15]], %[[VAL_12]]] [1, 16, 16, 16] [1, 1, 1, 1] : tensor<16x96x48x128xf32> to tensor<1x16x16x16xf32>
674- // CHECK: %[[VAL_22:.*]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[VAL_20]] : tensor<1x16x16x16xf32>) outs(%[[VAL_21]] : tensor<1x16x16x16xf32>) {
675- // CHECK: ^bb0(%[[VAL_23:.*]]: f32, %[[VAL_24:.*]]: f32):
676- // CHECK: linalg.yield %[[VAL_23]] : f32
677- // CHECK: } -> tensor<1x16x16x16xf32>
678- // CHECK: %[[VAL_25:.*]] = tensor.insert_slice %[[VAL_26:.*]] into %[[VAL_19]]{{\[}}%[[VAL_9]], %[[VAL_18]], %[[VAL_15]], %[[VAL_12]]] [1, 16, 16, 16] [1, 1, 1, 1] : tensor<1x16x16x16xf32> into tensor<16x96x48x128xf32>
679- // CHECK: scf.yield %[[VAL_25]] : tensor<16x96x48x128xf32>
680- // CHECK: }
681- // CHECK: scf.yield %[[VAL_27:.*]] : tensor<16x96x48x128xf32>
682- // CHECK: }
683- // CHECK: scf.yield %[[VAL_28:.*]] : tensor<16x96x48x128xf32>
684- // CHECK: }
685- // CHECK: scf.yield %[[VAL_29:.*]] : tensor<16x96x48x128xf32>
686- // CHECK: }
687- // CHECK: return %[[VAL_30:.*]] : tensor<16x96x48x128xf32>
688- // CHECK: }
689- // CHECK: }
690-
661+ // CHECK-LABEL: func private @tile_one_consumer_using_tile_and_fuse
662+ // CHECK-SAME: %[[ARG0:.*]]: tensor<16x128x48x96xf32>
663+ // CHECK-SAME: %[[ARG1:.*]]: tensor<16x96x48x128xf32>
664+ // CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] =
665+ // CHECK-SAME: iter_args(%[[ITERARG0:.+]] = %[[ARG1]])
666+ // CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] =
667+ // CHECK-SAME: iter_args(%[[ITERARG1:.+]] = %[[ITERARG0]])
668+ // CHECK: scf.for %[[IV2:[a-zA-Z0-9]+]] =
669+ // CHECK-SAME: iter_args(%[[ITERARG2:.+]] = %[[ITERARG1]])
670+ // CHECK: scf.for %[[IV3:[a-zA-Z0-9]+]] =
671+ // CHECK-SAME: iter_args(%[[ITERARG3:.+]] = %[[ITERARG2]])
672+ // CHECK: %[[TILEDARG0:.*]] = tensor.extract_slice %[[ARG0]]{{\[}}%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
673+ // CHECK: %[[TILEDARG1:.*]] = tensor.extract_slice %[[ITERARG3]]{{\[}}%[[IV0]], %[[IV3]], %[[IV2]], %[[IV1]]]
674+ // CHECK: %[[RES:.*]] = linalg.generic
675+ // CHECK-SAME: ins(%[[TILEDARG0]]
676+ // CHECK-SAME: outs(%[[TILEDARG1]]
677+ // CHECK: tensor.insert_slice %[[RES]]
0 commit comments