88// CHECK-SAME: %[[MEMREF4:[a-zA-Z0-9]*]]: memref<?x?xf32>,
99// CHECK-SAME: %[[MEMREF5:[a-zA-Z0-9]*]]: memref<?x?xf32>,
1010// CHECK-SAME: %[[VAL:[a-zA-Z0-9]*]]: index,
11+ // CHECK-SAME: %[[LB:[a-zA-Z0-9]*]]: index,
12+ // CHECK-SAME: %[[UB:[a-zA-Z0-9]*]]: index,
1113// CHECK-SAME: %[[STEP:[a-zA-Z0-9]*]]: index,
1214// CHECK-SAME: %[[CMP:[a-zA-Z0-9]*]]: i1
1315func.func @hoist_vector_transfer_pairs (
1416 %memref0: memref <?x?xf32 >, %memref1: memref <?x?xf32 >, %memref2: memref <?x?xf32 >,
1517 %memref3: memref <?x?xf32 >, %memref4: memref <?x?xf32 >, %memref5: memref <?x?xf32 >,
16- %val: index , %step: index , %cmp: i1 ) {
17- %lb = arith.constant 0 : index
18- %ub = arith.constant 16 : index
18+ %val: index , %lb : index , %ub : index , %step: index , %cmp: i1 ) {
1919 %c0 = arith.constant 0 : index
2020 %cst = arith.constant 0.0 : f32
2121
2222// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<1xf32>
23- // CHECK: scf.for %[[I:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>) {
23+ // CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>) {
2424// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<2xf32>
25- // CHECK: scf.for %[[J:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>, vector<2xf32>) {
25+ // CHECK: scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) -> (vector<1xf32>, vector<2xf32>) {
2626// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<3xf32>
2727// CHECK: vector.transfer_read %{{.*}} : memref<?x?xf32>, vector<4xf32>
2828// CHECK: "some_crippling_use"(%[[MEMREF4]]) : (memref<?x?xf32>) -> ()
@@ -92,15 +92,15 @@ module attributes {transform.with_named_sequence} {
9292// CHECK-SAME: %[[MEMREF2:[a-zA-Z0-9]*]]: memref<?x?xf32>,
9393// CHECK-SAME: %[[MEMREF3:[a-zA-Z0-9]*]]: memref<?x?xf32>,
9494// CHECK-SAME: %[[VAL:[a-zA-Z0-9]*]]: index,
95+ // CHECK-SAME: %[[LB:[a-zA-Z0-9]*]]: index,
96+ // CHECK-SAME: %[[UB:[a-zA-Z0-9]*]]: index,
9597// CHECK-SAME: %[[STEP:[a-zA-Z0-9]*]]: index,
9698// CHECK-SAME: %[[RANDOM:[a-zA-Z0-9]*]]: index,
9799// CHECK-SAME: %[[CMP:[a-zA-Z0-9]*]]: i1
98100func.func @hoist_vector_transfer_pairs_disjoint (
99101 %memref0: memref <?x?xf32 >, %memref1: memref <?x?xf32 >,
100- %memref2: memref <?x?xf32 >, %memref3: memref <?x?xf32 >, %val: index ,
102+ %memref2: memref <?x?xf32 >, %memref3: memref <?x?xf32 >, %val: index , %lb : index , %ub : index ,
101103 %step: index , %random_index : index , %cmp: i1 ) {
102- %lb = arith.constant 0 : index
103- %ub = arith.constant 16 : index
104104 %c0 = arith.constant 0 : index
105105 %c1 = arith.constant 1 : index
106106 %c3 = arith.constant 3 : index
@@ -110,9 +110,9 @@ func.func @hoist_vector_transfer_pairs_disjoint(
110110// CHECK: vector.transfer_read %[[MEMREF2]]{{.*}} : memref<?x?xf32>, vector<3xf32>
111111// CHECK: vector.transfer_read %[[MEMREF3]]{{.*}} : memref<?x?xf32>, vector<4xf32>
112112// CHECK: vector.transfer_read %[[MEMREF3]]{{.*}} : memref<?x?xf32>, vector<4xf32>
113- // CHECK: scf.for %[[I:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) ->
113+ // CHECK: scf.for %[[I:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) ->
114114// CHECK-SAME: (vector<3xf32>, vector<3xf32>, vector<4xf32>, vector<4xf32>) {
115- // CHECK: scf.for %[[J:.*]] = {{.*}} to {{.*}} step %[[STEP]] iter_args({{.*}}) ->
115+ // CHECK: scf.for %[[J:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args({{.*}}) ->
116116// CHECK-SAME: (vector<3xf32>, vector<3xf32>, vector<4xf32>, vector<4xf32>) {
117117// CHECK: vector.transfer_read %[[MEMREF1]]{{.*}} : memref<?x?xf32>, vector<2xf32>
118118// CHECK: vector.transfer_read %[[MEMREF1]]{{.*}} : memref<?x?xf32>, vector<2xf32>
@@ -309,18 +309,18 @@ module attributes {transform.with_named_sequence} {
309309// -----
310310
311311// CHECK-LABEL: func.func @no_hoisting_zero_trip_loop
312- func.func @no_hoisting_zero_trip_loop (%arg0: memref <20 xi32 >, %arg1: memref < 20 x i32 >, % lb: index , %ub: index ) {
312+ func.func @no_hoisting_zero_trip_loop (%arg0: memref <20 xi32 >, %lb: index , %ub: index ) {
313313 %c0_i32 = arith.constant 0 : i32
314314 %c0 = arith.constant 0 : index
315315 %c1 = arith.constant 1 : index
316316 // %lb and %ub are unbounded, so do not hoist.
317317
318318 // CHECK: scf.for {{.*}} {
319319 // CHECK-NEXT: vector.transfer_read
320- // CHECK-NEXT: vector.transfer_write
320+ // CHECK-NEXT: "prevent.dce"
321321 scf.for %arg2 = %lb to %ub step %c1 {
322322 %read = vector.transfer_read %arg0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
323- vector.transfer_write %read , %arg1 [ %c0 ] { in_bounds = [ true ]} : vector <4 xi32 >, memref < 20 x i32 >
323+ " prevent.dce " ( %read ) : ( vector <4 xi32 >) ->()
324324 }
325325
326326 // %lb_0 is in range [%lb, 8], and %ub_0 is in range [4, %ub].
@@ -330,24 +330,23 @@ func.func @no_hoisting_zero_trip_loop(%arg0: memref<20xi32>, %arg1: memref<20xi3
330330
331331 // CHECK: scf.for {{.*}} {
332332 // CHECK-NEXT: vector.transfer_read
333- // CHECK-NEXT: vector.transfer_write
333+ // CHECK-NEXT: "prevent.dce"
334334 scf.for %arg2 = %lb_0 to %ub_0 step %c1 {
335335 %read = vector.transfer_read %arg0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
336- vector.transfer_write %read , %arg1 [ %c0 ] { in_bounds = [ true ]} : vector <4 xi32 >, memref < 20 x i32 >
336+ " prevent.dce " ( %read ) : ( vector <4 xi32 >) ->()
337337 }
338338
339339 // %lb_1 is in range [%lb, 4], and %ub_1 is in range [8, %ub].
340340 // Since %lb_1 is guaranteed to be less than %ub_1, hoisting is possible.
341341 %lb_1 = affine.min affine_map <(d0 ) -> (d0 , 4 )>(%lb )
342342 %ub_1 = affine.max affine_map <(d0 ) -> (d0 , 8 )>(%ub )
343343
344- // CHECK: vector.transfer_read
344+ // CHECK: vector.transfer_read
345345 // CHECK: scf.for {{.*}} {
346346 // CHECK-NEXT: "prevent.dce"
347347 scf.for %arg2 = %lb_1 to %ub_1 step %c1 {
348348 %read = vector.transfer_read %arg0 [%c0 ], %c0_i32 {in_bounds = [true ]} : memref <20 xi32 >, vector <4 xi32 >
349349 " prevent.dce" (%read ) : (vector <4 xi32 >) ->()
350- vector.transfer_write %read , %arg1 [%c0 ] {in_bounds = [true ]} : vector <4 xi32 >, memref <20 xi32 >
351350 }
352351 return
353352}
@@ -356,7 +355,7 @@ module attributes {transform.with_named_sequence} {
356355 transform.named_sequence @__transform_main (%arg1: !transform.any_op {transform.readonly }) {
357356 %0 = transform.structured.match ops {[" func.func" ]} in %arg1
358357 : (!transform.any_op ) -> !transform.any_op
359- transform.structured.hoist_redundant_vector_transfers %0
358+ transform.structured.hoist_redundant_vector_transfers %0 { verify_non_zero_trip }
360359 : (!transform.any_op ) -> !transform.any_op
361360 transform.yield
362361 }
@@ -492,7 +491,7 @@ module attributes {transform.with_named_sequence} {
492491// CHECK: #[[$MAP4:.+]] = affine_map<()[s0] -> (s0 + 4)>
493492
494493// CHECK-LABEL: func.func @hoist_vector_transfer_pairs_disjoint_dynamic
495- // CHECK-SAME: (%[[BUFFER:.+]]: memref<?x?xf32>, %{{.+}}: index, %[[I0:.+]]: index)
494+ // CHECK-SAME: (%[[BUFFER:.+]]: memref<?x?xf32>, %{{.+}}: index, %{{.+}}: index, %{{.+}}: index, %[[I0:.+]]: index)
496495
497496// CHECK: %[[PLUS1:.+]] = affine.apply #[[$MAP1]]()[%[[I0]]]
498497// CHECK: %[[PLUS4:.+]] = affine.apply #[[$MAP4]]()[%[[I0]]]
@@ -507,9 +506,7 @@ module attributes {transform.with_named_sequence} {
507506// CHECK: vector.transfer_write %{{.+}}, %[[BUFFER]][%[[I0]], %[[I0]]]
508507
509508func.func @hoist_vector_transfer_pairs_disjoint_dynamic (
510- %buffer: memref <?x?xf32 >, %step: index , %i0 : index ) {
511- %lb = arith.constant 0 : index
512- %ub = arith.constant 16 : index
509+ %buffer: memref <?x?xf32 >, %lb : index , %ub : index , %step: index , %i0 : index ) {
513510 %cst = arith.constant 0.0 : f32
514511 %i1 = affine.apply affine_map <(d0 ) -> (d0 + 1 )>(%i0 )
515512 %i2 = affine.apply affine_map <(d0 ) -> (d0 + 4 )>(%i0 )
@@ -552,9 +549,7 @@ module attributes {transform.with_named_sequence} {
552549// CHECK-COUNT-2: vector.transfer_write
553550
554551func.func @hoist_vector_transfer_pairs_overlapping_dynamic (
555- %buffer: memref <?x?xf32 >, %step: index , %i0 : index ) {
556- %lb = arith.constant 0 : index
557- %ub = arith.constant 16 : index
552+ %buffer: memref <?x?xf32 >, %lb : index , %ub : index , %step: index , %i0 : index ) {
558553 %cst = arith.constant 0.0 : f32
559554 %i1 = affine.apply affine_map <(d0 ) -> (d0 + 3 )>(%i0 )
560555
@@ -594,9 +589,7 @@ module attributes {transform.with_named_sequence} {
594589// CHECK: return
595590
596591func.func @hoist_vector_transfer_pairs_disjoint_dynamic (
597- %buffer: memref <?x?xf32 >, %step: index , %i0 : index , %i1 : index ) {
598- %lb = arith.constant 0 : index
599- %ub = arith.constant 16 : index
592+ %buffer: memref <?x?xf32 >, %lb : index , %ub : index , %step: index , %i0 : index , %i1 : index ) {
600593 %cst = arith.constant 0.0 : f32
601594 %i2 = affine.apply affine_map <(d0 ) -> ((d0 floordiv 32 ) * 16 )>(%i1 )
602595 %i3 = affine.apply affine_map <(d0 ) -> ((d0 floordiv 32 ) * 16 + 8 )>(%i1 )
@@ -633,7 +626,7 @@ module attributes {transform.with_named_sequence} {
633626// Test hoisting of vector.extract/vector.broadcast pairs
634627
635628// CHECK-LABEL: func.func @hoist_vector_broadcasts
636- // CHECK-SAME: (%{{.+}}: index, %[[VEC:.+]]: vector<3x4xf32>) -> vector<3x4xf32> {
629+ // CHECK-SAME: (%{{.+}}: index, %{{.+}}: index, %{{.+}}: index, %[[VEC:.+]]: vector<3x4xf32>) -> vector<3x4xf32> {
637630// CHECK: %[[EXTRACT:.+]] = vector.extract %[[VEC]][0] : vector<4xf32> from vector<3x4xf32>
638631// CHECK-NEXT: %[[LOOP:.+]] = scf.for {{.*}} {
639632// CHECK-NEXT: %[[USE:.+]] = "some_use"({{.*}}) : (vector<4xf32>) -> vector<4xf32>
@@ -642,9 +635,7 @@ module attributes {transform.with_named_sequence} {
642635// CHECK-NEXT: %[[BCAST:.+]] = vector.broadcast %[[LOOP]] : vector<4xf32> to vector<3x4xf32>
643636// CHECK-NEXT: return %[[BCAST]] : vector<3x4xf32>
644637
645- func.func @hoist_vector_broadcasts (%step : index , %vec : vector <3 x4 xf32 >) -> vector <3 x4 xf32 > {
646- %lb = arith.constant 0 : index
647- %ub = arith.constant 16 : index
638+ func.func @hoist_vector_broadcasts (%lb : index , %ub : index , %step : index , %vec : vector <3 x4 xf32 >) -> vector <3 x4 xf32 > {
648639 %bcast_vec = scf.for %arg0 = %lb to %ub step %step iter_args (%iarg = %vec ) -> vector <3 x4 xf32 > {
649640 %extract = vector.extract %iarg [0 ] : vector <4 xf32 > from vector <3 x4 xf32 >
650641 %use = " some_use" (%extract ) : (vector <4 xf32 >) -> vector <4 xf32 >
@@ -669,7 +660,7 @@ module attributes {transform.with_named_sequence} {
669660// Test hoisting of vector.extract/vector.broadcast pairs with dynamic position
670661
671662// CHECK-LABEL: func.func @hoist_vector_broadcasts_dynamic
672- // CHECK-SAME: (%{{.+}}: index, %[[VEC:.+]]: vector<3x4xf32>, %[[POS:.+]]: index) -> vector<3x4xf32> {
663+ // CHECK-SAME: (%{{.+}}: index, %{{.+}}: index, %{{.+}}: index, %[[VEC:.+]]: vector<3x4xf32>, %[[POS:.+]]: index) -> vector<3x4xf32> {
673664// CHECK: %[[EXTRACT:.+]] = vector.extract %[[VEC]][%[[POS]]] : vector<4xf32> from vector<3x4xf32>
674665// CHECK-NEXT: %[[LOOP:.+]] = scf.for {{.*}} {
675666// CHECK-NEXT: %[[USE:.+]] = "some_use"({{.*}}) : (vector<4xf32>) -> vector<4xf32>
@@ -678,9 +669,7 @@ module attributes {transform.with_named_sequence} {
678669// CHECK-NEXT: %[[BCAST:.+]] = vector.broadcast %[[LOOP]] : vector<4xf32> to vector<3x4xf32>
679670// CHECK-NEXT: return %[[BCAST]] : vector<3x4xf32>
680671
681- func.func @hoist_vector_broadcasts_dynamic (%step : index , %vec : vector <3 x4 xf32 >, %pos: index ) -> vector <3 x4 xf32 > {
682- %lb = arith.constant 0 : index
683- %ub = arith.constant 16 : index
672+ func.func @hoist_vector_broadcasts_dynamic (%lb : index , %ub : index , %step : index , %vec : vector <3 x4 xf32 >, %pos: index ) -> vector <3 x4 xf32 > {
684673 %bcast_vec = scf.for %arg0 = %lb to %ub step %step iter_args (%iarg = %vec ) -> vector <3 x4 xf32 > {
685674 %extract = vector.extract %iarg [%pos ] : vector <4 xf32 > from vector <3 x4 xf32 >
686675 %use = " some_use" (%extract ) : (vector <4 xf32 >) -> vector <4 xf32 >
@@ -705,7 +694,7 @@ module attributes {transform.with_named_sequence} {
705694// Test hoisting of vector.extract/vector.broadcast pairs with multiple iter_args
706695
707696// CHECK-LABEL: func.func @hoist_vector_broadcasts_multiple
708- // CHECK-SAME: (%{{.+}}: index, %[[VEC1:.+]]: vector<3x4xf32>,
697+ // CHECK-SAME: (%{{.+}}: index, %{{.+}}: index, %{{.+}}: index, %[[VEC1:.+]]: vector<3x4xf32>,
709698// CHECK-SAME: %[[VEC2:.+]]: vector<3x5xf32>) -> (vector<3x4xf32>, vector<3x5xf32>) {
710699// CHECK-DAG: %[[EXTRACT1:.+]] = vector.extract %[[VEC1]][0] : vector<4xf32> from vector<3x4xf32>
711700// CHECK-DAG: %[[EXTRACT2:.+]] = vector.extract %[[VEC2]][1] : vector<5xf32> from vector<3x5xf32>
@@ -718,9 +707,7 @@ module attributes {transform.with_named_sequence} {
718707// CHECK-DAG: %[[BCAST2:.+]] = vector.broadcast %[[LOOP]]#1 : vector<5xf32> to vector<3x5xf32>
719708// CHECK-NEXT: return %[[BCAST1]], %[[BCAST2]] : vector<3x4xf32>, vector<3x5xf32>
720709
721- func.func @hoist_vector_broadcasts_multiple (%step : index , %vec1 : vector <3 x4 xf32 >, %vec2 : vector <3 x5 xf32 >) -> (vector <3 x4 xf32 >, vector <3 x5 xf32 >) {
722- %lb = arith.constant 0 : index
723- %ub = arith.constant 16 : index
710+ func.func @hoist_vector_broadcasts_multiple (%lb : index , %ub : index , %step : index , %vec1 : vector <3 x4 xf32 >, %vec2 : vector <3 x5 xf32 >) -> (vector <3 x4 xf32 >, vector <3 x5 xf32 >) {
724711 %bcast_vec:2 = scf.for %arg0 = %lb to %ub step %step iter_args (%iarg = %vec1 , %iarg2 = %vec2 ) -> (vector <3 x4 xf32 >, vector <3 x5 xf32 >) {
725712 %extract1 = vector.extract %iarg [0 ] : vector <4 xf32 > from vector <3 x4 xf32 >
726713 %extract2 = vector.extract %iarg2 [1 ] : vector <5 xf32 > from vector <3 x5 xf32 >
0 commit comments