11; REQUIRES: asserts
2- ; RUN: opt -mattr=+neon,+dotprod - passes=loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 -disable-output %s 2>&1 | FileCheck %s
2+ ; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output %s 2>&1 | FileCheck %s
33
44target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
55target triple = "aarch64-none-unknown-elf"
66
77; Tests for printing VPlans that are enabled under AArch64
88
9- define i32 @print_partial_reduction (ptr %a , ptr %b ) {
9+ define i32 @print_partial_reduction (ptr %a , ptr %b ) "target-features" = "+neon,+dotprod" {
1010; CHECK: VPlan 'Initial VPlan for VF={8,16},UF>=1' {
1111; CHECK-NEXT: Live-in vp<[[VF:%.]]> = VF
1212; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF
@@ -69,60 +69,37 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
6969; CHECK-NEXT: No successors
7070; CHECK-NEXT: }
7171; CHECK: VPlan 'Final VPlan for VF={8,16},UF={1}' {
72+ ; CHECK-NEXT: Live-in ir<1024> = vector-trip-count
7273; CHECK-NEXT: Live-in ir<1024> = original trip-count
7374; CHECK-EMPTY:
7475; CHECK-NEXT: ir-bb<entry>:
75- ; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb< vector.main.loop.iter.check>
76+ ; CHECK-NEXT: Successor(s): vector.ph
7677; CHECK-EMPTY:
77- ; CHECK-NEXT: ir-bb<vector.main.loop.iter.check>:
78- ; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
79- ; CHECK-EMPTY:
80- ; CHECK-NEXT: ir-bb<vector.ph>:
81- ; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4>
78+ ; CHECK-NEXT: vector.ph:
79+ ; CHECK-NEXT: EMIT vp<%1> = reduction-start-vector ir<0>, ir<0>, ir<4>
8280; CHECK-NEXT: Successor(s): vector.body
8381; CHECK-EMPTY:
8482; CHECK-NEXT: vector.body:
85- ; CHECK-NEXT: EMIT-SCALAR vp<[[EP_IV:%.+]] > = phi [ ir<0>, ir-bb< vector.ph> ], [ vp<%index.next>, vector.body ]
86- ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<[[RDX_START]] >, ir<%add> (VF scaled by 1/4)
87- ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[EP_IV]] >
83+ ; CHECK-NEXT: EMIT-SCALAR vp<%index > = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
84+ ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<%1 >, ir<%add> (VF scaled by 1/4)
85+ ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%index >
8886; CHECK-NEXT: WIDEN ir<%load.a> = load ir<%gep.a>
89- ; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<[[EP_IV]] >
87+ ; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%index >
9088; CHECK-NEXT: WIDEN ir<%load.b> = load ir<%gep.b>
9189; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
9290; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
9391; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
9492; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul>
95- ; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]] > = add nuw vp<[[EP_IV]] >, ir<16>
96- ; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]] >, ir<1024>
93+ ; CHECK-NEXT: EMIT vp<%index.next > = add nuw vp<%index >, ir<16>
94+ ; CHECK-NEXT: EMIT branch-on-count vp<%index.next >, ir<1024>
9795; CHECK-NEXT: Successor(s): middle.block, vector.body
9896; CHECK-EMPTY:
9997; CHECK-NEXT: middle.block:
100- ; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add>
101- ; CHECK-NEXT: EMIT branch-on-cond ir<true>
102- ; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
98+ ; CHECK-NEXT: EMIT vp<%3> = compute-reduction-result ir<%accum>, ir<%add>
99+ ; CHECK-NEXT: Successor(s): ir-bb<exit>
103100; CHECK-EMPTY:
104101; CHECK-NEXT: ir-bb<exit>:
105- ; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[RED_RESULT]]> from middle.block)
106- ; CHECK-NEXT: No successors
107- ; CHECK-EMPTY:
108- ; CHECK-NEXT: ir-bb<scalar.ph>:
109- ; CHECK-NEXT: EMIT-SCALAR vp<[[EP_RESUME:%.+]]> = phi [ ir<1024>, middle.block ], [ ir<0>, ir-bb<entry> ]
110- ; CHECK-NEXT: EMIT-SCALAR vp<[[EP_MERGE:%.+]]> = phi [ vp<[[RED_RESULT]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
111- ; CHECK-NEXT: EMIT-SCALAR vp<%6> = resume-for-epilogue vp<%vec.epilog.resume.val>
112- ; CHECK-NEXT: Successor(s): ir-bb<for.body>
113- ; CHECK-EMPTY:
114- ; CHECK-NEXT: ir-bb<for.body>:
115- ; CHECK-NEXT: IR %accum = phi i32 [ 0, %scalar.ph ], [ %add, %for.body ] (extra operand: vp<[[EP_MERGE]]> from ir-bb<scalar.ph>)
116- ; CHECK-NEXT: IR %gep.a = getelementptr i8, ptr %a, i64 %iv
117- ; CHECK-NEXT: IR %load.a = load i8, ptr %gep.a, align 1
118- ; CHECK-NEXT: IR %ext.a = zext i8 %load.a to i32
119- ; CHECK-NEXT: IR %gep.b = getelementptr i8, ptr %b, i64 %iv
120- ; CHECK-NEXT: IR %load.b = load i8, ptr %gep.b, align 1
121- ; CHECK-NEXT: IR %ext.b = zext i8 %load.b to i32
122- ; CHECK-NEXT: IR %mul = mul i32 %ext.b, %ext.a
123- ; CHECK-NEXT: IR %add = add i32 %mul, %accum
124- ; CHECK-NEXT: IR %iv.next = add i64 %iv, 1
125- ; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 1024
102+ ; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<%3> from middle.block)
126103; CHECK-NEXT: No successors
127104; CHECK-NEXT: }
128105entry:
@@ -141,8 +118,12 @@ for.body: ; preds = %for.body, %entry
141118 %add = add i32 %mul , %accum
142119 %iv.next = add i64 %iv , 1
143120 %exitcond.not = icmp eq i64 %iv.next , 1024
144- br i1 %exitcond.not , label %exit , label %for.body
121+ br i1 %exitcond.not , label %exit , label %for.body , !llvm.loop !0
145122
146123exit:
147124 ret i32 %add
148125}
126+
127+ !0 = distinct !{!0 , !2 , !3 }
128+ !2 = !{!"llvm.loop.interleave.count" , i32 1 }
129+ !3 = !{!"llvm.loop.vectorize.predicate.enable" , i1 false }
0 commit comments