Skip to content

Commit e3c2509

Browse files
committed
Rework to just remove the assert and not try to infer anything from SCEV. Add more tests that show how overflow can also occur at %tc = 0.
1 parent 4ba9c49 commit e3c2509

File tree

2 files changed

+182
-37
lines changed

2 files changed

+182
-37
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2491,19 +2491,8 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
24912491
Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count);
24922492

24932493
Value *Step = CreateStep();
2494-
ScalarEvolution &SE = *PSE.getSE();
2495-
const SCEV *TC2OverflowSCEV = SE.applyLoopGuards(SE.getSCEV(LHS), OrigLoop);
2496-
const SCEV *StepSCEV = SE.getSCEV(Step);
2497-
2498-
// Check if (UMax - n) < (VF * UF).
2499-
if (SE.isKnownPredicate(ICmpInst::ICMP_ULT, TC2OverflowSCEV, StepSCEV)) {
2500-
CheckMinIters = Builder.getTrue();
2501-
} else if (!SE.isKnownPredicate(
2502-
CmpInst::getInversePredicate(CmpInst::ICMP_ULT),
2503-
TC2OverflowSCEV, StepSCEV)) {
2504-
// Don't execute the vector loop if (UMax - n) < (VF * UF).
2505-
CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step);
2506-
} // else n + (VF * UF) <= UMax, use CheckMinIters preset to false
2494+
// Don't execute the vector loop if (UMax - n) < (VF * UF).
2495+
CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step);
25072496
}
25082497

25092498
// Create new preheader for vector loop.

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll

Lines changed: 180 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,45 +4,124 @@
44
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
55
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s
66

7-
; If we know the IV will never overflow then we can skip the IV overflow check
7+
; TODO: We know the IV will never overflow here so we can skip the overflow
8+
; check
89

9-
define void @f(ptr %p, i64 %tc) vscale_range(2, 1024) {
10-
; CHECK-LABEL: define void @f(
10+
define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) {
11+
; CHECK-LABEL: define void @trip_count_max_1024(
1112
; CHECK-SAME: ptr [[P:%.*]], i64 [[TC:%.*]]) #[[ATTR0:[0-9]+]] {
1213
; CHECK-NEXT: [[ENTRY:.*:]]
1314
; CHECK-NEXT: [[GUARD:%.*]] = icmp ugt i64 [[TC]], 1024
1415
; CHECK-NEXT: br i1 [[GUARD]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
1516
; CHECK: [[LOOP_PREHEADER]]:
17+
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TC]], i64 1)
18+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[UMAX]]
19+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
20+
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
21+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
22+
; CHECK-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
23+
; CHECK: [[VECTOR_PH]]:
24+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
25+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
26+
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
27+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP6]]
28+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
29+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
30+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
31+
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
32+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
33+
; CHECK: [[VECTOR_BODY]]:
34+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
35+
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
36+
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[UMAX]], [[EVL_BASED_IV]]
37+
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
38+
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
39+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]]
40+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
41+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
42+
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
43+
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
44+
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
45+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
46+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
47+
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
48+
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
49+
; CHECK: [[MIDDLE_BLOCK]]:
50+
; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
51+
; CHECK: [[SCALAR_PH]]:
52+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
53+
; CHECK-NEXT: br label %[[LOOP:.*]]
54+
; CHECK: [[LOOP]]:
55+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
56+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[I]]
57+
; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[GEP]], align 8
58+
; CHECK-NEXT: [[Y:%.*]] = add i64 [[X]], 1
59+
; CHECK-NEXT: store i64 [[Y]], ptr [[GEP]], align 8
60+
; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
61+
; CHECK-NEXT: [[DONE:%.*]] = icmp uge i64 [[I_NEXT]], [[TC]]
62+
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
63+
; CHECK: [[EXIT_LOOPEXIT]]:
64+
; CHECK-NEXT: br label %[[EXIT]]
65+
; CHECK: [[EXIT]]:
66+
; CHECK-NEXT: ret void
67+
;
68+
entry:
69+
%guard = icmp ugt i64 %tc, 1024
70+
br i1 %guard, label %exit, label %loop
71+
loop:
72+
%i = phi i64 [%i.next, %loop], [0, %entry]
73+
%gep = getelementptr i64, ptr %p, i64 %i
74+
%x = load i64, ptr %gep
75+
%y = add i64 %x, 1
76+
store i64 %y, ptr %gep
77+
%i.next = add i64 %i, 1
78+
%done = icmp uge i64 %i.next, %tc
79+
br i1 %done, label %exit, label %loop
80+
exit:
81+
ret void
82+
}
83+
84+
; If %tc = 0 the IV will overflow, so we need to emit an overflow check
85+
; FIXME: The check still allows %tc = 0 to reach the vector loop
86+
87+
define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
88+
; CHECK-LABEL: define void @overflow_at_0(
89+
; CHECK-SAME: ptr [[P:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
90+
; CHECK-NEXT: [[ENTRY:.*:]]
91+
; CHECK-NEXT: [[GUARD:%.*]] = icmp ugt i64 [[TC]], 1024
92+
; CHECK-NEXT: br i1 [[GUARD]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
93+
; CHECK: [[LOOP_PREHEADER]]:
1694
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[TC]]
1795
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
1896
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
19-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
97+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
98+
; CHECK-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
2099
; CHECK: [[VECTOR_PH]]:
21-
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
22-
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
23-
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
24-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP5]]
25-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
100+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
101+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
102+
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
103+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP6]]
104+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
26105
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
27-
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
28-
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2
106+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
107+
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
29108
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
30109
; CHECK: [[VECTOR_BODY]]:
31110
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
32111
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
33112
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
34-
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
35-
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[EVL_BASED_IV]], 0
36-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
37-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
38-
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP8]])
39-
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP8]])
40-
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP11]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP8]])
41-
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP8]] to i64
42-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[EVL_BASED_IV]]
43-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
44-
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
45-
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
113+
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
114+
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
115+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]]
116+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0
117+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
118+
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
119+
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP9]])
120+
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
121+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]]
122+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
123+
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
124+
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
46125
; CHECK: [[MIDDLE_BLOCK]]:
47126
; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
48127
; CHECK: [[SCALAR_PH]]:
@@ -56,7 +135,7 @@ define void @f(ptr %p, i64 %tc) vscale_range(2, 1024) {
56135
; CHECK-NEXT: store i64 [[Y]], ptr [[GEP]], align 8
57136
; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
58137
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[TC]]
59-
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
138+
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
60139
; CHECK: [[EXIT_LOOPEXIT]]:
61140
; CHECK-NEXT: br label %[[EXIT]]
62141
; CHECK: [[EXIT]]:
@@ -77,9 +156,86 @@ loop:
77156
exit:
78157
ret void
79158
}
159+
160+
; The loop bound %tc.add (= %tc + 1, nuw) can't be 0, so the IV won't overflow
161+
162+
define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) {
163+
; CHECK-LABEL: define void @no_overflow_at_0(
164+
; CHECK-SAME: ptr [[P:%.*]], i64 [[TC:%.*]]) #[[ATTR0]] {
165+
; CHECK-NEXT: [[ENTRY:.*:]]
166+
; CHECK-NEXT: [[TC_ADD:%.*]] = add i64 [[TC]], 1
167+
; CHECK-NEXT: [[GUARD:%.*]] = icmp ugt i64 [[TC]], 1024
168+
; CHECK-NEXT: br i1 [[GUARD]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
169+
; CHECK: [[LOOP_PREHEADER]]:
170+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
171+
; CHECK: [[VECTOR_PH]]:
172+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
173+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
174+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
175+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC_ADD]], [[TMP2]]
176+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
177+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
178+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
179+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
180+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
181+
; CHECK: [[VECTOR_BODY]]:
182+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
183+
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
184+
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TC_ADD]], [[EVL_BASED_IV]]
185+
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
186+
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0
187+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP6]]
188+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0
189+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP5]])
190+
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP5]])
191+
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP8]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP5]])
192+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64
193+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
194+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
195+
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
196+
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
197+
; CHECK: [[MIDDLE_BLOCK]]:
198+
; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
199+
; CHECK: [[SCALAR_PH]]:
200+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
201+
; CHECK-NEXT: br label %[[LOOP:.*]]
202+
; CHECK: [[LOOP]]:
203+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
204+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[I]]
205+
; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[GEP]], align 8
206+
; CHECK-NEXT: [[Y:%.*]] = add i64 [[X]], 1
207+
; CHECK-NEXT: store i64 [[Y]], ptr [[GEP]], align 8
208+
; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
209+
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[TC_ADD]]
210+
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
211+
; CHECK: [[EXIT_LOOPEXIT]]:
212+
; CHECK-NEXT: br label %[[EXIT]]
213+
; CHECK: [[EXIT]]:
214+
; CHECK-NEXT: ret void
215+
;
216+
entry:
217+
%tc.add = add nuw i64 %tc, 1
218+
%guard = icmp ugt i64 %tc, 1024
219+
br i1 %guard, label %exit, label %loop
220+
loop:
221+
%i = phi i64 [%i.next, %loop], [0, %entry]
222+
%gep = getelementptr i64, ptr %p, i64 %i
223+
%x = load i64, ptr %gep
224+
%y = add i64 %x, 1
225+
store i64 %y, ptr %gep
226+
%i.next = add i64 %i, 1
227+
%done = icmp eq i64 %i.next, %tc.add
228+
br i1 %done, label %exit, label %loop
229+
exit:
230+
ret void
231+
}
80232
;.
81233
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
82234
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
83235
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
84236
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
237+
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
238+
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
239+
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
240+
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
85241
;.

0 commit comments

Comments
 (0)