Skip to content

Commit 53e7443

Browse files
[LSR] Don't count conditional loads/stores as enabling pre/post-index (llvm#159573)
When a load/store is conditionally executed in a loop it isn't a candidate for pre/post-index addressing, as the increment of the address would only happen on those loop iterations where the load/store is executed. Detect this and only discount the AddRec cost when the load/store is unconditional.
1 parent 9d5c354 commit 53e7443

File tree

3 files changed

+209
-43
lines changed

3 files changed

+209
-43
lines changed

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,11 @@ class LSRUse {
13181318
/// the loop, in which case some special-case heuristics may be used.
13191319
bool AllFixupsOutsideLoop = true;
13201320

1321+
/// This records whether all of the fixups using this LSRUse are unconditional
1322+
/// within the loop, meaning they will be executed on every path to the loop
1323+
/// latch. This includes fixups before early exits.
1324+
bool AllFixupsUnconditional = true;
1325+
13211326
/// RigidFormula is set to true to guarantee that this use will be associated
13221327
/// with a single formula--the one that initially matched. Some SCEV
13231328
/// expressions cannot be expanded. This allows LSR to consider the registers
@@ -1421,16 +1426,22 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
14211426
if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
14221427
TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
14231428
const SCEV *Start;
1424-
const SCEVConstant *Step;
1425-
if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
1429+
const APInt *Step;
1430+
if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_scev_APInt(Step)))) {
14261431
// If the step size matches the base offset, we could use pre-indexed
14271432
// addressing.
1428-
if (((AMK & TTI::AMK_PreIndexed) && F.BaseOffset.isFixed() &&
1429-
Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
1430-
((AMK & TTI::AMK_PostIndexed) && !isa<SCEVConstant>(Start) &&
1431-
SE->isLoopInvariant(Start, L)))
1433+
bool CanPreIndex = (AMK & TTI::AMK_PreIndexed) &&
1434+
F.BaseOffset.isFixed() &&
1435+
*Step == F.BaseOffset.getFixedValue();
1436+
bool CanPostIndex = (AMK & TTI::AMK_PostIndexed) &&
1437+
!isa<SCEVConstant>(Start) &&
1438+
SE->isLoopInvariant(Start, L);
1439+
// We can only pre or post index when the load/store is unconditional.
1440+
if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
14321441
LoopCost = 0;
1442+
}
14331443
}
1444+
14341445
// If the loop counts down to zero and we'll be using a hardware loop then
14351446
// the addrec will be combined into the hardware loop instruction.
14361447
if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
@@ -1783,6 +1794,9 @@ void LSRUse::print(raw_ostream &OS) const {
17831794
if (AllFixupsOutsideLoop)
17841795
OS << ", all-fixups-outside-loop";
17851796

1797+
if (AllFixupsUnconditional)
1798+
OS << ", all-fixups-unconditional";
1799+
17861800
if (WidestFixupType)
17871801
OS << ", widest fixup type: " << *WidestFixupType;
17881802
}
@@ -2213,6 +2227,7 @@ class LSRInstance {
22132227
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
22142228
void CountRegisters(const Formula &F, size_t LUIdx);
22152229
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
2230+
bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;
22162231

22172232
void CollectLoopInvariantFixupsAndFormulae();
22182233

@@ -3607,6 +3622,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
36073622
LF.PostIncLoops = TmpPostIncLoops;
36083623
LF.Offset = Offset;
36093624
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3625+
LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
36103626

36113627
// Create SCEV as Formula for calculating baseline cost
36123628
if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
@@ -3680,6 +3696,14 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
36803696
return true;
36813697
}
36823698

3699+
/// Test whether this fixup will be executed each time the corresponding IV
3700+
/// increment instruction is executed.
3701+
bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
3702+
// If the fixup block dominates the IV increment block then there is no path
3703+
// through the loop to the increment that doesn't pass through the fixup.
3704+
return DT.dominates(LF.UserInst->getParent(), IVIncInsertPos->getParent());
3705+
}
3706+
36833707
/// Check for other uses of loop-invariant values which we're tracking. These
36843708
/// other uses will pin these values in registers, making them less profitable
36853709
/// for elimination.
@@ -3803,6 +3827,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
38033827
LF.OperandValToReplace = U;
38043828
LF.Offset = Offset;
38053829
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3830+
LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
38063831
if (!LU.WidestFixupType ||
38073832
SE.getTypeSizeInBits(LU.WidestFixupType) <
38083833
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
@@ -4940,6 +4965,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
49404965
LLVM_DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
49414966

49424967
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4968+
LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
49434969

49444970
// Transfer the fixups of LU to LUThatHas.
49454971
for (LSRFixup &Fixup : LU.Fixups) {

llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,77 +6,81 @@ define void @arm_min_q31(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocap
66
; CHECK: @ %bb.0: @ %entry
77
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
88
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
9+
; CHECK-NEXT: .pad #4
10+
; CHECK-NEXT: sub sp, #4
911
; CHECK-NEXT: ldr.w r12, [r0]
1012
; CHECK-NEXT: subs.w r9, r1, #1
1113
; CHECK-NEXT: beq .LBB0_3
1214
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
13-
; CHECK-NEXT: and r8, r9, #3
15+
; CHECK-NEXT: and r6, r9, #3
1416
; CHECK-NEXT: subs r7, r1, #2
1517
; CHECK-NEXT: cmp r7, #3
1618
; CHECK-NEXT: bhs .LBB0_4
1719
; CHECK-NEXT: @ %bb.2:
18-
; CHECK-NEXT: movs r6, #0
19-
; CHECK-NEXT: b .LBB0_6
20+
; CHECK-NEXT: mov.w r10, #0
21+
; CHECK-NEXT: cbnz r6, .LBB0_7
22+
; CHECK-NEXT: b .LBB0_10
2023
; CHECK-NEXT: .LBB0_3:
21-
; CHECK-NEXT: movs r6, #0
24+
; CHECK-NEXT: mov.w r10, #0
2225
; CHECK-NEXT: b .LBB0_10
2326
; CHECK-NEXT: .LBB0_4: @ %while.body.preheader.new
2427
; CHECK-NEXT: bic r7, r9, #3
25-
; CHECK-NEXT: movs r6, #1
28+
; CHECK-NEXT: str r6, [sp] @ 4-byte Spill
2629
; CHECK-NEXT: subs r7, #4
30+
; CHECK-NEXT: movs r6, #1
31+
; CHECK-NEXT: mov.w r8, #0
32+
; CHECK-NEXT: mov.w r10, #0
2733
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
28-
; CHECK-NEXT: movs r6, #0
29-
; CHECK-NEXT: movs r7, #4
3034
; CHECK-NEXT: .LBB0_5: @ %while.body
3135
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
32-
; CHECK-NEXT: ldr r10, [r0, #16]!
33-
; CHECK-NEXT: sub.w r9, r9, #4
34-
; CHECK-NEXT: ldrd r5, r4, [r0, #-12]
35-
; CHECK-NEXT: ldr r11, [r0, #-4]
36+
; CHECK-NEXT: ldr r11, [r0, #16]!
37+
; CHECK-NEXT: ldrd r5, r7, [r0, #-12]
38+
; CHECK-NEXT: ldr r4, [r0, #-4]
3639
; CHECK-NEXT: cmp r12, r5
37-
; CHECK-NEXT: it gt
38-
; CHECK-NEXT: subgt r6, r7, #3
3940
; CHECK-NEXT: csel r5, r5, r12, gt
40-
; CHECK-NEXT: cmp r5, r4
41+
; CHECK-NEXT: csinc r6, r10, r8, le
42+
; CHECK-NEXT: cmp r5, r7
4143
; CHECK-NEXT: it gt
42-
; CHECK-NEXT: subgt r6, r7, #2
43-
; CHECK-NEXT: csel r5, r4, r5, gt
44-
; CHECK-NEXT: cmp r5, r11
44+
; CHECK-NEXT: addgt.w r6, r8, #2
45+
; CHECK-NEXT: csel r7, r7, r5, gt
46+
; CHECK-NEXT: cmp r7, r4
4547
; CHECK-NEXT: it gt
46-
; CHECK-NEXT: subgt r6, r7, #1
47-
; CHECK-NEXT: csel r5, r11, r5, gt
48-
; CHECK-NEXT: cmp r5, r10
49-
; CHECK-NEXT: csel r6, r7, r6, gt
50-
; CHECK-NEXT: add.w r7, r7, #4
51-
; CHECK-NEXT: csel r12, r10, r5, gt
48+
; CHECK-NEXT: addgt.w r6, r8, #3
49+
; CHECK-NEXT: csel r7, r4, r7, gt
50+
; CHECK-NEXT: add.w r8, r8, #4
51+
; CHECK-NEXT: cmp r7, r11
52+
; CHECK-NEXT: csel r10, r8, r6, gt
53+
; CHECK-NEXT: csel r12, r11, r7, gt
5254
; CHECK-NEXT: le lr, .LBB0_5
53-
; CHECK-NEXT: .LBB0_6: @ %while.end.loopexit.unr-lcssa
54-
; CHECK-NEXT: cmp.w r8, #0
55-
; CHECK-NEXT: beq .LBB0_10
56-
; CHECK-NEXT: @ %bb.7: @ %while.body.epil
55+
; CHECK-NEXT: @ %bb.6: @ %while.end.loopexit.unr-lcssa.loopexit
56+
; CHECK-NEXT: ldr r6, [sp] @ 4-byte Reload
57+
; CHECK-NEXT: sub.w r9, r9, r8
58+
; CHECK-NEXT: cbz r6, .LBB0_10
59+
; CHECK-NEXT: .LBB0_7: @ %while.body.epil
5760
; CHECK-NEXT: ldr r7, [r0, #4]
5861
; CHECK-NEXT: sub.w r1, r1, r9
5962
; CHECK-NEXT: cmp r12, r7
60-
; CHECK-NEXT: csel r6, r1, r6, gt
63+
; CHECK-NEXT: csel r10, r1, r10, gt
6164
; CHECK-NEXT: csel r12, r7, r12, gt
62-
; CHECK-NEXT: cmp.w r8, #1
65+
; CHECK-NEXT: cmp r6, #1
6366
; CHECK-NEXT: beq .LBB0_10
6467
; CHECK-NEXT: @ %bb.8: @ %while.body.epil.1
6568
; CHECK-NEXT: ldr r7, [r0, #8]
6669
; CHECK-NEXT: cmp r12, r7
67-
; CHECK-NEXT: csinc r6, r6, r1, le
70+
; CHECK-NEXT: csinc r10, r10, r1, le
6871
; CHECK-NEXT: csel r12, r7, r12, gt
69-
; CHECK-NEXT: cmp.w r8, #2
72+
; CHECK-NEXT: cmp r6, #2
7073
; CHECK-NEXT: beq .LBB0_10
7174
; CHECK-NEXT: @ %bb.9: @ %while.body.epil.2
7275
; CHECK-NEXT: ldr r0, [r0, #12]
7376
; CHECK-NEXT: cmp r12, r0
7477
; CHECK-NEXT: it gt
75-
; CHECK-NEXT: addgt r6, r1, #2
78+
; CHECK-NEXT: addgt.w r10, r1, #2
7679
; CHECK-NEXT: csel r12, r0, r12, gt
7780
; CHECK-NEXT: .LBB0_10: @ %while.end
7881
; CHECK-NEXT: str.w r12, [r2]
79-
; CHECK-NEXT: str r6, [r3]
82+
; CHECK-NEXT: str.w r10, [r3]
83+
; CHECK-NEXT: add sp, #4
8084
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
8185
entry:
8286
%0 = load i32, ptr %pSrc, align 4

llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll

Lines changed: 140 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,31 +119,29 @@ for.end:
119119
; We can't use postindex addressing on the conditional load of qval and can't
120120
; convert the loop condition to a compare with zero, so we should instead use
121121
; offset addressing.
122-
; FIXME: Currently we don't notice the load of qval is conditional, and attempt
123-
; postindex addressing anyway.
124122
define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
125123
; CHECK-LABEL: define i32 @conditional_load(
126124
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[N:%.*]]) {
127125
; CHECK-NEXT: [[ENTRY:.*]]:
128126
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
129127
; CHECK: [[FOR_BODY]]:
130128
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_INC:.*]] ], [ [[P]], %[[ENTRY]] ]
131-
; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_INC]] ], [ [[Q]], %[[ENTRY]] ]
132129
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
133130
; CHECK-NEXT: [[RET:%.*]] = phi i32 [ [[RET_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
134131
; CHECK-NEXT: [[PVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
135132
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[PVAL]], 0
136133
; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
137134
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[FOR_INC]], label %[[IF_THEN:.*]]
138135
; CHECK: [[IF_THEN]]:
136+
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[IDX]], 2
137+
; CHECK-NEXT: [[LSR_IV:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP0]]
139138
; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV]], align 4
140139
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[RET]], [[QVAL]]
141140
; CHECK-NEXT: br label %[[FOR_INC]]
142141
; CHECK: [[FOR_INC]]:
143142
; CHECK-NEXT: [[RET_NEXT]] = phi i32 [ [[ADD]], %[[IF_THEN]] ], [ [[RET]], %[[FOR_BODY]] ]
144143
; CHECK-NEXT: [[IDX_NEXT]] = add nuw nsw i64 [[IDX]], 1
145144
; CHECK-NEXT: [[NVAL:%.*]] = load volatile i64, ptr [[N]], align 8
146-
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
147145
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IDX_NEXT]], [[NVAL]]
148146
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]]
149147
; CHECK: [[EXIT]]:
@@ -176,3 +174,141 @@ for.inc:
176174
exit:
177175
ret i32 %ret.next
178176
}
177+
178+
; We can use postindex addressing for both loads here, even though the second
179+
; may not be executed on every loop iteration.
180+
define i32 @early_exit_load(ptr %p, ptr %q, ptr %n) {
181+
; CHECK-LABEL: define i32 @early_exit_load(
182+
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[N:%.*]]) {
183+
; CHECK-NEXT: [[ENTRY:.*]]:
184+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
185+
; CHECK: [[FOR_BODY]]:
186+
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_INC:.*]] ], [ [[P]], %[[ENTRY]] ]
187+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_INC]] ], [ [[Q]], %[[ENTRY]] ]
188+
; CHECK-NEXT: [[RET_PHI:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
189+
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
190+
; CHECK-NEXT: [[PVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
191+
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[PVAL]], 0
192+
; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
193+
; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_INC]], label %[[EXIT:.*]]
194+
; CHECK: [[FOR_INC]]:
195+
; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV]], align 4
196+
; CHECK-NEXT: [[ADD]] = add nsw i32 [[QVAL]], [[RET_PHI]]
197+
; CHECK-NEXT: [[IDX_NEXT]] = add nuw nsw i64 [[IDX]], 1
198+
; CHECK-NEXT: [[NVAL:%.*]] = load volatile i64, ptr [[N]], align 8
199+
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
200+
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i64 [[IDX_NEXT]], [[NVAL]]
201+
; CHECK-NEXT: br i1 [[CMP2]], label %[[FOR_BODY]], label %[[EXIT]]
202+
; CHECK: [[EXIT]]:
203+
; CHECK-NEXT: [[RET:%.*]] = phi i32 [ [[RET_PHI]], %[[FOR_BODY]] ], [ [[ADD]], %[[FOR_INC]] ]
204+
; CHECK-NEXT: ret i32 [[RET]]
205+
;
206+
entry:
207+
br label %for.body
208+
209+
for.body:
210+
%ret.phi = phi i32 [ %add, %for.inc ], [ 0, %entry ]
211+
%idx = phi i64 [ %idx.next, %for.inc ], [ 0, %entry ]
212+
%paddr = getelementptr inbounds nuw i32, ptr %p, i64 %idx
213+
%pval = load i32, ptr %paddr, align 4
214+
%cmp1 = icmp eq i32 %pval, 0
215+
br i1 %cmp1, label %for.inc, label %exit
216+
217+
for.inc:
218+
%qaddr = getelementptr inbounds nuw i32, ptr %q, i64 %idx
219+
%qval = load i32, ptr %qaddr, align 4
220+
%add = add nsw i32 %qval, %ret.phi
221+
%idx.next = add nuw nsw i64 %idx, 1
222+
%nval = load volatile i64, ptr %n, align 8
223+
%cmp2 = icmp slt i64 %idx.next, %nval
224+
br i1 %cmp2, label %for.body, label %exit
225+
226+
exit:
227+
%ret = phi i32 [ %ret.phi, %for.body ], [ %add, %for.inc ]
228+
ret i32 %ret
229+
}
230+
231+
; The control-flow before and after the load of qval shouldn't prevent postindex
232+
; addressing from happening.
233+
; FIXME: We choose postindex addressing, but the scevgep is placed in for.inc so
234+
; during codegen we will fail to actually generate a postindex load.
235+
define void @middle_block_load(ptr %p, ptr %q, i64 %n) {
236+
; CHECK-LABEL: define void @middle_block_load(
237+
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i64 [[N:%.*]]) {
238+
; CHECK-NEXT: [[ENTRY:.*]]:
239+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
240+
; CHECK: [[FOR_BODY]]:
241+
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], %[[FOR_INC:.*]] ], [ [[P]], %[[ENTRY]] ]
242+
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_INC]] ], [ [[Q]], %[[ENTRY]] ]
243+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_INC]] ], [ [[N]], %[[ENTRY]] ]
244+
; CHECK-NEXT: [[PVAL:%.*]] = load i32, ptr [[LSR_IV2]], align 4
245+
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[PVAL]], 0
246+
; CHECK-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
247+
; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN1:.*]], label %[[IF_ELSE1:.*]]
248+
; CHECK: [[IF_THEN1]]:
249+
; CHECK-NEXT: tail call void @otherfn1()
250+
; CHECK-NEXT: br label %[[IF_END:.*]]
251+
; CHECK: [[IF_ELSE1]]:
252+
; CHECK-NEXT: tail call void @otherfn2()
253+
; CHECK-NEXT: br label %[[IF_END]]
254+
; CHECK: [[IF_END]]:
255+
; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
256+
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[QVAL]], 0
257+
; CHECK-NEXT: br i1 [[CMP2]], label %[[IF_THEN2:.*]], label %[[IF_ELSE2:.*]]
258+
; CHECK: [[IF_THEN2]]:
259+
; CHECK-NEXT: tail call void @otherfn1()
260+
; CHECK-NEXT: br label %[[FOR_INC]]
261+
; CHECK: [[IF_ELSE2]]:
262+
; CHECK-NEXT: tail call void @otherfn2()
263+
; CHECK-NEXT: br label %[[FOR_INC]]
264+
; CHECK: [[FOR_INC]]:
265+
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
266+
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
267+
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
268+
; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT:.*]], label %[[FOR_BODY]]
269+
; CHECK: [[EXIT]]:
270+
; CHECK-NEXT: ret void
271+
;
272+
entry:
273+
br label %for.body
274+
275+
for.body:
276+
%idx = phi i64 [ %idx.next, %for.inc ], [ 0, %entry ]
277+
%paddr = getelementptr inbounds nuw i32, ptr %p, i64 %idx
278+
%pval = load i32, ptr %paddr, align 4
279+
%cmp1 = icmp sgt i32 %pval, 0
280+
br i1 %cmp1, label %if.then1, label %if.else1
281+
282+
if.then1:
283+
tail call void @otherfn1()
284+
br label %if.end
285+
286+
if.else1:
287+
tail call void @otherfn2()
288+
br label %if.end
289+
290+
if.end:
291+
%qaddr = getelementptr inbounds nuw i32, ptr %q, i64 %idx
292+
%qval = load i32, ptr %qaddr, align 4
293+
%cmp2 = icmp sgt i32 %qval, 0
294+
br i1 %cmp2, label %if.then2, label %if.else2
295+
296+
if.then2:
297+
tail call void @otherfn1()
298+
br label %for.inc
299+
300+
if.else2:
301+
tail call void @otherfn2()
302+
br label %for.inc
303+
304+
for.inc:
305+
%idx.next = add nuw nsw i64 %idx, 1
306+
%cmp3 = icmp eq i64 %idx.next, %n
307+
br i1 %cmp3, label %exit, label %for.body
308+
309+
exit:
310+
ret void
311+
}
312+
313+
declare dso_local void @otherfn1()
314+
declare dso_local void @otherfn2()

0 commit comments

Comments
 (0)