Skip to content

Commit a2ddb02

Browse files
authored
[LoopInterchange] Don't consider loops with BTC=0 (#167113)
Do not consider loops with a backedge-taken count (BTC) of zero as candidates for interchange. This seems sensible because a BTC of zero means the backedge is never taken, i.e. the loop body executes only once, so there is no point in interchanging it. As a bonus, this seems to avoid triggering an assert about phis and their uses from source code, so this is a partial fix for #163954; more work is needed to fix that issue properly.
1 parent 68d2ce8 commit a2ddb02

File tree

5 files changed

+197
-37
lines changed

5 files changed

+197
-37
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1462,6 +1462,24 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
14621462
bool LoopInterchangeProfitability::isProfitable(
14631463
const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
14641464
unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1465+
// Do not consider loops with a backedge that isn't taken, e.g. a
1466+
// branch on a constant true/false condition, as candidates for interchange.
1467+
// TODO: when interchange is forced, we should probably also allow
1468+
// interchange for these loops, and thus this logic should be moved just
1469+
// after the cost-model ignore check below. But this check is done first
1470+
// to avoid the issue in #163954.
1471+
const SCEV *InnerBTC = SE->getBackedgeTakenCount(InnerLoop);
1472+
const SCEV *OuterBTC = SE->getBackedgeTakenCount(OuterLoop);
1473+
if (InnerBTC && InnerBTC->isZero()) {
1474+
LLVM_DEBUG(dbgs() << "Inner loop back-edge isn't taken, rejecting "
1475+
"single iteration loop\n");
1476+
return false;
1477+
}
1478+
if (OuterBTC && OuterBTC->isZero()) {
1479+
LLVM_DEBUG(dbgs() << "Outer loop back-edge isn't taken, rejecting "
1480+
"single iteration loop\n");
1481+
return false;
1482+
}
14651483

14661484
// Return true if interchange is forced and the cost-model ignored.
14671485
if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; RUN: opt < %s -passes=loop-interchange -verify-dom-info -verify-loop-info \
2+
; RUN: -pass-remarks-output=%t -pass-remarks='loop-interchange' -S
3+
; RUN: cat %t | FileCheck %s
4+
5+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
6+
7+
@D = common global [100 x [100 x [100 x i32]]] zeroinitializer
8+
9+
; The outer loop's backedge isn't taken. Check that the loop with BTC=0 is
10+
; considered unprofitable, and that we still interchange the two inner loops.
11+
;
12+
; for(int i=0;i<1;i++)
13+
; for(int j=0;j<100;j++)
14+
; for(int k=0;k<100;k++)
15+
; D[i][k][j] = D[i][k][j]+t;
16+
;
17+
18+
; CHECK: --- !Analysis
19+
; CHECK-NEXT: Pass: loop-interchange
20+
; CHECK-NEXT: Name: Dependence
21+
; CHECK-NEXT: Function: interchange_i_and_j
22+
; CHECK-NEXT: Args:
23+
; CHECK-NEXT: - String: Computed dependence info, invoking the transform.
24+
; CHECK-NEXT: ...
25+
; CHECK-NEXT: --- !Passed
26+
; CHECK-NEXT: Pass: loop-interchange
27+
; CHECK-NEXT: Name: Interchanged
28+
; CHECK-NEXT: Function: interchange_i_and_j
29+
; CHECK-NEXT: Args:
30+
; CHECK-NEXT: - String: Loop interchanged with enclosing loop.
31+
; CHECK-NEXT: ...
32+
; CHECK-NEXT: --- !Missed
33+
; CHECK-NEXT: Pass: loop-interchange
34+
; CHECK-NEXT: Name: InterchangeNotProfitable
35+
; CHECK-NEXT: Function: interchange_i_and_j
36+
; CHECK-NEXT: Args:
37+
; CHECK-NEXT: - String: Insufficient information to calculate the cost of loop for interchange.
38+
; CHECK-NEXT: ...
39+
40+
define void @interchange_i_and_j(i32 %t){
41+
entry:
42+
br label %outer.header
43+
44+
outer.header:
45+
%i = phi i64 [ 0, %entry ], [ %inc16, %for.inc15 ]
46+
br label %inner1.header
47+
48+
inner1.header:
49+
%j = phi i64 [ 0, %outer.header ], [ %inc13, %for.inc12 ]
50+
br label %inner2.body
51+
52+
inner2.body:
53+
%k = phi i64 [ 0, %inner1.header ], [ %inc, %inner2.body ]
54+
%arrayidx8 = getelementptr inbounds [100 x [100 x i32]], ptr @D, i64 %i, i64 %k, i64 %j
55+
%0 = load i32, ptr %arrayidx8
56+
%add = add nsw i32 %0, %t
57+
store i32 %add, ptr %arrayidx8
58+
%inc = add nuw nsw i64 %k, 1
59+
%exitcond = icmp eq i64 %inc, 100
60+
br i1 %exitcond, label %for.inc12, label %inner2.body
61+
62+
for.inc12:
63+
%inc13 = add nuw nsw i64 %j, 1
64+
%exitcond29 = icmp eq i64 %inc13, 100
65+
br i1 %exitcond29, label %for.inc15, label %inner1.header
66+
67+
for.inc15:
68+
%inc16 = add nuw nsw i64 %i, 1
69+
%exitcond30 = icmp eq i64 %inc16, 1
70+
br i1 %exitcond30, label %for.end17, label %outer.header
71+
72+
for.end17:
73+
ret void
74+
}

llvm/test/Transforms/LoopInterchange/pr43326.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ for.end: ; preds = %for.inc
6464

6565
for.inc10: ; preds = %for.end
6666
%j.next = add i8 %j, -1
67-
%cmp = icmp sgt i8 %j.next, -1
67+
%cmp = icmp sgt i8 %j.next, -10
6868
br i1 %cmp, label %inner1.header, label %for.end11
6969

7070
for.end11: ; preds = %for.inc10
@@ -75,8 +75,8 @@ for.end11: ; preds = %for.inc10
7575

7676
for.inc12: ; preds = %for.end11
7777
%inc13 = add nsw i32 %inc1312, 1
78-
%tobool.not = icmp eq i32 %inc13, 0
79-
br i1 %tobool.not, label %for.cond.for.end14_crit_edge, label %outer.header
78+
%tobool.not = icmp slt i32 %inc13, 42
79+
br i1 %tobool.not, label %outer.header, label %for.cond.for.end14_crit_edge
8080

8181
for.cond.for.end14_crit_edge: ; preds = %for.inc12
8282
%inc13.lcssa = phi i32 [ %inc13, %for.inc12 ]

llvm/test/Transforms/LoopInterchange/pr57148.ll

Lines changed: 48 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,19 @@ define void @test1() {
3535
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX55]], align 1
3636
; CHECK-NEXT: [[ADD61:%.*]] = add i32 undef, undef
3737
; CHECK-NEXT: [[INC63:%.*]] = add nsw i16 [[K_09]], 1
38+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[K_09]], 42
3839
; CHECK-NEXT: br label [[FOR_END67]]
3940
; CHECK: for.body42.split:
4041
; CHECK-NEXT: [[ADD61_LCSSA:%.*]] = phi i32 [ [[ADD61]], [[FOR_END67]] ]
4142
; CHECK-NEXT: [[TMP1]] = add nsw i16 [[K_09]], 1
42-
; CHECK-NEXT: br i1 true, label [[FOR_END64]], label [[FOR_BODY42]]
43+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[K_09]], 42
44+
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY42]], label [[FOR_END64]]
4345
; CHECK: for.end64:
4446
; CHECK-NEXT: [[ADD61_LCSSA_LCSSA:%.*]] = phi i32 [ [[ADD61_LCSSA]], [[FOR_BODY42_SPLIT]] ]
4547
; CHECK-NEXT: store i32 [[ADD61_LCSSA_LCSSA]], ptr undef, align 1
4648
; CHECK-NEXT: [[INC66]] = add nuw nsw i16 [[J_010]], 1
47-
; CHECK-NEXT: br i1 true, label [[FOR_COND75_PREHEADER:%.*]], label [[FOR_COND37_PREHEADER]]
49+
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i16 [[J_010]], 43
50+
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND37_PREHEADER]], label [[FOR_COND75_PREHEADER:%.*]]
4851
; CHECK: for.end67:
4952
; CHECK-NEXT: [[INC69]] = add nuw nsw i16 [[I_011]], 1
5053
; CHECK-NEXT: [[EXITCOND13_NOT:%.*]] = icmp eq i16 [[INC69]], 2
@@ -72,12 +75,14 @@ for.body42: ; preds = %for.body42, %for.co
7275
%0 = load i32, ptr %arrayidx55, align 1
7376
%add61 = add i32 undef, undef
7477
%inc63 = add nsw i16 %k.09, 1
75-
br i1 true, label %for.end64, label %for.body42
78+
%cmp = icmp slt i16 %k.09, 42
79+
br i1 %cmp, label %for.body42, label %for.end64
7680

7781
for.end64: ; preds = %for.body42
7882
store i32 %add61, ptr undef, align 1
7983
%inc66 = add nuw nsw i16 %j.010, 1
80-
br i1 true, label %for.end67, label %for.cond37.preheader
84+
%cmp2 = icmp slt i16 %j.010, 43
85+
br i1 %cmp2, label %for.cond37.preheader, label %for.end67
8186

8287
for.end67: ; preds = %for.end64
8388
%inc69 = add nuw nsw i16 %i.011, 1
@@ -88,7 +93,6 @@ for.cond75: ; preds = %for.cond75, %for.en
8893
br label %for.cond75
8994
}
9095

91-
9296
; Make sure that we split the phi nodes in the middle loop header
9397
; into a separate basic block to avoid the situation where use of
9498
; the outermost indvar appears before its def after interchanging
@@ -98,40 +102,42 @@ for.cond75: ; preds = %for.cond75, %for.en
98102
define void @test2() {
99103
; CHECK-LABEL: @test2(
100104
; CHECK-NEXT: entry:
101-
; CHECK-NEXT: br label [[FOR_COND37_PREHEADER_PREHEADER:%.*]]
102-
; CHECK: for.cond33.preheader.preheader:
103105
; CHECK-NEXT: br label [[FOR_COND33_PREHEADER:%.*]]
106+
; CHECK: for.cond33.preheader.preheader:
107+
; CHECK-NEXT: br label [[FOR_COND33_PREHEADER1:%.*]]
104108
; CHECK: for.cond33.preheader:
105109
; CHECK-NEXT: [[I_166:%.*]] = phi i16 [ [[INC69:%.*]], [[FOR_INC68:%.*]] ], [ 0, [[FOR_COND33_PREHEADER_PREHEADER:%.*]] ]
106110
; CHECK-NEXT: [[ARRAYIDX60:%.*]] = getelementptr inbounds [2 x [4 x i32]], ptr @c, i16 0, i16 [[I_166]], i16 [[J_165:%.*]]
107-
; CHECK-NEXT: br label [[VECTOR_BODY85_SPLIT1:%.*]]
108-
; CHECK: for.cond37.preheader.preheader:
109111
; CHECK-NEXT: br label [[FOR_COND37_PREHEADER:%.*]]
112+
; CHECK: for.cond37.preheader.preheader:
113+
; CHECK-NEXT: br label [[FOR_COND37_PREHEADER1:%.*]]
110114
; CHECK: for.cond37.preheader:
111-
; CHECK-NEXT: [[J_165]] = phi i16 [ [[INC66:%.*]], [[MIDDLE_BLOCK80:%.*]] ], [ 0, [[FOR_COND37_PREHEADER_PREHEADER]] ]
112-
; CHECK-NEXT: br label [[FOR_COND37_PREHEADER_SPLIT:%.*]]
113-
; CHECK: for.cond37.preheader.split:
115+
; CHECK-NEXT: [[J_165]] = phi i16 [ [[INC66:%.*]], [[MIDDLE_BLOCK80:%.*]] ], [ 0, [[FOR_COND33_PREHEADER]] ]
114116
; CHECK-NEXT: br label [[VECTOR_BODY85:%.*]]
117+
; CHECK: for.cond37.preheader.split:
118+
; CHECK-NEXT: br label [[VECTOR_BODY86:%.*]]
115119
; CHECK: vector.body85:
116-
; CHECK-NEXT: [[INDEX86:%.*]] = phi i16 [ 0, [[FOR_COND37_PREHEADER_SPLIT]] ], [ [[TMP3:%.*]], [[VECTOR_BODY85_SPLIT:%.*]] ]
120+
; CHECK-NEXT: [[INDEX86:%.*]] = phi i16 [ 0, [[VECTOR_BODY85]] ], [ [[TMP5:%.*]], [[VECTOR_BODY85_SPLIT:%.*]] ]
117121
; CHECK-NEXT: br label [[FOR_COND33_PREHEADER_PREHEADER]]
118122
; CHECK: vector.body85.split1:
119123
; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i16 [[INDEX86]], 2
120124
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 [[TMP0]], i16 [[J_165]]
121125
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1
122-
; CHECK-NEXT: [[INDEX_NEXT87:%.*]] = add nuw i16 [[INDEX86]], 4
126+
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i16 [[INDEX86]], 4
127+
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i16 [[INDEX86]], 42
123128
; CHECK-NEXT: br label [[FOR_INC68]]
124129
; CHECK: vector.body85.split:
125-
; CHECK-NEXT: [[TMP3]] = add nuw i16 [[INDEX86]], 4
126-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK80]], label [[VECTOR_BODY85]]
130+
; CHECK-NEXT: [[TMP5]] = add nuw i16 [[INDEX86]], 4
131+
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i16 [[INDEX86]], 42
132+
; CHECK-NEXT: br i1 [[TMP4]], label [[VECTOR_BODY86]], label [[MIDDLE_BLOCK80]]
127133
; CHECK: middle.block80:
128134
; CHECK-NEXT: [[INC66]] = add nuw nsw i16 [[J_165]], 1
129135
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[INC66]], 42
130-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND75_PREHEADER:%.*]], label [[FOR_COND37_PREHEADER]]
136+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND37_PREHEADER1]], label [[FOR_COND75_PREHEADER:%.*]]
131137
; CHECK: for.inc68:
132138
; CHECK-NEXT: [[INC69]] = add nuw nsw i16 [[I_166]], 1
133-
; CHECK-NEXT: [[EXITCOND77_NOT:%.*]] = icmp eq i16 [[INC69]], 2
134-
; CHECK-NEXT: br i1 [[EXITCOND77_NOT]], label [[VECTOR_BODY85_SPLIT]], label [[FOR_COND33_PREHEADER]]
139+
; CHECK-NEXT: [[EXITCOND77_NOT:%.*]] = icmp slt i16 [[INC69]], 24
140+
; CHECK-NEXT: br i1 [[EXITCOND77_NOT]], label [[FOR_COND33_PREHEADER1]], label [[VECTOR_BODY85_SPLIT]]
135141
; CHECK: for.cond75.preheader:
136142
; CHECK-NEXT: unreachable
137143
;
@@ -153,17 +159,18 @@ vector.body85: ; preds = %vector.body85, %for
153159
%1 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %0, i16 %j.165
154160
%2 = load i32, ptr %1, align 1
155161
%index.next87 = add nuw i16 %index86, 4
156-
br i1 true, label %middle.block80, label %vector.body85
162+
%cmp2 = icmp slt i16 %index86, 42
163+
br i1 %cmp2, label %vector.body85, label %middle.block80
157164

158165
middle.block80: ; preds = %vector.body85
159166
%inc66 = add nuw nsw i16 %j.165, 1
160167
%cmp = icmp slt i16 %inc66, 42
161-
br i1 %cmp, label %for.inc68, label %for.cond37.preheader
168+
br i1 %cmp, label %for.cond37.preheader, label %for.inc68
162169

163170
for.inc68: ; preds = %middle.block80
164171
%inc69 = add nuw nsw i16 %i.166, 1
165-
%exitcond77.not = icmp eq i16 %inc69, 2
166-
br i1 %exitcond77.not, label %for.cond75.preheader, label %for.cond33.preheader
172+
%exitcond77.not = icmp slt i16 %inc69, 24
173+
br i1 %exitcond77.not, label %for.cond33.preheader, label %for.cond75.preheader
167174

168175
for.cond75.preheader: ; preds = %for.inc68
169176
unreachable
@@ -178,11 +185,11 @@ define void @test3() {
178185
; CHECK-NEXT: br label [[FOR_COND33_PREHEADER:%.*]]
179186
; CHECK: for.cond33.preheader:
180187
; CHECK-NEXT: [[I_011:%.*]] = phi i16 [ [[INC69:%.*]], [[FOR_END67:%.*]] ], [ 0, [[FOR_COND33_PREHEADER_PREHEADER:%.*]] ]
181-
; CHECK-NEXT: br label [[FOR_BODY42_SPLIT1:%.*]]
188+
; CHECK-NEXT: br label [[FOR_COND38_PREHEADER:%.*]]
182189
; CHECK: for.body42.preheader:
183190
; CHECK-NEXT: br label [[FOR_BODY42:%.*]]
184191
; CHECK: for.cond38.preheader.preheader:
185-
; CHECK-NEXT: br label [[FOR_COND38_PREHEADER:%.*]]
192+
; CHECK-NEXT: br label [[FOR_COND38_PREHEADER1:%.*]]
186193
; CHECK: for.cond37.preheader.preheader:
187194
; CHECK-NEXT: br label [[FOR_COND37_PREHEADER:%.*]]
188195
; CHECK: for.cond37.preheader:
@@ -192,27 +199,31 @@ define void @test3() {
192199
; CHECK-NEXT: [[K_010:%.*]] = phi i16 [ [[INC67:%.*]], [[FOR_END65:%.*]] ], [ 0, [[FOR_COND38_PREHEADER_PREHEADER]] ]
193200
; CHECK-NEXT: br label [[FOR_BODY42_PREHEADER:%.*]]
194201
; CHECK: for.body42:
195-
; CHECK-NEXT: [[K_09:%.*]] = phi i16 [ [[TMP1:%.*]], [[FOR_BODY42_SPLIT:%.*]] ], [ -512, [[FOR_BODY42_PREHEADER]] ]
202+
; CHECK-NEXT: [[K_09:%.*]] = phi i16 [ [[TMP3:%.*]], [[FOR_BODY42_SPLIT:%.*]] ], [ -512, [[FOR_BODY42_PREHEADER]] ]
196203
; CHECK-NEXT: br label [[FOR_COND33_PREHEADER_PREHEADER]]
197204
; CHECK: for.body42.split1:
198205
; CHECK-NEXT: [[SUB51:%.*]] = add nsw i16 [[K_09]], 512
199206
; CHECK-NEXT: [[ARRAYIDX55:%.*]] = getelementptr inbounds [1024 x [512 x [4 x i32]]], ptr @d, i16 0, i16 [[SUB51]], i16 [[J_010]], i16 [[K_010]]
200207
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX55]], align 1
201208
; CHECK-NEXT: [[ADD61:%.*]] = add i32 undef, undef
202-
; CHECK-NEXT: [[INC63:%.*]] = add nsw i16 [[K_09]], 1
209+
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i16 [[K_09]], 1
210+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[K_09]], 42
203211
; CHECK-NEXT: br label [[FOR_END67]]
204212
; CHECK: for.body42.split:
205213
; CHECK-NEXT: [[ADD61_LCSSA:%.*]] = phi i32 [ [[ADD61]], [[FOR_END67]] ]
206-
; CHECK-NEXT: [[TMP1]] = add nsw i16 [[K_09]], 1
207-
; CHECK-NEXT: br i1 true, label [[FOR_END65]], label [[FOR_BODY42]]
214+
; CHECK-NEXT: [[TMP3]] = add nsw i16 [[K_09]], 1
215+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[K_09]], 42
216+
; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY42]], label [[FOR_END65]]
208217
; CHECK: for.end65:
209218
; CHECK-NEXT: [[ADD61_LCSSA_LCSSA:%.*]] = phi i32 [ [[ADD61_LCSSA]], [[FOR_BODY42_SPLIT]] ]
210219
; CHECK-NEXT: store i32 [[ADD61_LCSSA_LCSSA]], ptr undef, align 1
211220
; CHECK-NEXT: [[INC67]] = add nuw nsw i16 [[K_010]], 1
212-
; CHECK-NEXT: br i1 true, label [[FOR_END64]], label [[FOR_COND38_PREHEADER]]
221+
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i16 [[K_010]], 44
222+
; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_COND38_PREHEADER1]], label [[FOR_END64]]
213223
; CHECK: for.end64:
214224
; CHECK-NEXT: [[INC66]] = add nuw nsw i16 [[J_010]], 1
215-
; CHECK-NEXT: br i1 true, label [[FOR_COND75_PREHEADER:%.*]], label [[FOR_COND37_PREHEADER]]
225+
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i16 [[J_010]], 43
226+
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND37_PREHEADER]], label [[FOR_COND75_PREHEADER:%.*]]
216227
; CHECK: for.end67:
217228
; CHECK-NEXT: [[INC69]] = add nuw nsw i16 [[I_011]], 1
218229
; CHECK-NEXT: [[EXITCOND13_NOT:%.*]] = icmp eq i16 [[INC69]], 2
@@ -244,16 +255,19 @@ for.body42: ; preds = %for.body42, %for.co
244255
%0 = load i32, ptr %arrayidx55, align 1
245256
%add61 = add i32 undef, undef
246257
%inc63 = add nsw i16 %k.09, 1
247-
br i1 true, label %for.end65, label %for.body42
258+
%cmp = icmp slt i16 %k.09, 42
259+
br i1 %cmp, label %for.body42, label %for.end65
248260

249261
for.end65: ; preds = %for.body42
250262
store i32 %add61, ptr undef, align 1
251263
%inc67 = add nuw nsw i16 %k.010, 1
252-
br i1 true, label %for.end64, label %for.cond38.preheader
264+
%cmp3 = icmp slt i16 %k.010, 44
265+
br i1 %cmp3, label %for.cond38.preheader, label %for.end64
253266

254267
for.end64: ; preds = %for.end65
255268
%inc66 = add nuw nsw i16 %j.010, 1
256-
br i1 true, label %for.end67, label %for.cond37.preheader
269+
%cmp2 = icmp slt i16 %j.010, 43
270+
br i1 %cmp2, label %for.cond37.preheader, label %for.end67
257271

258272
for.end67: ; preds = %for.end64
259273
%inc69 = add nuw nsw i16 %i.011, 1
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt < %s -passes=loop-interchange -loop-interchange-profitabilities=ignore -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
3+
4+
; Test case for issue: https://github.com/llvm/llvm-project/issues/163954
5+
6+
define void @test() {
7+
; CHECK-LABEL: define void @test() {
8+
; CHECK-NEXT: [[ENTRY:.*]]:
9+
; CHECK-NEXT: br label %[[OUTER_HEADER:.*]]
10+
; CHECK: [[OUTER_HEADER]]:
11+
; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[DOTLCSSA:%.*]], %[[OUTER_LATCH:.*]] ]
12+
; CHECK-NEXT: br label %[[INNER_HEADER:.*]]
13+
; CHECK: [[INNER_HEADER]]:
14+
; CHECK-NEXT: [[J:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[J_NEXT:%.*]], %[[INNER_LATCH:.*]] ]
15+
; CHECK-NEXT: [[TMP0:%.*]] = phi i8 [ [[I]], %[[OUTER_HEADER]] ], [ [[TMP1:%.*]], %[[INNER_LATCH]] ]
16+
; CHECK-NEXT: br label %[[INNER_BODY:.*]]
17+
; CHECK: [[INNER_BODY]]:
18+
; CHECK-NEXT: br i1 true, label %[[INNER_LATCH]], label %[[INNER_BODY]]
19+
; CHECK: [[INNER_LATCH]]:
20+
; CHECK-NEXT: [[TMP1]] = or i8 [[TMP0]], 0
21+
; CHECK-NEXT: [[J_NEXT]] = add i64 [[J]], 1
22+
; CHECK-NEXT: br i1 true, label %[[OUTER_LATCH]], label %[[INNER_HEADER]]
23+
; CHECK: [[OUTER_LATCH]]:
24+
; CHECK-NEXT: [[DOTLCSSA]] = phi i8 [ [[TMP1]], %[[INNER_LATCH]] ]
25+
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[OUTER_HEADER]]
26+
; CHECK: [[EXIT]]:
27+
; CHECK-NEXT: ret void
28+
;
29+
entry:
30+
br label %outer.header
31+
32+
outer.header:
33+
%i = phi i8 [ 0, %entry ], [ %1, %outer.latch ]
34+
br label %inner.header
35+
36+
inner.header:
37+
%j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.latch ]
38+
%0 = phi i8 [ %i, %outer.header ], [ %1, %inner.latch ]
39+
br label %inner.body
40+
41+
inner.body:
42+
br i1 true, label %inner.latch, label %inner.body ; another (self) loop, but never taken
43+
44+
inner.latch:
45+
%1 = or i8 %0, 0
46+
%j.next = add i64 %j, 1
47+
br i1 true, label %outer.latch, label %inner.header
48+
49+
outer.latch:
50+
br i1 true, label %exit, label %outer.header
51+
52+
exit:
53+
ret void
54+
}

0 commit comments

Comments
 (0)