Skip to content

Commit ac593de

Browse files
kawashima-fjt authored and tstellar committed
[LoopReroll] Fix rerolling loop with extra instructions
Fixes PR47627.

This fix suppresses rerolling a loop which has an unrerollable instruction.

Sample IR for the explanation below:

```
define void @foo([2 x i32]* nocapture %a) {
entry:
  br label %loop

loop:
  ; base instruction
  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]

  ; unrerollable instructions
  %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %indvar, i64 0
  store i32 999, i32* %stptrx, align 4

  ; extra simple arithmetic operations, used by root instructions
  %plus20 = add nuw nsw i64 %indvar, 20
  %plus10 = add nuw nsw i64 %indvar, 10

  ; root instruction 0
  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
  %value0 = load i32, i32* %ldptr0, align 4
  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
  store i32 %value0, i32* %stptr0, align 4

  ; root instruction 1
  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
  %value1 = load i32, i32* %ldptr1, align 4
  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
  store i32 %value1, i32* %stptr1, align 4

  ; loop-increment and latch
  %indvar.next = add nuw nsw i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar.next, 5
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
```

In the loop rerolling pass, `%indvar` and `%indvar.next` are appended to the `LoopIncs` vector in the `LoopReroll::DAGRootTracker::findRoots` function.

Before this fix, two instructions with `unrerollable instructions` comment above are marked as `IL_All` at the end of the `LoopReroll::DAGRootTracker::collectUsedInstructions` function, as well as instructions with `extra simple arithmetic operations` comment and `loop-increment and latch` comment. It is incorrect because `IL_All` means that the instruction should be executed in all iterations of the rerolled loop but the `store` instruction should not.

This fix rejects instructions which may have side effects and don't belong to def-use chains of any root instructions and reductions.
See https://bugs.llvm.org/show_bug.cgi?id=47627 for more information. (cherry picked from commit d9a9c99)
1 parent 471a386 commit ac593de

File tree

2 files changed

+274
-0
lines changed

2 files changed

+274
-0
lines changed

llvm/lib/Transforms/Scalar/LoopRerollPass.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1081,6 +1081,12 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po
10811081
DenseSet<Instruction*> V;
10821082
collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
10831083
for (auto *I : V) {
1084+
if (I->mayHaveSideEffects()) {
1085+
LLVM_DEBUG(dbgs() << "LRR: Aborting - "
1086+
<< "An instruction which does not belong to any root "
1087+
<< "sets must not have side effects: " << *I);
1088+
return false;
1089+
}
10841090
Uses[I].set(IL_All);
10851091
}
10861092

Lines changed: 268 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,268 @@
1+
; RUN: opt -S -loop-reroll %s | FileCheck %s
2+
target triple = "aarch64--linux-gnu"
3+
4+
define void @rerollable1([2 x i32]* nocapture %a) {
5+
entry:
6+
br label %loop
7+
8+
loop:
9+
10+
; CHECK-LABEL: loop:
11+
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
12+
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 20, i64 %iv
13+
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [2 x i32], [2 x i32]* %a, i64 10, i64 %iv
14+
; CHECK-NEXT: [[VALUE:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
15+
; CHECK-NEXT: store i32 [[VALUE]], i32* [[SCEVGEP2]], align 4
16+
17+
; base instruction
18+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
19+
20+
; NO unrerollable instructions
21+
22+
; extra simple arithmetic operations, used by root instructions
23+
%plus20 = add nuw nsw i64 %iv, 20
24+
%plus10 = add nuw nsw i64 %iv, 10
25+
26+
; root instruction 0
27+
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
28+
%value0 = load i32, i32* %ldptr0, align 4
29+
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
30+
store i32 %value0, i32* %stptr0, align 4
31+
32+
; root instruction 1
33+
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
34+
%value1 = load i32, i32* %ldptr1, align 4
35+
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
36+
store i32 %value1, i32* %stptr1, align 4
37+
38+
; loop-increment
39+
%iv.next = add nuw nsw i64 %iv, 1
40+
41+
; latch
42+
%exitcond = icmp eq i64 %iv.next, 5
43+
br i1 %exitcond, label %exit, label %loop
44+
45+
exit:
46+
ret void
47+
}
48+
49+
define void @unrerollable1([2 x i32]* nocapture %a) {
50+
entry:
51+
br label %loop
52+
53+
loop:
54+
55+
; CHECK-LABEL: loop:
56+
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
57+
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
58+
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
59+
60+
; base instruction
61+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
62+
63+
; unrerollable instructions using %iv
64+
%stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0
65+
store i32 999, i32* %stptrx, align 4
66+
67+
; extra simple arithmetic operations, used by root instructions
68+
%plus20 = add nuw nsw i64 %iv, 20
69+
%plus10 = add nuw nsw i64 %iv, 10
70+
71+
; root instruction 0
72+
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
73+
%value0 = load i32, i32* %ldptr0, align 4
74+
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
75+
store i32 %value0, i32* %stptr0, align 4
76+
77+
; root instruction 1
78+
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
79+
%value1 = load i32, i32* %ldptr1, align 4
80+
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
81+
store i32 %value1, i32* %stptr1, align 4
82+
83+
; loop-increment
84+
%iv.next = add nuw nsw i64 %iv, 1
85+
86+
; latch
87+
%exitcond = icmp eq i64 %iv.next, 5
88+
br i1 %exitcond, label %exit, label %loop
89+
90+
exit:
91+
ret void
92+
}
93+
94+
define void @unrerollable2([2 x i32]* nocapture %a) {
95+
entry:
96+
br label %loop
97+
98+
loop:
99+
100+
; CHECK-LABEL: loop:
101+
; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
102+
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
103+
; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
104+
; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
105+
106+
; base instruction
107+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
108+
109+
; loop-increment
110+
%iv.next = add nuw nsw i64 %iv, 1
111+
112+
; unrerollable instructions using %iv.next
113+
%stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv.next, i64 0
114+
store i32 999, i32* %stptrx, align 4
115+
116+
; extra simple arithmetic operations, used by root instructions
117+
%plus20 = add nuw nsw i64 %iv, 20
118+
%plus10 = add nuw nsw i64 %iv, 10
119+
120+
; root instruction 0
121+
%ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
122+
%value0 = load i32, i32* %ldptr0, align 4
123+
%stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
124+
store i32 %value0, i32* %stptr0, align 4
125+
126+
; root instruction 1
127+
%ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
128+
%value1 = load i32, i32* %ldptr1, align 4
129+
%stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
130+
store i32 %value1, i32* %stptr1, align 4
131+
132+
; latch
133+
%exitcond = icmp eq i64 %iv.next, 5
134+
br i1 %exitcond, label %exit, label %loop
135+
136+
exit:
137+
ret void
138+
}
139+
140+
define dso_local void @rerollable2() {
141+
entry:
142+
br label %loop
143+
144+
loop:
145+
146+
; CHECK-LABEL: loop:
147+
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
148+
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
149+
; CHECK-NEXT: {{%.*}} = add i32 %iv, {{20|24}}
150+
151+
; induction variable
152+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
153+
154+
; scale instruction
155+
%iv.mul3 = mul nuw nsw i32 %iv, 3
156+
157+
; extra simple arithmetic operations, used by root instructions
158+
%iv.scaled = add nuw nsw i32 %iv.mul3, 20
159+
160+
; NO unrerollable instructions
161+
162+
; root set 1
163+
164+
; base instruction
165+
%iv.scaled.div5 = udiv i32 %iv.scaled, 5
166+
tail call void @bar(i32 %iv.scaled.div5)
167+
; root instruction 0
168+
%iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
169+
%iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
170+
tail call void @bar(i32 %iv.scaled.add1.div5)
171+
; root instruction 1
172+
%iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
173+
%iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
174+
tail call void @bar(i32 %iv.scaled.add2.div5)
175+
176+
; root set 2
177+
178+
; base instruction
179+
%iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
180+
%iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
181+
tail call void @bar(i32 %iv.scaled.add4.div5)
182+
; root instruction 0
183+
%iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
184+
%iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
185+
tail call void @bar(i32 %iv.scaled.add5.div5)
186+
; root instruction 1
187+
%iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
188+
%iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
189+
tail call void @bar(i32 %iv.scaled.add6.div5)
190+
191+
; loop-increment
192+
%iv.next = add nuw nsw i32 %iv, 1
193+
194+
; latch
195+
%cmp = icmp ult i32 %iv.next, 3
196+
br i1 %cmp, label %loop, label %exit
197+
198+
exit:
199+
ret void
200+
}
201+
202+
define dso_local void @unrerollable3() {
203+
entry:
204+
br label %loop
205+
206+
loop:
207+
208+
; CHECK-LABEL: loop:
209+
; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
210+
; CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3
211+
; CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20
212+
; CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7
213+
; CHECK-NEXT: tail call void @bar(i32 %iv.mul7)
214+
215+
; induction variable
216+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
217+
218+
; scale instruction
219+
%iv.mul3 = mul nuw nsw i32 %iv, 3
220+
221+
; extra simple arithmetic operations, used by root instructions
222+
%iv.scaled = add nuw nsw i32 %iv.mul3, 20
223+
224+
; unrerollable instructions using %iv
225+
%iv.mul7 = mul nuw nsw i32 %iv, 7
226+
tail call void @bar(i32 %iv.mul7)
227+
228+
; root set 1
229+
230+
; base instruction
231+
%iv.scaled.div5 = udiv i32 %iv.scaled, 5
232+
tail call void @bar(i32 %iv.scaled.div5)
233+
; root instruction 0
234+
%iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1
235+
%iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5
236+
tail call void @bar(i32 %iv.scaled.add1.div5)
237+
; root instruction 1
238+
%iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2
239+
%iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5
240+
tail call void @bar(i32 %iv.scaled.add2.div5)
241+
242+
; root set 2
243+
244+
; base instruction
245+
%iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4
246+
%iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5
247+
tail call void @bar(i32 %iv.scaled.add4.div5)
248+
; root instruction 0
249+
%iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5
250+
%iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5
251+
tail call void @bar(i32 %iv.scaled.add5.div5)
252+
; root instruction 1
253+
%iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6
254+
%iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5
255+
tail call void @bar(i32 %iv.scaled.add6.div5)
256+
257+
; loop-increment
258+
%iv.next = add nuw nsw i32 %iv, 1
259+
260+
; latch
261+
%cmp = icmp ult i32 %iv.next, 3
262+
br i1 %cmp, label %loop, label %exit
263+
264+
exit:
265+
ret void
266+
}
267+
268+
declare dso_local void @bar(i32)

0 commit comments

Comments
 (0)