Skip to content

Commit f8e9723

Browse files
authored
ARM: Enable terminal rule (llvm#165958)
1 parent 2fc2e1f commit f8e9723

File tree

14 files changed

+302 additions, -309 deletions

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -377,6 +377,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
377377
bool isRWPI() const;
378378

379379
bool useMachineScheduler() const { return UseMISched; }
380+
bool enableTerminalRule() const override { return true; }
380381
bool useMachinePipeliner() const { return UseMIPipeliner; }
381382
bool hasMinSize() const { return OptMinSize; }
382383
bool isThumb1Only() const { return isThumb() && !hasThumb2(); }

llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -7,22 +7,22 @@ define dso_local i32 @test_500_504(ptr nocapture readonly %x) {
77
; CHECK-NEXT: .save {r7, lr}
88
; CHECK-NEXT: push {r7, lr}
99
; CHECK-NEXT: mov.w lr, #126
10-
; CHECK-NEXT: adr r2, .LCPI0_0
11-
; CHECK-NEXT: vldrw.u32 q0, [r2]
12-
; CHECK-NEXT: mov.w r2, #500
13-
; CHECK-NEXT: vdup.32 q1, r2
14-
; CHECK-NEXT: movs r1, #0
10+
; CHECK-NEXT: adr r1, .LCPI0_0
11+
; CHECK-NEXT: vldrw.u32 q0, [r1]
12+
; CHECK-NEXT: mov.w r1, #500
13+
; CHECK-NEXT: mov.w r12, #0
14+
; CHECK-NEXT: vdup.32 q1, r1
1515
; CHECK-NEXT: movs r2, #0
1616
; CHECK-NEXT: .LBB0_1: @ %vector.body
1717
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
18-
; CHECK-NEXT: vqadd.u32 q2, q0, r1
19-
; CHECK-NEXT: adds r1, #4
18+
; CHECK-NEXT: vqadd.u32 q2, q0, r2
19+
; CHECK-NEXT: adds r2, #4
2020
; CHECK-NEXT: vptt.u32 hi, q1, q2
2121
; CHECK-NEXT: vldrwt.u32 q2, [r0], #16
22-
; CHECK-NEXT: vaddvat.u32 r2, q2
22+
; CHECK-NEXT: vaddvat.u32 r12, q2
2323
; CHECK-NEXT: le lr, .LBB0_1
2424
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
25-
; CHECK-NEXT: mov r0, r2
25+
; CHECK-NEXT: mov r0, r12
2626
; CHECK-NEXT: pop {r7, pc}
2727
; CHECK-NEXT: .p2align 4
2828
; CHECK-NEXT: @ %bb.3:

llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -28,29 +28,29 @@ define void @arm_min_q31(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocap
2828
; CHECK-NEXT: str r6, [sp] @ 4-byte Spill
2929
; CHECK-NEXT: subs r7, #4
3030
; CHECK-NEXT: movs r6, #1
31-
; CHECK-NEXT: mov.w r8, #0
3231
; CHECK-NEXT: mov.w r10, #0
32+
; CHECK-NEXT: mov.w r8, #0
3333
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
3434
; CHECK-NEXT: .LBB0_5: @ %while.body
3535
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
3636
; CHECK-NEXT: ldr r11, [r0, #16]!
37-
; CHECK-NEXT: ldrd r5, r7, [r0, #-12]
37+
; CHECK-NEXT: ldrd r5, r6, [r0, #-12]
3838
; CHECK-NEXT: ldr r4, [r0, #-4]
3939
; CHECK-NEXT: cmp r12, r5
4040
; CHECK-NEXT: csel r5, r5, r12, gt
41-
; CHECK-NEXT: csinc r6, r10, r8, le
42-
; CHECK-NEXT: cmp r5, r7
41+
; CHECK-NEXT: csinc r7, r10, r8, le
42+
; CHECK-NEXT: cmp r5, r6
4343
; CHECK-NEXT: it gt
44-
; CHECK-NEXT: addgt.w r6, r8, #2
45-
; CHECK-NEXT: csel r7, r7, r5, gt
46-
; CHECK-NEXT: cmp r7, r4
44+
; CHECK-NEXT: addgt.w r7, r8, #2
45+
; CHECK-NEXT: csel r6, r6, r5, gt
46+
; CHECK-NEXT: cmp r6, r4
4747
; CHECK-NEXT: it gt
48-
; CHECK-NEXT: addgt.w r6, r8, #3
49-
; CHECK-NEXT: csel r7, r4, r7, gt
48+
; CHECK-NEXT: addgt.w r7, r8, #3
49+
; CHECK-NEXT: csel r6, r4, r6, gt
5050
; CHECK-NEXT: add.w r8, r8, #4
51-
; CHECK-NEXT: cmp r7, r11
52-
; CHECK-NEXT: csel r10, r8, r6, gt
53-
; CHECK-NEXT: csel r12, r11, r7, gt
51+
; CHECK-NEXT: cmp r6, r11
52+
; CHECK-NEXT: csel r10, r8, r7, gt
53+
; CHECK-NEXT: csel r12, r11, r6, gt
5454
; CHECK-NEXT: le lr, .LBB0_5
5555
; CHECK-NEXT: @ %bb.6: @ %while.end.loopexit.unr-lcssa.loopexit
5656
; CHECK-NEXT: ldr r6, [sp] @ 4-byte Reload

llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll

Lines changed: 16 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -21,11 +21,12 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
2121
; ENABLED-NEXT: it lt
2222
; ENABLED-NEXT: bxlt lr
2323
; ENABLED-NEXT: .LBB0_1: @ %for.body.lr.ph
24-
; ENABLED-NEXT: push.w {r4, r5, r6, r7, r9, r10, r11, lr}
24+
; ENABLED-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
2525
; ENABLED-NEXT: mov r11, r0
26-
; ENABLED-NEXT: ldr r0, [sp, #32]
26+
; ENABLED-NEXT: ldr r0, [sp, #36]
2727
; ENABLED-NEXT: add.w r9, r2, #3
2828
; ENABLED-NEXT: mov.w r12, #0
29+
; ENABLED-NEXT: mov.w r8, #1
2930
; ENABLED-NEXT: mov r10, r11
3031
; ENABLED-NEXT: uxth r0, r0
3132
; ENABLED-NEXT: rsbs r5, r0, #0
@@ -49,18 +50,16 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
4950
; ENABLED-NEXT: @ %bb.5: @ %vector.ph
5051
; ENABLED-NEXT: @ in Loop: Header=BB0_4 Depth=1
5152
; ENABLED-NEXT: bic r0, r9, #3
52-
; ENABLED-NEXT: movs r7, #1
53-
; ENABLED-NEXT: subs r0, #4
5453
; ENABLED-NEXT: sub.w r4, r2, r12
54+
; ENABLED-NEXT: subs r0, #4
5555
; ENABLED-NEXT: vmov.i32 q1, #0x0
56-
; ENABLED-NEXT: add.w r6, r7, r0, lsr #2
56+
; ENABLED-NEXT: mov r7, r10
57+
; ENABLED-NEXT: add.w r6, r8, r0, lsr #2
5758
; ENABLED-NEXT: adds r0, r2, #3
5859
; ENABLED-NEXT: sub.w r0, r0, r12
5960
; ENABLED-NEXT: bic r0, r0, #3
6061
; ENABLED-NEXT: subs r0, #4
61-
; ENABLED-NEXT: add.w r0, r7, r0, lsr #2
62-
; ENABLED-NEXT: mov r7, r10
63-
; ENABLED-NEXT: dls lr, r0
62+
; ENABLED-NEXT: add.w lr, r8, r0, lsr #2
6463
; ENABLED-NEXT: mov r0, r11
6564
; ENABLED-NEXT: .LBB0_6: @ %vector.body
6665
; ENABLED-NEXT: @ Parent Loop BB0_4 Depth=1
@@ -83,7 +82,7 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
8382
; ENABLED-NEXT: vaddv.u32 r0, q0
8483
; ENABLED-NEXT: b .LBB0_3
8584
; ENABLED-NEXT: .LBB0_8:
86-
; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r9, r10, r11, lr}
85+
; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
8786
; ENABLED-NEXT: bx lr
8887
;
8988
; NOREDUCTIONS-LABEL: varying_outer_2d_reduction:
@@ -92,11 +91,12 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
9291
; NOREDUCTIONS-NEXT: it lt
9392
; NOREDUCTIONS-NEXT: bxlt lr
9493
; NOREDUCTIONS-NEXT: .LBB0_1: @ %for.body.lr.ph
95-
; NOREDUCTIONS-NEXT: push.w {r4, r5, r6, r7, r9, r10, r11, lr}
94+
; NOREDUCTIONS-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
9695
; NOREDUCTIONS-NEXT: mov r11, r0
97-
; NOREDUCTIONS-NEXT: ldr r0, [sp, #32]
96+
; NOREDUCTIONS-NEXT: ldr r0, [sp, #36]
9897
; NOREDUCTIONS-NEXT: add.w r9, r2, #3
9998
; NOREDUCTIONS-NEXT: mov.w r12, #0
99+
; NOREDUCTIONS-NEXT: mov.w r8, #1
100100
; NOREDUCTIONS-NEXT: mov r10, r11
101101
; NOREDUCTIONS-NEXT: uxth r0, r0
102102
; NOREDUCTIONS-NEXT: rsbs r5, r0, #0
@@ -120,18 +120,16 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
120120
; NOREDUCTIONS-NEXT: @ %bb.5: @ %vector.ph
121121
; NOREDUCTIONS-NEXT: @ in Loop: Header=BB0_4 Depth=1
122122
; NOREDUCTIONS-NEXT: bic r0, r9, #3
123-
; NOREDUCTIONS-NEXT: movs r7, #1
124-
; NOREDUCTIONS-NEXT: subs r0, #4
125123
; NOREDUCTIONS-NEXT: sub.w r4, r2, r12
124+
; NOREDUCTIONS-NEXT: subs r0, #4
126125
; NOREDUCTIONS-NEXT: vmov.i32 q1, #0x0
127-
; NOREDUCTIONS-NEXT: add.w r6, r7, r0, lsr #2
126+
; NOREDUCTIONS-NEXT: mov r7, r10
127+
; NOREDUCTIONS-NEXT: add.w r6, r8, r0, lsr #2
128128
; NOREDUCTIONS-NEXT: adds r0, r2, #3
129129
; NOREDUCTIONS-NEXT: sub.w r0, r0, r12
130130
; NOREDUCTIONS-NEXT: bic r0, r0, #3
131131
; NOREDUCTIONS-NEXT: subs r0, #4
132-
; NOREDUCTIONS-NEXT: add.w r0, r7, r0, lsr #2
133-
; NOREDUCTIONS-NEXT: mov r7, r10
134-
; NOREDUCTIONS-NEXT: dls lr, r0
132+
; NOREDUCTIONS-NEXT: add.w lr, r8, r0, lsr #2
135133
; NOREDUCTIONS-NEXT: mov r0, r11
136134
; NOREDUCTIONS-NEXT: .LBB0_6: @ %vector.body
137135
; NOREDUCTIONS-NEXT: @ Parent Loop BB0_4 Depth=1
@@ -154,7 +152,7 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
154152
; NOREDUCTIONS-NEXT: vaddv.u32 r0, q0
155153
; NOREDUCTIONS-NEXT: b .LBB0_3
156154
; NOREDUCTIONS-NEXT: .LBB0_8:
157-
; NOREDUCTIONS-NEXT: pop.w {r4, r5, r6, r7, r9, r10, r11, lr}
155+
; NOREDUCTIONS-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
158156
; NOREDUCTIONS-NEXT: bx lr
159157
entry:
160158
%conv = sext i16 %N to i32

llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll

Lines changed: 45 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -165,74 +165,73 @@ define dso_local i32 @b(ptr %c, i32 %d, i32 %e, ptr %n) "frame-pointer"="all" {
165165
; CHECK-NEXT: sub sp, #16
166166
; CHECK-NEXT: wls lr, r1, .LBB2_3
167167
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
168-
; CHECK-NEXT: adds r6, r3, #4
169-
; CHECK-NEXT: adds r1, r0, #4
168+
; CHECK-NEXT: add.w r9, r3, #4
169+
; CHECK-NEXT: add.w r10, r0, #4
170170
; CHECK-NEXT: mvn r8, #1
171-
; CHECK-NEXT: @ implicit-def: $r9
171+
; CHECK-NEXT: @ implicit-def: $r6
172172
; CHECK-NEXT: @ implicit-def: $r4
173173
; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
174174
; CHECK-NEXT: .LBB2_2: @ %while.body
175175
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
176-
; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
176+
; CHECK-NEXT: ldr.w r1, [r10]
177177
; CHECK-NEXT: asrs r2, r4, #31
178-
; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
179-
; CHECK-NEXT: ldr r1, [r1]
178+
; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill
180179
; CHECK-NEXT: muls r1, r3, r1
181180
; CHECK-NEXT: adds r4, r4, r1
182181
; CHECK-NEXT: adc.w r1, r2, r1, asr #31
183182
; CHECK-NEXT: adds.w r2, r4, #-2147483648
184-
; CHECK-NEXT: ldrd r2, r4, [r8]
185-
; CHECK-NEXT: adc r5, r1, #0
186-
; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
187-
; CHECK-NEXT: smull r4, r2, r4, r9
188-
; CHECK-NEXT: asrs r1, r5, #31
183+
; CHECK-NEXT: ldrd r5, r4, [r8]
184+
; CHECK-NEXT: adc r2, r1, #0
189185
; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill
190-
; CHECK-NEXT: subs r4, r5, r4
191-
; CHECK-NEXT: sbcs r1, r2
192-
; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
193-
; CHECK-NEXT: adds.w r10, r4, #-2147483648
194-
; CHECK-NEXT: adc r1, r1, #0
195-
; CHECK-NEXT: ldr r4, [r2, #-4]
186+
; CHECK-NEXT: smull r4, r5, r4, r6
187+
; CHECK-NEXT: asrs r1, r2, #31
188+
; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
189+
; CHECK-NEXT: subs r4, r2, r4
190+
; CHECK-NEXT: sbcs r1, r5
191+
; CHECK-NEXT: adds.w r6, r4, #-2147483648
192+
; CHECK-NEXT: ldr r4, [r10, #-4]
193+
; CHECK-NEXT: adc r11, r1, #0
194+
; CHECK-NEXT: mov r1, r9
195+
; CHECK-NEXT: add.w r10, r10, #4
196196
; CHECK-NEXT: muls r4, r3, r4
197197
; CHECK-NEXT: adds r3, #4
198198
; CHECK-NEXT: adds.w r12, r4, #-2147483648
199199
; CHECK-NEXT: asr.w r5, r4, #31
200-
; CHECK-NEXT: ldr r4, [r6]
200+
; CHECK-NEXT: ldr.w r4, [r9]
201201
; CHECK-NEXT: adc r5, r5, #0
202202
; CHECK-NEXT: mul r2, r4, r0
203-
; CHECK-NEXT: adds r0, #4
204203
; CHECK-NEXT: add.w r2, r2, #-2147483648
205204
; CHECK-NEXT: asrl r12, r5, r2
206-
; CHECK-NEXT: smull r2, r5, r4, r12
207-
; CHECK-NEXT: lsll r2, r5, #30
208-
; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
209-
; CHECK-NEXT: asr.w r11, r5, #31
210-
; CHECK-NEXT: mov r12, r5
211-
; CHECK-NEXT: lsll r12, r11, r4
212-
; CHECK-NEXT: mul r2, r2, r9
213-
; CHECK-NEXT: lsrl r12, r11, #2
214-
; CHECK-NEXT: adds r2, #2
215-
; CHECK-NEXT: lsll r12, r11, r2
205+
; CHECK-NEXT: smull r2, r9, r4, r12
206+
; CHECK-NEXT: mov r12, r0
207+
; CHECK-NEXT: lsll r2, r9, #30
208+
; CHECK-NEXT: asr.w r5, r9, #31
209+
; CHECK-NEXT: mov r2, r9
210+
; CHECK-NEXT: mov r9, r1
211+
; CHECK-NEXT: ldrd r1, r0, [sp, #4] @ 8-byte Folded Reload
212+
; CHECK-NEXT: lsll r2, r5, r4
213+
; CHECK-NEXT: lsrl r2, r5, #2
214+
; CHECK-NEXT: muls r0, r1, r0
215+
; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
216+
; CHECK-NEXT: adds r0, #2
217+
; CHECK-NEXT: lsll r2, r5, r0
218+
; CHECK-NEXT: add.w r0, r2, #-2147483648
216219
; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
217-
; CHECK-NEXT: add.w r5, r12, #-2147483648
218-
; CHECK-NEXT: asrl r10, r1, r5
219-
; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
220-
; CHECK-NEXT: lsrl r10, r1, #2
221-
; CHECK-NEXT: movs r1, #2
222-
; CHECK-NEXT: mov r9, r10
223-
; CHECK-NEXT: str.w r10, [r1]
224-
; CHECK-NEXT: ldr r1, [r8], #-4
225-
; CHECK-NEXT: mls r5, r1, r4, r5
226-
; CHECK-NEXT: adds.w r4, r5, #-2147483648
227-
; CHECK-NEXT: asr.w r1, r5, #31
220+
; CHECK-NEXT: asrl r6, r11, r0
221+
; CHECK-NEXT: movs r0, #2
222+
; CHECK-NEXT: lsrl r6, r11, #2
223+
; CHECK-NEXT: str r6, [r0]
224+
; CHECK-NEXT: ldr r0, [r8], #-4
225+
; CHECK-NEXT: mls r0, r0, r4, r1
226+
; CHECK-NEXT: adds.w r4, r0, #-2147483648
227+
; CHECK-NEXT: asr.w r1, r0, #31
228228
; CHECK-NEXT: adc r1, r1, #0
229229
; CHECK-NEXT: lsrl r4, r1, #2
230-
; CHECK-NEXT: rsbs r1, r4, #0
231-
; CHECK-NEXT: str r1, [r2]
232-
; CHECK-NEXT: str r1, [r6, #-4]
233-
; CHECK-NEXT: adds r6, #4
234-
; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
235-
; CHECK-NEXT: adds r1, #4
230+
; CHECK-NEXT: rsbs r0, r4, #0
231+
; CHECK-NEXT: str r0, [r2]
232+
; CHECK-NEXT: str r0, [r9, #-4]
233+
; CHECK-NEXT: add.w r9, r9, #4
234+
; CHECK-NEXT: add.w r0, r12, #4
236235
; CHECK-NEXT: le lr, .LBB2_2
237236
; CHECK-NEXT: .LBB2_3: @ %while.end
238237
; CHECK-NEXT: add sp, #16

0 commit comments

Comments (0)