Skip to content

Commit c83ec5b

Browse files
committed
[EraVM] Enable main induction variable removal in LSR
This patch makes `shouldFoldTerminatingConditionAfterLSR` return `true` for the EraVM target, so LSR will try to replace the main induction variable of a loop with one of its secondary IVs. This reduces the number of instructions in the loop by eliminating the main IV increment. The transformation increases register pressure, but that is rarely a problem for EraVM. PR: #599, Issue: #580.
1 parent 094f9f5 commit c83ec5b

File tree

5 files changed

+66
-73
lines changed

5 files changed

+66
-73
lines changed

llvm/lib/Target/EraVM/EraVMTargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ class EraVMTTIImpl final : public BasicTTIImplBase<EraVMTTIImpl> {
138138
// Since we prefer inlining, use larger threshold multiplier.
139139
unsigned getInliningThresholdMultiplier() const { return 11; }
140140

141+
bool shouldFoldTerminatingConditionAfterLSR() const { return true; }
142+
141143
/// @}
142144
};
143145

llvm/test/CodeGen/EraVM/indexed-memops.ll

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,16 @@ target triple = "eravm"
1010
define void @loop1(i256 addrspace(1)* %dest, i256 addrspace(1)* %src, i256 %size) {
1111
; CHECK-LABEL: loop1:
1212
; CHECK: ; %bb.0:
13-
; CHECK-NEXT: add r0, r0, r4
13+
; CHECK-NEXT: sub.s! 1, r3, r4
14+
; CHECK-NEXT: add.le 1, r0, r3
15+
; CHECK-NEXT: shl.s 5, r3, r3
16+
; CHECK-NEXT: add r1, r3, r3
1417
; CHECK-NEXT: .BB0_1: ; %load-store-loop
1518
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
16-
; CHECK-NEXT: ld.1.inc r2, r5, r2
17-
; CHECK-NEXT: st.1.inc r1, r5, r1
18-
; CHECK-NEXT: add 1, r4, r4
19-
; CHECK-NEXT: sub! r4, r3, r5
20-
; CHECK-NEXT: jump.lt @.BB0_1
19+
; CHECK-NEXT: ld.1.inc r2, r4, r2
20+
; CHECK-NEXT: st.1.inc r1, r4, r1
21+
; CHECK-NEXT: sub! r1, r3, r4
22+
; CHECK-NEXT: jump.ne @.BB0_1
2123
; CHECK-NEXT: ; %bb.2: ; %memcpy-split
2224
; CHECK-NEXT: ret
2325

@@ -40,16 +42,18 @@ memcpy-split: ; preds = %load-store-loop
4042
define void @loop2(i256 addrspace(1)* %dest, i256 addrspace(1)* %src, i256 %size) {
4143
; CHECK-LABEL: loop2:
4244
; CHECK: ; %bb.0: ; %entry
43-
; CHECK-NEXT: add 10, r0, r4
45+
; CHECK-NEXT: sub.s! 11, r3, r4
46+
; CHECK-NEXT: add.le 11, r0, r3
47+
; CHECK-NEXT: shl.s 5, r3, r3
48+
; CHECK-NEXT: add r1, r3, r3
4449
; CHECK-NEXT: add 320, r1, r1
4550
; CHECK-NEXT: add 320, r2, r2
4651
; CHECK-NEXT: .BB1_1: ; %load-store-loop
4752
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
48-
; CHECK-NEXT: ld.1.inc r2, r5, r2
49-
; CHECK-NEXT: st.1.inc r1, r5, r1
50-
; CHECK-NEXT: add 1, r4, r4
51-
; CHECK-NEXT: sub! r4, r3, r5
52-
; CHECK-NEXT: jump.lt @.BB1_1
53+
; CHECK-NEXT: ld.1.inc r2, r4, r2
54+
; CHECK-NEXT: st.1.inc r1, r4, r1
55+
; CHECK-NEXT: sub! r1, r3, r4
56+
; CHECK-NEXT: jump.ne @.BB1_1
5357
; CHECK-NEXT: ; %bb.2: ; %memcpy-split
5458
; CHECK-NEXT: ret
5559

@@ -172,14 +176,16 @@ memcpy-split: ; preds = %load-store-loop
172176
define void @loop6(i256 addrspace(1)* %dest, i256 addrspace(3)* %src, i256 %size) {
173177
; CHECK-LABEL: loop6:
174178
; CHECK: ; %bb.0:
175-
; CHECK-NEXT: add r0, r0, r4
179+
; CHECK-NEXT: sub.s! 1, r3, r4
180+
; CHECK-NEXT: add.le 1, r0, r3
181+
; CHECK-NEXT: shl.s 5, r3, r3
182+
; CHECK-NEXT: add r1, r3, r3
176183
; CHECK-NEXT: .BB5_1: ; %load-store-loop
177184
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
178-
; CHECK-NEXT: ld.inc r2, r5, r2
179-
; CHECK-NEXT: st.1.inc r1, r5, r1
180-
; CHECK-NEXT: add 1, r4, r4
181-
; CHECK-NEXT: sub! r4, r3, r5
182-
; CHECK-NEXT: jump.lt @.BB5_1
185+
; CHECK-NEXT: ld.inc r2, r4, r2
186+
; CHECK-NEXT: st.1.inc r1, r4, r1
187+
; CHECK-NEXT: sub! r1, r3, r4
188+
; CHECK-NEXT: jump.ne @.BB5_1
183189
; CHECK-NEXT: ; %bb.2: ; %memcpy-split
184190
; CHECK-NEXT: ret
185191
br label %load-store-loop

llvm/test/CodeGen/EraVM/memcpy-expansion.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,14 @@ define fastcc void @expand-unknown(i256 addrspace(1)* %dest, i256 addrspace(3)*
5959
; CHECK-INSTRS-LABEL: expand-unknown-instrs
6060
define fastcc void @expand-unknown-instrs(i256 addrspace(1)* %dest, i256 addrspace(1)* %src, i256 %size) {
6161
; Preheader and loop.
62-
; CHECK-INSTRS: add r0, r0, r5
62+
; CHECK-INSTRS: add r1, r3, r5
6363
; CHECK-INSTRS-NEXT: add r2, r0, r6
6464
; CHECK-INSTRS-NEXT: add r1, r0, r7
6565
; CHECK-INSTRS-NEXT: .BB2_2:
6666
; CHECK-INSTRS: ld.1.inc r6, r8, r6
6767
; CHECK-INSTRS-NEXT: st.1.inc r7, r8, r7
68-
; CHECK-INSTRS-NEXT: add 1, r5, r5
69-
; CHECK-INSTRS-NEXT: sub! r5, r3, r8
70-
; CHECK-INSTRS-NEXT: jump.lt @.BB2_2
68+
; CHECK-INSTRS-NEXT: sub! r7, r5, r8
69+
; CHECK-INSTRS-NEXT: jump.ne @.BB2_2
7170
call void @llvm.memcpy.p1i256.p1i256.i256(i256 addrspace(1)* %dest, i256 addrspace(1)* %src, i256 %size, i1 false)
7271
ret void
7372
}

llvm/test/CodeGen/EraVM/memintrinsics.ll

Lines changed: 30 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -9,39 +9,37 @@ declare void @llvm.memcpy.p2i256.p2i256.i256(i256 addrspace(2)* noalias nocaptur
99

1010
; CHECK-LABEL: huge-copysize0
1111
define fastcc void @huge-copysize0(i256 addrspace(0)* %dest, i256 addrspace(0)* %src) {
12-
; CHECK: add r0, r0, [[INDEX0:r[0-9]+]]
1312
; CHECK: .BB0_1:
1413
; CHECK: shr.s 5, r2, [[SHIFTED_OFFSET0_SRC:r[0-9]+]]
1514
; CHECK: add stack[[[SHIFTED_OFFSET0_SRC]]], r0, [[LOADED_VALUE0:r[0-9]+]]
1615
; CHECK: shr.s 5, r1, [[SHIFTED_OFFSET0_DST:r[0-9]+]]
1716
; CHECK: add [[LOADED_VALUE0]], r0, stack[[[SHIFTED_OFFSET0_DST]]]
18-
; CHECK: add 1, [[INDEX0]], [[INDEX0]]
19-
; CHECK: sub.s! @CPI0_0[0], [[INDEX0]], r4
20-
; CHECK: jump.lt @.BB0_1
17+
; CHECK: add 32, r1, r1
18+
; CHECK: sub! r1, r3, r4
19+
; CHECK: jump.ne @.BB0_1
2120
; CHECK: ret
2221
call void @llvm.memcpy.p0i256.p0i256.i256(i256 addrspace(0)* %dest, i256 addrspace(0)* %src, i256 81129638414606681695789005144064, i1 false)
2322
ret void
2423
}
2524

2625
; CHECK-LABEL: huge-copysize1
2726
define fastcc void @huge-copysize1(i256 addrspace(1)* %dest, i256 addrspace(1)* %src) {
28-
; CHECK: add r0, r0, [[INDEX1:r[0-9]+]]
27+
; CHECK: add @CPI1_0[0], r1, [[LCOND:r[0-9]+]]
2928
; CHECK: add r2, r0, [[LDBASE:r[0-9]+]]
3029
; CHECK: add r1, r0, [[STBASE:r[0-9]+]]
3130
; CHECK:.BB1_1:
3231
; CHECK: ld.1.inc [[LDBASE]], [[LDVAL:r[0-9]+]], [[LDBASE]]
3332
; CHECK: st.1.inc [[STBASE]], [[LDVAL]], [[STBASE]]
34-
; CHECK: add 1, [[INDEX1]], [[INDEX1]]
35-
; CHECK: sub.s! @CPI1_0[0], [[INDEX1]], r{{[0-9]+}}
36-
; CHECK: jump.lt @.BB1_1
33+
; CHECK: sub! [[STBASE]], [[LCOND]], r{{[0-9]+}}
34+
; CHECK: jump.ne @.BB1_1
3735

3836
; trailing part:
39-
; CHECK: add @CPI1_1[0], r1, r1
37+
; CHECK: add @CPI1_0[0], r1, r1
4038
; CHECK: ld.1 r1, [[TRAILING_PART1:r[0-9]+]]
41-
; CHECK: and @CPI1_2[0], [[TRAILING_DST1:r[0-9]+]], [[TRAILING_DST1]]
42-
; CHECK: add @CPI1_1[0], r2, r2
39+
; CHECK: and @CPI1_1[0], [[TRAILING_DST1:r[0-9]+]], [[TRAILING_DST1]]
40+
; CHECK: add @CPI1_0[0], r2, r2
4341
; CHECK: ld.1 r2, [[TRAILING_SRC1:r[0-9]+]]
44-
; CHECK: and @CPI1_3[0], [[TRAILING_SRC1]], [[TRAILING_SRC1]]
42+
; CHECK: and @CPI1_2[0], [[TRAILING_SRC1]], [[TRAILING_SRC1]]
4543
; CHECK: or [[TRAILING_SRC1]], [[TRAILING_DST1]], [[MERGED1:r[0-9]+]]
4644
; CHECK: st.1 r1, [[MERGED1:r[0-9]+]]
4745
; CHECK: ret
@@ -53,23 +51,22 @@ define fastcc void @huge-copysize1(i256 addrspace(1)* %dest, i256 addrspace(1)*
5351

5452
; CHECK-LABEL: huge-copysize2
5553
define fastcc void @huge-copysize2(i256 addrspace(2)* %dest, i256 addrspace(2)* %src) {
56-
; CHECK: add r0, r0, [[INDEX2:r[0-9]+]]
54+
; CHECK: add @CPI2_0[0], r1, [[LCOND:r[0-9]+]]
5755
; CHECK: add r2, r0, [[LDBASE:r[0-9]+]]
5856
; CHECK: add r1, r0, [[STBASE:r[0-9]+]]
5957
; CHECK:.BB2_1:
6058
; CHECK: ld.2.inc [[LDBASE]], [[LDVAL:r[0-9]+]], [[LDBASE]]
6159
; CHECK: st.2.inc [[STBASE]], [[LDVAL]], [[STBASE]]
62-
; CHECK: add 1, [[INDEX2]], [[INDEX2]]
63-
; CHECK: sub.s! @CPI2_0[0], [[INDEX2]], r{{[0-9]+}}
64-
; CHECK: jump.lt @.BB2_1
60+
; CHECK: sub! [[STBASE]], [[LCOND]], r{{[0-9]+}}
61+
; CHECK: jump.ne @.BB2_1
6562

6663
; trailing part:
67-
; CHECK: add @CPI2_1[0], r1, r1
64+
; CHECK: add @CPI2_0[0], r1, r1
6865
; CHECK: ld.2 r1, [[TRAILING_PART2:r[0-9]+]]
69-
; CHECK: and @CPI2_2[0], [[TRAILING_DST2:r[0-9]+]], [[TRAILING_DST2]]
70-
; CHECK: add @CPI2_1[0], r2, r2
66+
; CHECK: and @CPI2_1[0], [[TRAILING_DST2:r[0-9]+]], [[TRAILING_DST2]]
67+
; CHECK: add @CPI2_0[0], r2, r2
7168
; CHECK: ld.2 r2, [[TRAILING_SRC2:r[0-9]+]]
72-
; CHECK: and @CPI2_3[0], [[TRAILING_SRC2]], [[TRAILING_SRC2]]
69+
; CHECK: and @CPI2_2[0], [[TRAILING_SRC2]], [[TRAILING_SRC2]]
7370
; CHECK: or [[TRAILING_SRC2]], [[TRAILING_DST2]], [[MERGED2:r[0-9]+]]
7471
; CHECK: st.2 r1, [[MERGED2:r[0-9]+]]
7572
; CHECK: ret
@@ -81,37 +78,35 @@ define fastcc void @huge-copysize2(i256 addrspace(2)* %dest, i256 addrspace(2)*
8178

8279
; CHECK-LABEL: normal-known-size
8380
define fastcc void @normal-known-size(i256* %dest, i256* %src) {
84-
; CHECK: add r0, r0, [[INDEX3:r[3-9]+]]
81+
; CHECK: add 1024, r1, [[LCOND:r[3-9]+]]
8582
; CHECK: .BB3_1:
8683
; CHECK: shr.s 5, [[LOAD_SHIFT_AMMOUNT:r[0-9]+]], [[SHIFTED_OFFSET3_SRC:r[3-9]+]]
8784
; CHECK: add stack[[[SHIFTED_OFFSET3_SRC]]], r0, [[LOADED_VALUE3:r[3-9]+]]
8885
; CHECK: shr.s 5, [[STORE_SHIFT_AMMOUNT:r[0-9]+]], [[SHIFTED_OFFSET3_DST:r[0-9]+]]
8986
; CHECK: add [[LOADED_VALUE3]], r0, stack[[[SHIFTED_OFFSET3_DST]]]
90-
; CHECK: add 32, [[STORE_SHIFT_AMMOUNT]], [[STORE_SHIFT_AMMOUNT]]
9187
; CHECK: add 32, [[LOAD_SHIFT_AMMOUNT]], [[LOAD_SHIFT_AMMOUNT]]
92-
; CHECK: add 1, [[INDEX3]], [[INDEX3]]
93-
; CHECK: sub.s! 32, [[INDEX3]], r{{[0-9]+}}
94-
; CHECK: jump.lt @.BB3_1
88+
; CHECK: add 32, r1, r1
89+
; CHECK: sub! r1, [[LCOND]], r{{[0-9]+}}
90+
; CHECK: jump.ne @.BB3_1
9591
; CHECK: ret
9692
call void @llvm.memcpy.p0i256.p0i256.i256(i256* %dest, i256* %src, i256 1024, i1 false)
9793
ret void
9894
}
9995

10096
; CHECK-LABEL: normal-known-size-2
10197
define fastcc void @normal-known-size-2(i256* %dest, i256* %src) {
102-
; CHECK: add r0, r0, [[INDEX4:r[3-9]+]]
103-
; CHECK: add r2, r0, [[LDBASE:r[0-9]+]]
104-
; CHECK: add r1, r0, [[STBASE:r[0-9]+]]
98+
; CHECK: add 1056, r1, [[LCOND:r[0-9]+]]
99+
; CHECK: add r2, r0, [[LDBASE:r[0-9]+]]
100+
; CHECK: add r1, r0, [[STBASE:r[0-9]+]]
105101
; CHECK: .BB4_1:
106102
; CHECK: shr.s 5, [[SHIFT_COUNT_SRC:r[0-9]+]], [[SHIFTED_OFFSET4_SRC:r[3-9]+]]
107103
; CHECK: add stack[[[SHIFTED_OFFSET4_SRC]]], r0, [[LOADED_VALUE4:r[3-9]+]]
108104
; CHECK: shr.s 5, [[SHIFT_COUNT_DST:r[0-9]+]], [[SHIFTED_OFFSET4_DST:r[0-9]+]]
109105
; CHECK: add [[LOADED_VALUE4]], r0, stack[[[SHIFTED_OFFSET4_DST]]]
110-
; CHECK: add 32, [[SHIFT_COUNT_DST]], [[SHIFT_COUNT_DST]]
111-
; CHECK: add 32, [[SHIFT_COUNT_SRC]], [[SHIFT_COUNT_SRC]]
112-
; CHECK: add 1, [[INDEX4]], [[INDEX4]]
113-
; CHECK: sub.s! 33, [[INDEX4]], r{{[0-9]+}}
114-
; CHECK: jump.lt @.BB4_1
106+
; CHECK: add 32, [[LDBASE]], [[LDBASE]]
107+
; CHECK: add 32, [[STBASE]], [[STBASE]]
108+
; CHECK: sub! [[STBASE]], [[LCOND]], r{{[0-9]+}}
109+
; CHECK: jump.ne @.BB4_1
115110
; CHECK: add @CPI4_0[0], r0, [[SRCMASK4:r[0-9]+]]
116111
; CHECK: shr.s 5, r2, r2
117112
; CHECK: and stack[33 + r2], [[SRCMASK4]], [[SRCMASKED_VALUE4:r[0-9]+]]
@@ -123,17 +118,10 @@ define fastcc void @normal-known-size-2(i256* %dest, i256* %src) {
123118
ret void
124119
}
125120

126-
; check that the big size copy has correct number of iterations (size / 32)
127-
; CHECK: CPI0_0:
128-
; CHECK: CPI1_0:
129-
; CHECK: CPI2_0:
130-
; CHECK: .cell 2535301200456458802993406410752
131-
132-
133121
; check that in the trailing part, the mask is correct
134122
; CHECK: CPI1_1:
135123
; CHECK: CPI2_1:
136124
; CHECK: .cell 452312848583266388373324160190187140051835877600158453279131187530910662655
137-
; CHECK: CPI1_3:
138-
; CHECK: CPI2_3:
125+
; CHECK: CPI1_2:
126+
; CHECK: CPI2_2:
139127
; CHECK: .cell -452312848583266388373324160190187140051835877600158453279131187530910662656

llvm/test/CodeGen/EraVM/memmove-expansion.ll

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,14 @@ define i256 @test_unknown(ptr addrspace(1) %dst, ptr addrspace(1) %src, i256 %si
8989
; CHECK-INSTRS-NEXT: jump.ne @.BB0_4
9090

9191
; Preheader and forward loop.
92-
; CHECK-INSTRS: add r0, r0, r5
92+
; CHECK-INSTRS: add r5, r1, r5
93+
; CHECK-INSTRS-NEXT: add 32, r5, r5
9394
; CHECK-INSTRS-NEXT: add r2, r0, r6
9495
; CHECK-INSTRS-NEXT: add r1, r0, r7
9596
; CHECK-INSTRS-NEXT: .BB0_8:
9697
; CHECK-INSTRS: ld.1.inc r6, r8, r6
9798
; CHECK-INSTRS-NEXT: st.1.inc r7, r8, r7
98-
; CHECK-INSTRS-NEXT: add 32, r5, r5
99-
; CHECK-INSTRS-NEXT: sub! r5, r4, r8
99+
; CHECK-INSTRS-NEXT: sub! r7, r5, r8
100100
; CHECK-INSTRS-NEXT: jump.ne @.BB0_8
101101

102102
; Copy forward residual and residual.
@@ -127,13 +127,11 @@ define i256 @test_known_forward() {
127127
; CHECK-INSTRS-LABEL: test_known_forward:
128128
; CHECK-INSTRS: add 10, r0, r1
129129
; CHECK-INSTRS-NEXT: add 100, r0, r2
130-
; CHECK-INSTRS-NEXT: add r0, r0, r3
131130
; CHECK-INSTRS-NEXT: .BB1_1:
132-
; CHECK-INSTRS: ld.1.inc r2, r4, r2
133-
; CHECK-INSTRS-NEXT: st.1.inc r1, r4, r1
134-
; CHECK-INSTRS-NEXT: add 1, r3, r3
135-
; CHECK-INSTRS-NEXT: sub.s! 2, r3, r4
136-
; CHECK-INSTRS-NEXT: jump.lt @.BB1_1
131+
; CHECK-INSTRS: ld.1.inc r2, r3, r2
132+
; CHECK-INSTRS-NEXT: st.1.inc r1, r3, r1
133+
; CHECK-INSTRS-NEXT: sub.s! 74, r1, r3
134+
; CHECK-INSTRS-NEXT: jump.ne @.BB1_1
137135
; CHECK-INSTRS: ld.1 164, r2
138136
; CHECK-INSTRS-NEXT: and @CPI1_0[0], r2, r1
139137
; CHECK-INSTRS-NEXT: ld.1 74, r2

0 commit comments

Comments
 (0)