@@ -9,39 +9,37 @@ declare void @llvm.memcpy.p2i256.p2i256.i256(i256 addrspace(2)* noalias nocaptur
99
; Test: llvm.memcpy between address-space-0 pointers with the constant size
; 81129638414606681695789005144064. Lines prefixed '-' are expectations being
; removed by this change; lines prefixed '+' are their replacements.
; Expected loop shape at .BB0_1: the source and destination offsets (r2, r1) are
; shifted right by 5 to index stack[...], one value is loaded and stored, r1 is
; advanced by 32, and the back-edge is a `jump.ne` after comparing r1 with r3.
; The removed expectations used a separate index register and `jump.lt`.
1010; CHECK-LABEL: huge-copysize0
1111define fastcc void @huge-copysize0 (i256 addrspace (0 )* %dest , i256 addrspace (0 )* %src ) {
12- ; CHECK: add r0, r0, [[INDEX0:r[0-9]+]]
1312; CHECK: .BB0_1:
1413; CHECK: shr.s 5, r2, [[SHIFTED_OFFSET0_SRC:r[0-9]+]]
1514; CHECK: add stack[[[SHIFTED_OFFSET0_SRC]]], r0, [[LOADED_VALUE0:r[0-9]+]]
1615; CHECK: shr.s 5, r1, [[SHIFTED_OFFSET0_DST:r[0-9]+]]
1716; CHECK: add [[LOADED_VALUE0]], r0, stack[[[SHIFTED_OFFSET0_DST]]]
18- ; CHECK: add 1, [[INDEX0]], [[INDEX0]]
19- ; CHECK: sub.s ! @CPI0_0[0], [[INDEX0]] , r4
20- ; CHECK: jump.lt @.BB0_1
17+ ; CHECK: add 32, r1, r1
18+ ; CHECK: sub! r1, r3 , r4
19+ ; CHECK: jump.ne @.BB0_1
2120; CHECK: ret
2221 call void @llvm.memcpy.p0i256.p0i256.i256 (i256 addrspace (0 )* %dest , i256 addrspace (0 )* %src , i256 81129638414606681695789005144064 , i1 false )
2322 ret void
2423}
2524
; Test function taking address-space-1 pointers; its call and closing brace lie
; outside this hunk. Lines prefixed '-' are expectations being removed, lines
; prefixed '+' are their replacements.
; Expected shape after the change: LCOND = @CPI1_0[0] + r1 is computed before the
; loop label, the loop at .BB1_1 copies with ld.1.inc / st.1.inc, and the
; back-edge is a `jump.ne` taken after comparing STBASE with LCOND.
2625; CHECK-LABEL: huge-copysize1
2726define fastcc void @huge-copysize1 (i256 addrspace (1 )* %dest , i256 addrspace (1 )* %src ) {
28- ; CHECK: add r0, r0 , [[INDEX1 :r[0-9]+]]
27+ ; CHECK: add @CPI1_0[0], r1 , [[LCOND :r[0-9]+]]
2928; CHECK: add r2, r0, [[LDBASE:r[0-9]+]]
3029; CHECK: add r1, r0, [[STBASE:r[0-9]+]]
3130; CHECK:.BB1_1:
3231; CHECK: ld.1.inc [[LDBASE]], [[LDVAL:r[0-9]+]], [[LDBASE]]
3332; CHECK: st.1.inc [[STBASE]], [[LDVAL]], [[STBASE]]
34- ; CHECK: add 1, [[INDEX1]], [[INDEX1]]
35- ; CHECK: sub.s! @CPI1_0[0], [[INDEX1]], r{{[0-9]+}}
36- ; CHECK: jump.lt @.BB1_1
33+ ; CHECK: sub! [[STBASE]], [[LCOND]], r{{[0-9]+}}
34+ ; CHECK: jump.ne @.BB1_1
3735
; The trailing part loads through r1 and r2 (each advanced by @CPI1_0[0]),
; masks both values with constant-pool masks, ORs them and stores via r1.
3836; trailing part:
39- ; CHECK: add @CPI1_1 [0], r1, r1
37+ ; CHECK: add @CPI1_0 [0], r1, r1
4038; CHECK: ld.1 r1, [[TRAILING_PART1:r[0-9]+]]
41- ; CHECK: and @CPI1_2 [0], [[TRAILING_DST1:r[0-9]+]], [[TRAILING_DST1]]
42- ; CHECK: add @CPI1_1 [0], r2, r2
39+ ; CHECK: and @CPI1_1 [0], [[TRAILING_DST1:r[0-9]+]], [[TRAILING_DST1]]
40+ ; CHECK: add @CPI1_0 [0], r2, r2
4341; CHECK: ld.1 r2, [[TRAILING_SRC1:r[0-9]+]]
44- ; CHECK: and @CPI1_3 [0], [[TRAILING_SRC1]], [[TRAILING_SRC1]]
42+ ; CHECK: and @CPI1_2 [0], [[TRAILING_SRC1]], [[TRAILING_SRC1]]
4543; CHECK: or [[TRAILING_SRC1]], [[TRAILING_DST1]], [[MERGED1:r[0-9]+]]
; The store has to reuse the register captured by the `or` directive; giving
; MERGED1 a fresh pattern here would rebind it and accept any register.
4644; CHECK: st.1 r1, [[MERGED1]]
4745; CHECK: ret
@@ -53,23 +51,22 @@ define fastcc void @huge-copysize1(i256 addrspace(1)* %dest, i256 addrspace(1)*
5351
; Test function taking address-space-2 pointers; its call and closing brace lie
; outside this hunk. Lines prefixed '-' are expectations being removed, lines
; prefixed '+' are their replacements.
; Expected shape after the change: LCOND = @CPI2_0[0] + r1 is computed before the
; loop label, the loop at .BB2_1 copies with ld.2.inc / st.2.inc, and the
; back-edge is a `jump.ne` taken after comparing STBASE with LCOND.
5452; CHECK-LABEL: huge-copysize2
5553define fastcc void @huge-copysize2 (i256 addrspace (2 )* %dest , i256 addrspace (2 )* %src ) {
56- ; CHECK: add r0, r0 , [[INDEX2 :r[0-9]+]]
54+ ; CHECK: add @CPI2_0[0], r1 , [[LCOND :r[0-9]+]]
5755; CHECK: add r2, r0, [[LDBASE:r[0-9]+]]
5856; CHECK: add r1, r0, [[STBASE:r[0-9]+]]
5957; CHECK:.BB2_1:
6058; CHECK: ld.2.inc [[LDBASE]], [[LDVAL:r[0-9]+]], [[LDBASE]]
6159; CHECK: st.2.inc [[STBASE]], [[LDVAL]], [[STBASE]]
62- ; CHECK: add 1, [[INDEX2]], [[INDEX2]]
63- ; CHECK: sub.s! @CPI2_0[0], [[INDEX2]], r{{[0-9]+}}
64- ; CHECK: jump.lt @.BB2_1
60+ ; CHECK: sub! [[STBASE]], [[LCOND]], r{{[0-9]+}}
61+ ; CHECK: jump.ne @.BB2_1
6562
; The trailing part loads through r1 and r2 (each advanced by @CPI2_0[0]),
; masks both values with constant-pool masks, ORs them and stores via r1.
6663; trailing part:
67- ; CHECK: add @CPI2_1 [0], r1, r1
64+ ; CHECK: add @CPI2_0 [0], r1, r1
6865; CHECK: ld.2 r1, [[TRAILING_PART2:r[0-9]+]]
69- ; CHECK: and @CPI2_2 [0], [[TRAILING_DST2:r[0-9]+]], [[TRAILING_DST2]]
70- ; CHECK: add @CPI2_1 [0], r2, r2
66+ ; CHECK: and @CPI2_1 [0], [[TRAILING_DST2:r[0-9]+]], [[TRAILING_DST2]]
67+ ; CHECK: add @CPI2_0 [0], r2, r2
7168; CHECK: ld.2 r2, [[TRAILING_SRC2:r[0-9]+]]
72- ; CHECK: and @CPI2_3 [0], [[TRAILING_SRC2]], [[TRAILING_SRC2]]
69+ ; CHECK: and @CPI2_2 [0], [[TRAILING_SRC2]], [[TRAILING_SRC2]]
7370; CHECK: or [[TRAILING_SRC2]], [[TRAILING_DST2]], [[MERGED2:r[0-9]+]]
; The store has to reuse the register captured by the `or` directive; giving
; MERGED2 a fresh pattern here would rebind it and accept any register.
7471; CHECK: st.2 r1, [[MERGED2]]
7572; CHECK: ret
@@ -81,37 +78,35 @@ define fastcc void @huge-copysize2(i256 addrspace(2)* %dest, i256 addrspace(2)*
8178
; Test: llvm.memcpy between default-address-space pointers with the constant
; size argument 1024. Lines prefixed '-' are expectations being removed by this
; change; lines prefixed '+' are their replacements.
; Expected shape after the change: LCOND = 1024 + r1 is computed before the loop
; label; the loop at .BB3_1 shifts the load/store offsets right by 5 to index
; stack[...], advances the load offset and r1 by 32, and branches back with
; `jump.ne` after comparing r1 with LCOND. The removed expectations counted
; iterations in a separate index register compared against 32 with `jump.lt`.
8279; CHECK-LABEL: normal-known-size
8380define fastcc void @normal-known-size (i256* %dest , i256* %src ) {
84- ; CHECK: add r0, r0 , [[INDEX3 :r[3-9]+]]
81+ ; CHECK: add 1024, r1 , [[LCOND :r[3-9]+]]
8582; CHECK: .BB3_1:
8683; CHECK: shr.s 5, [[LOAD_SHIFT_AMMOUNT:r[0-9]+]], [[SHIFTED_OFFSET3_SRC:r[3-9]+]]
8784; CHECK: add stack[[[SHIFTED_OFFSET3_SRC]]], r0, [[LOADED_VALUE3:r[3-9]+]]
8885; CHECK: shr.s 5, [[STORE_SHIFT_AMMOUNT:r[0-9]+]], [[SHIFTED_OFFSET3_DST:r[0-9]+]]
8986; CHECK: add [[LOADED_VALUE3]], r0, stack[[[SHIFTED_OFFSET3_DST]]]
90- ; CHECK: add 32, [[STORE_SHIFT_AMMOUNT]], [[STORE_SHIFT_AMMOUNT]]
9187; CHECK: add 32, [[LOAD_SHIFT_AMMOUNT]], [[LOAD_SHIFT_AMMOUNT]]
92- ; CHECK: add 1, [[INDEX3]], [[INDEX3]]
93- ; CHECK: sub.s ! 32 , [[INDEX3 ]], r{{[0-9]+}}
94- ; CHECK: jump.lt @.BB3_1
88+ ; CHECK: add 32, r1, r1
89+ ; CHECK: sub! r1 , [[LCOND ]], r{{[0-9]+}}
90+ ; CHECK: jump.ne @.BB3_1
9591; CHECK: ret
9692 call void @llvm.memcpy.p0i256.p0i256.i256 (i256* %dest , i256* %src , i256 1024 , i1 false )
9793 ret void
9894}
9995
; Test function taking default-address-space pointers; the memcpy call and its
; size lie outside this hunk. Lines prefixed '-' are expectations being removed
; by this change; lines prefixed '+' are their replacements.
; Expected shape after the change: LCOND = 1056 + r1, with LDBASE/STBASE copies
; of r2/r1 set up before the loop label; the loop at .BB4_1 shifts offsets right
; by 5 to index stack[...], advances LDBASE and STBASE by 32, and branches back
; with `jump.ne` after comparing STBASE with LCOND. After the loop the directives
; expect a mask loaded from @CPI4_0[0] and applied to stack[33 + r2].
10096; CHECK-LABEL: normal-known-size-2
10197define fastcc void @normal-known-size-2 (i256* %dest , i256* %src ) {
102- ; CHECK: add r0, r0 , [[INDEX4 :r[3 -9]+]]
103- ; CHECK: add r2, r0, [[LDBASE:r[0-9]+]]
104- ; CHECK: add r1, r0, [[STBASE:r[0-9]+]]
98+ ; CHECK: add 1056, r1 , [[LCOND :r[0 -9]+]]
99+ ; CHECK: add r2, r0, [[LDBASE:r[0-9]+]]
100+ ; CHECK: add r1, r0, [[STBASE:r[0-9]+]]
105101; CHECK: .BB4_1:
106102; CHECK: shr.s 5, [[SHIFT_COUNT_SRC:r[0-9]+]], [[SHIFTED_OFFSET4_SRC:r[3-9]+]]
107103; CHECK: add stack[[[SHIFTED_OFFSET4_SRC]]], r0, [[LOADED_VALUE4:r[3-9]+]]
108104; CHECK: shr.s 5, [[SHIFT_COUNT_DST:r[0-9]+]], [[SHIFTED_OFFSET4_DST:r[0-9]+]]
109105; CHECK: add [[LOADED_VALUE4]], r0, stack[[[SHIFTED_OFFSET4_DST]]]
110- ; CHECK: add 32, [[SHIFT_COUNT_DST]], [[SHIFT_COUNT_DST]]
111- ; CHECK: add 32, [[SHIFT_COUNT_SRC]], [[SHIFT_COUNT_SRC]]
112- ; CHECK: add 1, [[INDEX4]], [[INDEX4]]
113- ; CHECK: sub.s! 33, [[INDEX4]], r{{[0-9]+}}
114- ; CHECK: jump.lt @.BB4_1
106+ ; CHECK: add 32, [[LDBASE]], [[LDBASE]]
107+ ; CHECK: add 32, [[STBASE]], [[STBASE]]
108+ ; CHECK: sub! [[STBASE]], [[LCOND]], r{{[0-9]+}}
109+ ; CHECK: jump.ne @.BB4_1
115110; CHECK: add @CPI4_0[0], r0, [[SRCMASK4:r[0-9]+]]
116111; CHECK: shr.s 5, r2, r2
117112; CHECK: and stack[33 + r2], [[SRCMASK4]], [[SRCMASKED_VALUE4:r[0-9]+]]
@@ -123,17 +118,10 @@ define fastcc void @normal-known-size-2(i256* %dest, i256* %src) {
123118 ret void
124119}
125120
126- ; check that the big size copy has correct number of iterations (size / 32)
127- ; CHECK: CPI0_0:
128- ; CHECK: CPI1_0:
129- ; CHECK: CPI2_0:
130- ; CHECK: .cell 2535301200456458802993406410752
131-
132-
; Constant-pool expectations for the trailing-part masks. The positive constant
; below equals 2^248 - 1 and the negative one equals -(2^248), i.e. the bitwise
; complement of the first in two's complement. After this change the second mask
; is expected under the CPI1_2 / CPI2_2 labels instead of CPI1_3 / CPI2_3, and the
; iteration-count constant (2535301200456458802993406410752) is no longer
; verified.
133121; check that in the trailing part, the mask is correct
134122; CHECK: CPI1_1:
135123; CHECK: CPI2_1:
136124; CHECK: .cell 452312848583266388373324160190187140051835877600158453279131187530910662655
137- ; CHECK: CPI1_3 :
138- ; CHECK: CPI2_3 :
125+ ; CHECK: CPI1_2 :
126+ ; CHECK: CPI2_2 :
139127; CHECK: .cell -452312848583266388373324160190187140051835877600158453279131187530910662656
0 commit comments