1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
12; RUN: llc -O3 < %s | FileCheck %s
23target datalayout = "E-p:256:256-i256:256:256-S32-a:256:256"
34target triple = "eravm"
@@ -6,114 +7,123 @@ declare void @llvm.memcpy.p0i256.p0i256.i256(i256 addrspace(0)* noalias nocaptur
67declare void @llvm.memcpy.p1i256.p1i256.i256 (i256 addrspace (1 )* noalias nocapture writeonly , i256 addrspace (1 )* noalias nocapture readonly , i256 , i1 immarg)
78declare void @llvm.memcpy.p2i256.p2i256.i256 (i256 addrspace (2 )* noalias nocapture writeonly , i256 addrspace (2 )* noalias nocapture readonly , i256 , i1 immarg)
89
9-
10- ; CHECK-LABEL: huge-copysize0
11- define fastcc void @huge-copysize0 (i256 addrspace (0 )* %dest , i256 addrspace (0 )* %src ) {
12- ; CHECK: .BB0_1:
13- ; CHECK: shr.s 5, r2, [[SHIFTED_OFFSET0_SRC:r[0-9]+]]
14- ; CHECK: add stack[[[SHIFTED_OFFSET0_SRC]]], r0, [[LOADED_VALUE0:r[0-9]+]]
15- ; CHECK: shr.s 5, r1, [[SHIFTED_OFFSET0_DST:r[0-9]+]]
16- ; CHECK: add [[LOADED_VALUE0]], r0, stack[[[SHIFTED_OFFSET0_DST]]]
17- ; CHECK: add 32, r1, r1
18- ; CHECK: sub! r1, r3, r4
19- ; CHECK: jump.ne @.BB0_1
20- ; CHECK: ret
; memcpy within addrspace(0) with a constant length of
; 81129638414606681695789005144064 (2^106). The expected assembly is a single
; load-store loop: each iteration shifts both pointers right by 5 to form a
; stack[] index, reads one stack[] slot, writes one stack[] slot, and advances
; both pointers by 32. The function returns right after the loop; no masked
; tail is expected for this length.
10+ define fastcc void @huge_copysize0 (i256 addrspace (0 )* %dest , i256 addrspace (0 )* %src ) {
11+ ; CHECK-LABEL: huge_copysize0:
12+ ; CHECK: ; %bb.0:
13+ ; CHECK-NEXT: add @CPI0_0[0], r1, r3
14+ ; CHECK-NEXT: .BB0_1: ; %load-store-loop
15+ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
16+ ; CHECK-NEXT: shr.s 5, r2, r4
17+ ; CHECK-NEXT: add stack[r4], r0, r4
18+ ; CHECK-NEXT: shr.s 5, r1, r5
19+ ; CHECK-NEXT: add r4, r0, stack[r5]
20+ ; CHECK-NEXT: add 32, r2, r2
21+ ; CHECK-NEXT: add 32, r1, r1
22+ ; CHECK-NEXT: sub! r1, r3, r4
23+ ; CHECK-NEXT: jump.ne @.BB0_1
24+ ; CHECK-NEXT: ; %bb.2: ; %memcpy-split
25+ ; CHECK-NEXT: ret
2126 call void @llvm.memcpy.p0i256.p0i256.i256 (i256 addrspace (0 )* %dest , i256 addrspace (0 )* %src , i256 81129638414606681695789005144064 , i1 false )
2227 ret void
2328}
2429
25- ; CHECK-LABEL: huge-copysize1
26- define fastcc void @huge-copysize1 (i256 addrspace (1 )* %dest , i256 addrspace (1 )* %src ) {
27- ; CHECK: add @CPI1_0[0], r1, [[LCOND:r[0-9]+]]
28- ; CHECK: add r2, r0, [[LDBASE:r[0-9]+]]
29- ; CHECK: add r1, r0, [[STBASE:r[0-9]+]]
30- ; CHECK:.BB1_1:
31- ; CHECK: ld.1.inc [[LDBASE]], [[LDVAL:r[0-9]+]], [[LDBASE]]
32- ; CHECK: st.1.inc [[STBASE]], [[LDVAL]], [[STBASE]]
33- ; CHECK: sub! [[STBASE]], [[LCOND]], r{{[0-9]+}}
34- ; CHECK: jump.ne @.BB1_1
35-
36- ; trailing part:
37- ; CHECK: add @CPI1_0[0], r1, r1
38- ; CHECK: ld.1 r1, [[TRAILING_PART1:r[0-9]+]]
39- ; CHECK: and @CPI1_1[0], [[TRAILING_DST1:r[0-9]+]], [[TRAILING_DST1]]
40- ; CHECK: add @CPI1_0[0], r2, r2
41- ; CHECK: ld.1 r2, [[TRAILING_SRC1:r[0-9]+]]
42- ; CHECK: and @CPI1_2[0], [[TRAILING_SRC1]], [[TRAILING_SRC1]]
43- ; CHECK: or [[TRAILING_SRC1]], [[TRAILING_DST1]], [[MERGED1:r[0-9]+]]
44- ; CHECK: st.1 r1, [[MERGED1:r[0-9]+]]
45- ; CHECK: ret
46-
47- ; the test explicitly has some trailing part to be copied.
; memcpy within addrspace(1) with a constant length of
; 81129638414606681695789005144065 (2^106 + 1). The expected assembly has two
; parts: a ld.1.inc / st.1.inc loop over copies of the source and destination
; pointers, then a tail in %memcpy-split. The tail offsets both original
; pointers by the CPI1_0 constant, loads the destination and source values
; there, masks them with CPI1_1 and CPI1_2 respectively, ORs the results, and
; stores the merged value back with st.1.
; NOTE(review): the mask constants are not visible in this file; presumably
; they select the trailing part of the copy and preserve the rest of the
; destination word -- confirm against the emitted constant pool.
30+ define fastcc void @huge_copysize1 (i256 addrspace (1 )* %dest , i256 addrspace (1 )* %src ) {
31+ ; CHECK-LABEL: huge_copysize1:
32+ ; CHECK: ; %bb.0:
33+ ; CHECK-NEXT: add @CPI1_0[0], r1, r3
34+ ; CHECK-NEXT: add r2, r0, r4
35+ ; CHECK-NEXT: add r1, r0, r5
36+ ; CHECK-NEXT: .BB1_1: ; %load-store-loop
37+ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
38+ ; CHECK-NEXT: ld.1.inc r4, r6, r4
39+ ; CHECK-NEXT: st.1.inc r5, r6, r5
40+ ; CHECK-NEXT: sub! r5, r3, r6
41+ ; CHECK-NEXT: jump.ne @.BB1_1
42+ ; CHECK-NEXT: ; %bb.2: ; %memcpy-split
43+ ; CHECK-NEXT: add @CPI1_0[0], r1, r1
44+ ; CHECK-NEXT: ld.1 r1, r3
45+ ; CHECK-NEXT: and @CPI1_1[0], r3, r3
46+ ; CHECK-NEXT: add @CPI1_0[0], r2, r2
47+ ; CHECK-NEXT: ld.1 r2, r2
48+ ; CHECK-NEXT: and @CPI1_2[0], r2, r2
49+ ; CHECK-NEXT: or r2, r3, r2
50+ ; CHECK-NEXT: st.1 r1, r2
51+ ; CHECK-NEXT: ret
4852 call void @llvm.memcpy.p1i256.p1i256.i256 (i256 addrspace (1 )* %dest , i256 addrspace (1 )* %src , i256 81129638414606681695789005144065 , i1 false )
4953 ret void
5054}
5155
52- ; CHECK-LABEL: huge-copysize2
53- define fastcc void @huge-copysize2 (i256 addrspace (2 )* %dest , i256 addrspace (2 )* %src ) {
54- ; CHECK: add @CPI2_0[0], r1, [[LCOND:r[0-9]+]]
55- ; CHECK: add r2, r0, [[LDBASE:r[0-9]+]]
56- ; CHECK: add r1, r0, [[STBASE:r[0-9]+]]
57- ; CHECK:.BB2_1:
58- ; CHECK: ld.2.inc [[LDBASE]], [[LDVAL:r[0-9]+]], [[LDBASE]]
59- ; CHECK: st.2.inc [[STBASE]], [[LDVAL]], [[STBASE]]
60- ; CHECK: sub! [[STBASE]], [[LCOND]], r{{[0-9]+}}
61- ; CHECK: jump.ne @.BB2_1
62-
63- ; trailing part:
64- ; CHECK: add @CPI2_0[0], r1, r1
65- ; CHECK: ld.2 r1, [[TRAILING_PART2:r[0-9]+]]
66- ; CHECK: and @CPI2_1[0], [[TRAILING_DST2:r[0-9]+]], [[TRAILING_DST2]]
67- ; CHECK: add @CPI2_0[0], r2, r2
68- ; CHECK: ld.2 r2, [[TRAILING_SRC2:r[0-9]+]]
69- ; CHECK: and @CPI2_2[0], [[TRAILING_SRC2]], [[TRAILING_SRC2]]
70- ; CHECK: or [[TRAILING_SRC2]], [[TRAILING_DST2]], [[MERGED2:r[0-9]+]]
71- ; CHECK: st.2 r1, [[MERGED2:r[0-9]+]]
72- ; CHECK: ret
73-
74- ; the test explicitly has some trailing part to be copied.
; memcpy within addrspace(2) with a constant length of
; 81129638414606681695789005144065 (2^106 + 1). The expected assembly has two
; parts: a ld.2.inc / st.2.inc loop over copies of the source and destination
; pointers, then a tail in %memcpy-split. The tail offsets both original
; pointers by the CPI2_0 constant, loads the destination and source values
; there, masks them with CPI2_1 and CPI2_2 respectively, ORs the results, and
; stores the merged value back with st.2.
; NOTE(review): the mask constants are not visible in this file; presumably
; they select the trailing part of the copy and preserve the rest of the
; destination word -- confirm against the emitted constant pool.
56+ define fastcc void @huge_copysize2 (i256 addrspace (2 )* %dest , i256 addrspace (2 )* %src ) {
57+ ; CHECK-LABEL: huge_copysize2:
58+ ; CHECK: ; %bb.0:
59+ ; CHECK-NEXT: add @CPI2_0[0], r1, r3
60+ ; CHECK-NEXT: add r2, r0, r4
61+ ; CHECK-NEXT: add r1, r0, r5
62+ ; CHECK-NEXT: .BB2_1: ; %load-store-loop
63+ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
64+ ; CHECK-NEXT: ld.2.inc r4, r6, r4
65+ ; CHECK-NEXT: st.2.inc r5, r6, r5
66+ ; CHECK-NEXT: sub! r5, r3, r6
67+ ; CHECK-NEXT: jump.ne @.BB2_1
68+ ; CHECK-NEXT: ; %bb.2: ; %memcpy-split
69+ ; CHECK-NEXT: add @CPI2_0[0], r1, r1
70+ ; CHECK-NEXT: ld.2 r1, r3
71+ ; CHECK-NEXT: and @CPI2_1[0], r3, r3
72+ ; CHECK-NEXT: add @CPI2_0[0], r2, r2
73+ ; CHECK-NEXT: ld.2 r2, r2
74+ ; CHECK-NEXT: and @CPI2_2[0], r2, r2
75+ ; CHECK-NEXT: or r2, r3, r2
76+ ; CHECK-NEXT: st.2 r1, r2
77+ ; CHECK-NEXT: ret
7578 call void @llvm.memcpy.p2i256.p2i256.i256 (i256 addrspace (2 )* %dest , i256 addrspace (2 )* %src , i256 81129638414606681695789005144065 , i1 false )
7679 ret void
7780}
7881
79- ; CHECK-LABEL: normal-known-size
80- define fastcc void @normal-known-size (i256* %dest , i256* %src ) {
81- ; CHECK: add 1024, r1, [[LCOND:r[3-9]+]]
82- ; CHECK: .BB3_1:
83- ; CHECK: shr.s 5, [[LOAD_SHIFT_AMMOUNT:r[0-9]+]], [[SHIFTED_OFFSET3_SRC:r[3-9]+]]
84- ; CHECK: add stack[[[SHIFTED_OFFSET3_SRC]]], r0, [[LOADED_VALUE3:r[3-9]+]]
85- ; CHECK: shr.s 5, [[STORE_SHIFT_AMMOUNT:r[0-9]+]], [[SHIFTED_OFFSET3_DST:r[0-9]+]]
86- ; CHECK: add [[LOADED_VALUE3]], r0, stack[[[SHIFTED_OFFSET3_DST]]]
87- ; CHECK: add 32, [[LOAD_SHIFT_AMMOUNT]], [[LOAD_SHIFT_AMMOUNT]]
88- ; CHECK: add 32, r1, r1
89- ; CHECK: sub! r1, [[LCOND]], r{{[0-9]+}}
90- ; CHECK: jump.ne @.BB3_1
91- ; CHECK: ret
; memcpy between plain i256* pointers with a constant length of 1024. The
; expected assembly computes the loop bound as dest + 1024 up front, then runs
; a load-store loop that reads one stack[] slot, writes one stack[] slot, and
; advances both pointers by 32 until the destination pointer equals the bound.
; The function returns right after the loop; no masked tail is expected.
82+ define fastcc void @normal_known_size (i256* %dest , i256* %src ) {
83+ ; CHECK-LABEL: normal_known_size:
84+ ; CHECK: ; %bb.0:
85+ ; CHECK-NEXT: add 1024, r1, r3
86+ ; CHECK-NEXT: .BB3_1: ; %load-store-loop
87+ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
88+ ; CHECK-NEXT: shr.s 5, r2, r4
89+ ; CHECK-NEXT: add stack[r4], r0, r4
90+ ; CHECK-NEXT: shr.s 5, r1, r5
91+ ; CHECK-NEXT: add r4, r0, stack[r5]
92+ ; CHECK-NEXT: add 32, r2, r2
93+ ; CHECK-NEXT: add 32, r1, r1
94+ ; CHECK-NEXT: sub! r1, r3, r4
95+ ; CHECK-NEXT: jump.ne @.BB3_1
96+ ; CHECK-NEXT: ; %bb.2: ; %memcpy-split
97+ ; CHECK-NEXT: ret
9298 call void @llvm.memcpy.p0i256.p0i256.i256 (i256* %dest , i256* %src , i256 1024 , i1 false )
9399 ret void
94100}
95101
96- ; CHECK-LABEL: normal-known-size-2
97- define fastcc void @normal-known-size-2 (i256* %dest , i256* %src ) {
98- ; CHECK: add 1056, r1, [[LCOND:r[0-9]+]]
99- ; CHECK: add r2, r0, [[LDBASE:r[0-9]+]]
100- ; CHECK: add r1, r0, [[STBASE:r[0-9]+]]
101- ; CHECK: .BB4_1:
102- ; CHECK: shr.s 5, [[SHIFT_COUNT_SRC:r[0-9]+]], [[SHIFTED_OFFSET4_SRC:r[3-9]+]]
103- ; CHECK: add stack[[[SHIFTED_OFFSET4_SRC]]], r0, [[LOADED_VALUE4:r[3-9]+]]
104- ; CHECK: shr.s 5, [[SHIFT_COUNT_DST:r[0-9]+]], [[SHIFTED_OFFSET4_DST:r[0-9]+]]
105- ; CHECK: add [[LOADED_VALUE4]], r0, stack[[[SHIFTED_OFFSET4_DST]]]
106- ; CHECK: add 32, [[LDBASE]], [[LDBASE]]
107- ; CHECK: add 32, [[STBASE]], [[STBASE]]
108- ; CHECK: sub! [[STBASE]], [[LCOND]], r{{[0-9]+}}
109- ; CHECK: jump.ne @.BB4_1
110- ; CHECK: add @CPI4_0[0], r0, [[SRCMASK4:r[0-9]+]]
111- ; CHECK: shr.s 5, r2, r2
112- ; CHECK: and stack[33 + r2], [[SRCMASK4]], [[SRCMASKED_VALUE4:r[0-9]+]]
113- ; CHECK: add @CPI4_1[0], r0, [[DSTMASK4:r[0-9]+]]
114- ; CHECK: shr.s 5, r1, r1
115- ; CHECK: and stack[33 + r1], [[DSTMASK4]], [[DSTMASKED_VALUE4:r[0-9]+]]
116- ; CHECK: or [[SRCMASKED_VALUE4]], [[DSTMASKED_VALUE4]], stack[33 + r1]
; memcpy between plain i256* pointers with a constant length of 1060. The
; expected loop bound is dest + 1056 (33 iterations of 32), so the loop runs
; on copies of the two pointers and leaves the originals intact for the tail.
; In %memcpy-split the source and destination slots at index offset 33 are
; masked with the CPI4_0 and CPI4_1 constants, ORed together, and written back
; to the destination slot.
; NOTE(review): 1060 - 1056 = 4, so the masks presumably keep the 4 trailing
; source bytes and the untouched remainder of the destination slot -- the
; constant pool values are not visible here; confirm against llc output.
102+ define fastcc void @normal_known_size_2 (i256* %dest , i256* %src ) {
103+ ; CHECK-LABEL: normal_known_size_2:
104+ ; CHECK: ; %bb.0:
105+ ; CHECK-NEXT: add 1056, r1, r3
106+ ; CHECK-NEXT: add r2, r0, r4
107+ ; CHECK-NEXT: add r1, r0, r5
108+ ; CHECK-NEXT: .BB4_1: ; %load-store-loop
109+ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
110+ ; CHECK-NEXT: shr.s 5, r4, r6
111+ ; CHECK-NEXT: add stack[r6], r0, r6
112+ ; CHECK-NEXT: shr.s 5, r5, r7
113+ ; CHECK-NEXT: add r6, r0, stack[r7]
114+ ; CHECK-NEXT: add 32, r4, r4
115+ ; CHECK-NEXT: add 32, r5, r5
116+ ; CHECK-NEXT: sub! r5, r3, r6
117+ ; CHECK-NEXT: jump.ne @.BB4_1
118+ ; CHECK-NEXT: ; %bb.2: ; %memcpy-split
119+ ; CHECK-NEXT: add @CPI4_0[0], r0, r3
120+ ; CHECK-NEXT: shr.s 5, r2, r2
121+ ; CHECK-NEXT: and stack[33 + r2], r3, r2
122+ ; CHECK-NEXT: add @CPI4_1[0], r0, r3
123+ ; CHECK-NEXT: shr.s 5, r1, r1
124+ ; CHECK-NEXT: and stack[33 + r1], r3, r3
125+ ; CHECK-NEXT: or r2, r3, stack[33 + r1]
126+ ; CHECK-NEXT: ret
117127 call void @llvm.memcpy.p0i256.p0i256.i256 (i256* %dest , i256* %src , i256 1060 , i1 false )
118128 ret void
119129}
0 commit comments