1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12; RUN: llc -mtriple=arm64_32-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s
23
34define <2 x double > @test_insert_elt (<2 x double > %vec , double %val ) {
45; CHECK-LABEL: test_insert_elt:
5- ; CHECK: mov.d v0[0], v1[0]
6+ ; CHECK: ; %bb.0:
7+ ; CHECK-NEXT: ; kill: def $d1 killed $d1 def $q1
8+ ; CHECK-NEXT: mov.d v0[0], v1[0]
9+ ; CHECK-NEXT: ret
610 %res = insertelement <2 x double > %vec , double %val , i32 0
711 ret <2 x double > %res
812}
913
1014define void @test_split_16B (<4 x float > %val , ptr %addr ) {
1115; CHECK-LABEL: test_split_16B:
12- ; CHECK: str q0, [x0]
16+ ; CHECK: ; %bb.0:
17+ ; CHECK-NEXT: str q0, [x0]
18+ ; CHECK-NEXT: ret
1319 store <4 x float > %val , ptr %addr , align 8
1420 ret void
1521}
1622
1723define void @test_split_16B_splat (<4 x i32 >, ptr %addr ) {
1824; CHECK-LABEL: test_split_16B_splat:
19- ; CHECK: str {{q[0-9]+}}
20-
25+ ; CHECK: ; %bb.0:
26+ ; CHECK-NEXT: movi.4s v0, #42
27+ ; CHECK-NEXT: str q0, [x0]
28+ ; CHECK-NEXT: ret
2129 %vec.tmp0 = insertelement <4 x i32 > undef , i32 42 , i32 0
2230 %vec.tmp1 = insertelement <4 x i32 > %vec.tmp0 , i32 42 , i32 1
2331 %vec.tmp2 = insertelement <4 x i32 > %vec.tmp1 , i32 42 , i32 2
2432 %vec = insertelement <4 x i32 > %vec.tmp2 , i32 42 , i32 3
25-
2633 store <4 x i32 > %vec , ptr %addr , align 8
2734 ret void
2835}
@@ -33,166 +40,176 @@ define void @test_split_16B_splat(<4 x i32>, ptr %addr) {
3340declare {%vec , %vec } @llvm.aarch64.neon.ld2r.v2f64.p0 (ptr )
3441define {%vec , %vec } @test_neon_load (ptr %addr ) {
3542; CHECK-LABEL: test_neon_load:
36- ; CHECK: ld2r.2d { v0, v1 }, [x0]
43+ ; CHECK: ; %bb.0:
44+ ; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0]
45+ ; CHECK-NEXT: ret
3746 %res = call {%vec , %vec } @llvm.aarch64.neon.ld2r.v2f64.p0 (ptr %addr )
3847 ret {%vec , %vec } %res
3948}
4049
4150declare {%vec , %vec } @llvm.aarch64.neon.ld2lane.v2f64.p0 (%vec , %vec , i64 , ptr )
4251define {%vec , %vec } @test_neon_load_lane (ptr %addr , %vec %in1 , %vec %in2 ) {
4352; CHECK-LABEL: test_neon_load_lane:
44- ; CHECK: ld2.d { v0, v1 }[0], [x0]
53+ ; CHECK: ; %bb.0:
54+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
55+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
56+ ; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0]
57+ ; CHECK-NEXT: ret
4558 %res = call {%vec , %vec } @llvm.aarch64.neon.ld2lane.v2f64.p0 (%vec %in1 , %vec %in2 , i64 0 , ptr %addr )
4659 ret {%vec , %vec } %res
4760}
4861
4962declare void @llvm.aarch64.neon.st2.v2f64.p0 (%vec , %vec , ptr )
5063define void @test_neon_store (ptr %addr , %vec %in1 , %vec %in2 ) {
5164; CHECK-LABEL: test_neon_store:
52- ; CHECK: st2.2d { v0, v1 }, [x0]
65+ ; CHECK: ; %bb.0:
66+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
67+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
68+ ; CHECK-NEXT: st2.2d { v0, v1 }, [x0]
69+ ; CHECK-NEXT: ret
5370 call void @llvm.aarch64.neon.st2.v2f64.p0 (%vec %in1 , %vec %in2 , ptr %addr )
5471 ret void
5572}
5673
5774declare void @llvm.aarch64.neon.st2lane.v2f64.p0 (%vec , %vec , i64 , ptr )
5875define void @test_neon_store_lane (ptr %addr , %vec %in1 , %vec %in2 ) {
5976; CHECK-LABEL: test_neon_store_lane:
60- ; CHECK: st2.d { v0, v1 }[1], [x0]
77+ ; CHECK: ; %bb.0:
78+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
79+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
80+ ; CHECK-NEXT: st2.d { v0, v1 }[1], [x0]
81+ ; CHECK-NEXT: ret
6182 call void @llvm.aarch64.neon.st2lane.v2f64.p0 (%vec %in1 , %vec %in2 , i64 1 , ptr %addr )
6283 ret void
6384}
6485
6586declare {%vec , %vec } @llvm.aarch64.neon.ld2.v2f64.p0 (ptr )
6687define {{%vec , %vec }, ptr } @test_neon_load_post (ptr %addr , i32 %offset ) {
6788; CHECK-LABEL: test_neon_load_post:
68- ; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
69- ; CHECK: ld2.2d { v0, v1 }, [x0], [[OFFSET]]
70-
89+ ; CHECK: ; %bb.0:
90+ ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
91+ ; CHECK-NEXT: sxtw x8, w1
92+ ; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], x8
93+ ; CHECK-NEXT: ret
7194 %vecs = call {%vec , %vec } @llvm.aarch64.neon.ld2.v2f64.p0 (ptr %addr )
72-
7395 %addr.new = getelementptr inbounds i8 , ptr %addr , i32 %offset
74-
7596 %res.tmp = insertvalue {{%vec , %vec }, ptr } undef , {%vec , %vec } %vecs , 0
7697 %res = insertvalue {{%vec , %vec }, ptr } %res.tmp , ptr %addr.new , 1
7798 ret {{%vec , %vec }, ptr } %res
7899}
79100
80101define {{%vec , %vec }, ptr } @test_neon_load_post_lane (ptr %addr , i32 %offset , %vec %in1 , %vec %in2 ) {
81102; CHECK-LABEL: test_neon_load_post_lane:
82- ; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
83- ; CHECK: ld2.d { v0, v1 }[1], [x0], [[OFFSET]]
84-
103+ ; CHECK: ; %bb.0:
104+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
105+ ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
106+ ; CHECK-NEXT: sxtw x8, w1
107+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
108+ ; CHECK-NEXT: ld2.d { v0, v1 }[1], [x0], x8
109+ ; CHECK-NEXT: ret
85110 %vecs = call {%vec , %vec } @llvm.aarch64.neon.ld2lane.v2f64.p0 (%vec %in1 , %vec %in2 , i64 1 , ptr %addr )
86-
87111 %addr.new = getelementptr inbounds i8 , ptr %addr , i32 %offset
88-
89112 %res.tmp = insertvalue {{%vec , %vec }, ptr } undef , {%vec , %vec } %vecs , 0
90113 %res = insertvalue {{%vec , %vec }, ptr } %res.tmp , ptr %addr.new , 1
91114 ret {{%vec , %vec }, ptr } %res
92115}
93116
94117define ptr @test_neon_store_post (ptr %addr , i32 %offset , %vec %in1 , %vec %in2 ) {
95118; CHECK-LABEL: test_neon_store_post:
96- ; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
97- ; CHECK: st2.2d { v0, v1 }, [x0], [[OFFSET]]
98-
119+ ; CHECK: ; %bb.0:
120+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
121+ ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
122+ ; CHECK-NEXT: sxtw x8, w1
123+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
124+ ; CHECK-NEXT: st2.2d { v0, v1 }, [x0], x8
125+ ; CHECK-NEXT: ret
99126 call void @llvm.aarch64.neon.st2.v2f64.p0 (%vec %in1 , %vec %in2 , ptr %addr )
100-
101127 %addr.new = getelementptr inbounds i8 , ptr %addr , i32 %offset
102-
103128 ret ptr %addr.new
104129}
105130
106131define ptr @test_neon_store_post_lane (ptr %addr , i32 %offset , %vec %in1 , %vec %in2 ) {
107132; CHECK-LABEL: test_neon_store_post_lane:
108- ; CHECK: sxtw [[OFFSET:x[0-9]+]], w1
109- ; CHECK: st2.d { v0, v1 }[0], [x0], [[OFFSET]]
110-
133+ ; CHECK: ; %bb.0:
134+ ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
135+ ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
136+ ; CHECK-NEXT: sxtw x8, w1
137+ ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
138+ ; CHECK-NEXT: st2.d { v0, v1 }[0], [x0], x8
139+ ; CHECK-NEXT: ret
111140 call void @llvm.aarch64.neon.st2lane.v2f64.p0 (%vec %in1 , %vec %in2 , i64 0 , ptr %addr )
112-
113141 %addr.new = getelementptr inbounds i8 , ptr %addr , i32 %offset
114-
115142 ret ptr %addr.new
116143}
117144
118145; ld1 is slightly different because it goes via ISelLowering of normal IR ops
119146; rather than an intrinsic.
120147define {%vec , ptr } @test_neon_ld1_post_lane (ptr %addr , i32 %offset , %vec %in ) {
121148; CHECK-LABEL: test_neon_ld1_post_lane:
122- ; CHECK: sbfiz [[OFFSET:x[0-9]+]], x1, #3, #32
123- ; CHECK: ld1.d { v0 }[0], [x0], [[OFFSET]]
124-
149+ ; CHECK: ; %bb.0:
150+ ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
151+ ; CHECK-NEXT: sbfiz x8, x1, #3, #32
152+ ; CHECK-NEXT: ld1.d { v0 }[0], [x0], x8
153+ ; CHECK-NEXT: ret
125154 %loaded = load double , ptr %addr , align 8
126155 %newvec = insertelement %vec %in , double %loaded , i32 0
127-
128156 %addr.new = getelementptr inbounds double , ptr %addr , i32 %offset
129-
130157 %res.tmp = insertvalue {%vec , ptr } undef , %vec %newvec , 0
131158 %res = insertvalue {%vec , ptr } %res.tmp , ptr %addr.new , 1
132-
133159 ret {%vec , ptr } %res
134160}
135161
136162define {{%vec , %vec }, ptr } @test_neon_load_post_exact (ptr %addr ) {
137163; CHECK-LABEL: test_neon_load_post_exact:
138- ; CHECK: ld2.2d { v0, v1 }, [x0], #32
139-
164+ ; CHECK: ; %bb.0:
165+ ; CHECK-NEXT: ld2.2d { v0, v1 }, [x0], #32
166+ ; CHECK-NEXT: ret
140167 %vecs = call {%vec , %vec } @llvm.aarch64.neon.ld2.v2f64.p0 (ptr %addr )
141-
142168 %addr.new = getelementptr inbounds i8 , ptr %addr , i32 32
143-
144169 %res.tmp = insertvalue {{%vec , %vec }, ptr } undef , {%vec , %vec } %vecs , 0
145170 %res = insertvalue {{%vec , %vec }, ptr } %res.tmp , ptr %addr.new , 1
146171 ret {{%vec , %vec }, ptr } %res
147172}
148173
149174define {%vec , ptr } @test_neon_ld1_post_lane_exact (ptr %addr , %vec %in ) {
150175; CHECK-LABEL: test_neon_ld1_post_lane_exact:
151- ; CHECK: ld1.d { v0 }[0], [x0], #8
152-
176+ ; CHECK: ; %bb.0:
177+ ; CHECK-NEXT: ld1.d { v0 }[0], [x0], #8
178+ ; CHECK-NEXT: ret
153179 %loaded = load double , ptr %addr , align 8
154180 %newvec = insertelement %vec %in , double %loaded , i32 0
155-
156181 %addr.new = getelementptr inbounds double , ptr %addr , i32 1
157-
158182 %res.tmp = insertvalue {%vec , ptr } undef , %vec %newvec , 0
159183 %res = insertvalue {%vec , ptr } %res.tmp , ptr %addr.new , 1
160-
161184 ret {%vec , ptr } %res
162185}
163186
164187; As in the general load/store case, this GEP has defined semantics when the
165188; address wraps. We cannot use post-indexed addressing.
166189define {%vec , ptr } @test_neon_ld1_notpost_lane_exact (ptr %addr , %vec %in ) {
167190; CHECK-LABEL: test_neon_ld1_notpost_lane_exact:
168- ; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], #8
169- ; CHECK: add w0, w0, #8
170- ; CHECK: ret
171-
191+ ; CHECK: ; %bb.0:
192+ ; CHECK-NEXT: ld1.d { v0 }[0], [x0]
193+ ; CHECK-NEXT: add w0, w0, #8
194+ ; CHECK-NEXT: ret
172195 %loaded = load double , ptr %addr , align 8
173196 %newvec = insertelement %vec %in , double %loaded , i32 0
174-
175197 %addr.new = getelementptr double , ptr %addr , i32 1
176-
177198 %res.tmp = insertvalue {%vec , ptr } undef , %vec %newvec , 0
178199 %res = insertvalue {%vec , ptr } %res.tmp , ptr %addr.new , 1
179-
180200 ret {%vec , ptr } %res
181201}
182202
183203define {%vec , ptr } @test_neon_ld1_notpost_lane (ptr %addr , i32 %offset , %vec %in ) {
184204; CHECK-LABEL: test_neon_ld1_notpost_lane:
185- ; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], {{x[0-9]+|sp}}
186- ; CHECK: add w0, w0, w1, lsl #3
187- ; CHECK: ret
188-
205+ ; CHECK: ; %bb.0:
206+ ; CHECK-NEXT: ld1.d { v0 }[0], [x0]
207+ ; CHECK-NEXT: add w0, w0, w1, lsl #3
208+ ; CHECK-NEXT: ret
189209 %loaded = load double , ptr %addr , align 8
190210 %newvec = insertelement %vec %in , double %loaded , i32 0
191-
192211 %addr.new = getelementptr double , ptr %addr , i32 %offset
193-
194212 %res.tmp = insertvalue {%vec , ptr } undef , %vec %newvec , 0
195213 %res = insertvalue {%vec , ptr } %res.tmp , ptr %addr.new , 1
196-
197214 ret {%vec , ptr } %res
198215}
0 commit comments