11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2- ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
2+ ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
3+ ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
34
; extract_32xi8: volatile-loads a <32 x i8> vector from %src, extracts the
; element at constant index 1, and stores that i8 to %dst.
; Expected code, per the check lines in the body: under the LA64 prefix the
; element is written with a single xvstelm.b (the same sequence the removed
; CHECK-prefixed lines expected); under the LA32 prefix the expectation is
; vpickve2gr.b into $a0 followed by st.b.
; NOTE(review): the leading digits and '-'/'+' markers on the lines below look
; like diff-view residue (old/new line numbers) -- confirm against the real file.
45define void @extract_32xi8 (ptr %src , ptr %dst ) nounwind {
5- ; CHECK-LABEL: extract_32xi8:
6- ; CHECK: # %bb.0:
7- ; CHECK-NEXT: xvld $xr0, $a0, 0
8- ; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 1
9- ; CHECK-NEXT: ret
6+ ; LA32-LABEL: extract_32xi8:
7+ ; LA32: # %bb.0:
8+ ; LA32-NEXT: xvld $xr0, $a0, 0
9+ ; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
10+ ; LA32-NEXT: st.b $a0, $a1, 0
11+ ; LA32-NEXT: ret
12+ ;
13+ ; LA64-LABEL: extract_32xi8:
14+ ; LA64: # %bb.0:
15+ ; LA64-NEXT: xvld $xr0, $a0, 0
16+ ; LA64-NEXT: xvstelm.b $xr0, $a1, 0, 1
17+ ; LA64-NEXT: ret
1018 %v = load volatile <32 x i8 >, ptr %src
1119 %e = extractelement <32 x i8 > %v , i32 1
1220 store i8 %e , ptr %dst
1321 ret void
1422}
1523
; extract_16xi16: volatile-loads a <16 x i16> vector from %src, extracts the
; element at constant index 1, and stores that i16 to %dst.
; Expected code, per the check lines in the body: under the LA64 prefix the
; element is written with a single xvstelm.h (the same sequence the removed
; CHECK-prefixed lines expected); under the LA32 prefix the expectation is
; vpickve2gr.h into $a0 followed by st.h.
; NOTE(review): the leading digits and '-'/'+' markers on the lines below look
; like diff-view residue (old/new line numbers) -- confirm against the real file.
1624define void @extract_16xi16 (ptr %src , ptr %dst ) nounwind {
17- ; CHECK-LABEL: extract_16xi16:
18- ; CHECK: # %bb.0:
19- ; CHECK-NEXT: xvld $xr0, $a0, 0
20- ; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 1
21- ; CHECK-NEXT: ret
25+ ; LA32-LABEL: extract_16xi16:
26+ ; LA32: # %bb.0:
27+ ; LA32-NEXT: xvld $xr0, $a0, 0
28+ ; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
29+ ; LA32-NEXT: st.h $a0, $a1, 0
30+ ; LA32-NEXT: ret
31+ ;
32+ ; LA64-LABEL: extract_16xi16:
33+ ; LA64: # %bb.0:
34+ ; LA64-NEXT: xvld $xr0, $a0, 0
35+ ; LA64-NEXT: xvstelm.h $xr0, $a1, 0, 1
36+ ; LA64-NEXT: ret
2237 %v = load volatile <16 x i16 >, ptr %src
2338 %e = extractelement <16 x i16 > %v , i32 1
2439 store i16 %e , ptr %dst
2540 ret void
2641}
2742
; extract_8xi32: volatile-loads a <8 x i32> vector from %src, extracts the
; element at constant index 1, and stores that i32 to %dst.
; Expected code, per the check lines in the body: under the LA64 prefix the
; element is written with a single xvstelm.w (the same sequence the removed
; CHECK-prefixed lines expected); under the LA32 prefix the expectation is
; xvpickve2gr.w into $a0 followed by st.w.
; NOTE(review): the leading digits and '-'/'+' markers on the lines below look
; like diff-view residue (old/new line numbers) -- confirm against the real file.
2843define void @extract_8xi32 (ptr %src , ptr %dst ) nounwind {
29- ; CHECK-LABEL: extract_8xi32:
30- ; CHECK: # %bb.0:
31- ; CHECK-NEXT: xvld $xr0, $a0, 0
32- ; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 1
33- ; CHECK-NEXT: ret
44+ ; LA32-LABEL: extract_8xi32:
45+ ; LA32: # %bb.0:
46+ ; LA32-NEXT: xvld $xr0, $a0, 0
47+ ; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1
48+ ; LA32-NEXT: st.w $a0, $a1, 0
49+ ; LA32-NEXT: ret
50+ ;
51+ ; LA64-LABEL: extract_8xi32:
52+ ; LA64: # %bb.0:
53+ ; LA64-NEXT: xvld $xr0, $a0, 0
54+ ; LA64-NEXT: xvstelm.w $xr0, $a1, 0, 1
55+ ; LA64-NEXT: ret
3456 %v = load volatile <8 x i32 >, ptr %src
3557 %e = extractelement <8 x i32 > %v , i32 1
3658 store i32 %e , ptr %dst
3759 ret void
3860}
3961
4062define void @extract_4xi64 (ptr %src , ptr %dst ) nounwind {
41- ; CHECK-LABEL: extract_4xi64:
42- ; CHECK: # %bb.0:
43- ; CHECK-NEXT: xvld $xr0, $a0, 0
44- ; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 1
45- ; CHECK-NEXT: ret
63+ ; LA32-LABEL: extract_4xi64:
64+ ; LA32: # %bb.0:
65+ ; LA32-NEXT: xvld $xr0, $a0, 0
66+ ; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 2
67+ ; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 3
68+ ; LA32-NEXT: st.w $a2, $a1, 4
69+ ; LA32-NEXT: st.w $a0, $a1, 0
70+ ; LA32-NEXT: ret
71+ ;
72+ ; LA64-LABEL: extract_4xi64:
73+ ; LA64: # %bb.0:
74+ ; LA64-NEXT: xvld $xr0, $a0, 0
75+ ; LA64-NEXT: xvstelm.d $xr0, $a1, 0, 1
76+ ; LA64-NEXT: ret
4677 %v = load volatile <4 x i64 >, ptr %src
4778 %e = extractelement <4 x i64 > %v , i32 1
4879 store i64 %e , ptr %dst
@@ -74,58 +105,102 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
74105}
75106
; extract_32xi8_idx: volatile-loads a <32 x i8> vector from %src, extracts the
; element selected by the run-time i32 argument %idx, and stores that i8 to
; %dst.
; Expected code, per the check lines in the body: both prefixes use
; xvpermi.q, movgr2fr.w and xvshuf.b to bring the selected byte to element 0.
; The LA64 prefix then stores it with xvstelm.b (index 0), matching the
; removed CHECK-prefixed lines; the LA32 prefix instead expects vpickve2gr.b
; into $a0 followed by st.b, with movgr2fr.w ordered before xvpermi.q.
; NOTE(review): the leading digits and '-'/'+' markers on the lines below look
; like diff-view residue (old/new line numbers) -- confirm against the real file.
76107define void @extract_32xi8_idx (ptr %src , ptr %dst , i32 %idx ) nounwind {
77- ; CHECK-LABEL: extract_32xi8_idx:
78- ; CHECK: # %bb.0:
79- ; CHECK-NEXT: xvld $xr0, $a0, 0
80- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
81- ; CHECK-NEXT: movgr2fr.w $fa2, $a2
82- ; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
83- ; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
84- ; CHECK-NEXT: ret
108+ ; LA32-LABEL: extract_32xi8_idx:
109+ ; LA32: # %bb.0:
110+ ; LA32-NEXT: xvld $xr0, $a0, 0
111+ ; LA32-NEXT: movgr2fr.w $fa1, $a2
112+ ; LA32-NEXT: xvpermi.q $xr2, $xr0, 1
113+ ; LA32-NEXT: xvshuf.b $xr0, $xr2, $xr0, $xr1
114+ ; LA32-NEXT: vpickve2gr.b $a0, $vr0, 0
115+ ; LA32-NEXT: st.b $a0, $a1, 0
116+ ; LA32-NEXT: ret
117+ ;
118+ ; LA64-LABEL: extract_32xi8_idx:
119+ ; LA64: # %bb.0:
120+ ; LA64-NEXT: xvld $xr0, $a0, 0
121+ ; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
122+ ; LA64-NEXT: movgr2fr.w $fa2, $a2
123+ ; LA64-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
124+ ; LA64-NEXT: xvstelm.b $xr0, $a1, 0, 0
125+ ; LA64-NEXT: ret
85126 %v = load volatile <32 x i8 >, ptr %src
86127 %e = extractelement <32 x i8 > %v , i32 %idx
87128 store i8 %e , ptr %dst
88129 ret void
89130}
90131
; extract_16xi16_idx: volatile-loads a <16 x i16> vector from %src, extracts
; the element selected by the run-time i32 argument %idx, and stores that i16
; to %dst.
; Expected code, per the check lines in the body: both prefixes use
; xvpermi.q, movgr2fr.w and xvshuf.h. The LA64 prefix then stores element 0
; with xvstelm.h, matching the removed CHECK-prefixed lines; the LA32 prefix
; instead expects vpickve2gr.h into $a0 followed by st.h, with movgr2fr.w
; ordered before xvpermi.q.
; NOTE(review): the leading digits and '-'/'+' markers on the lines below look
; like diff-view residue (old/new line numbers) -- confirm against the real file.
91132define void @extract_16xi16_idx (ptr %src , ptr %dst , i32 %idx ) nounwind {
92- ; CHECK-LABEL: extract_16xi16_idx:
93- ; CHECK: # %bb.0:
94- ; CHECK-NEXT: xvld $xr0, $a0, 0
95- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
96- ; CHECK-NEXT: movgr2fr.w $fa2, $a2
97- ; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
98- ; CHECK-NEXT: xvstelm.h $xr2, $a1, 0, 0
99- ; CHECK-NEXT: ret
133+ ; LA32-LABEL: extract_16xi16_idx:
134+ ; LA32: # %bb.0:
135+ ; LA32-NEXT: xvld $xr0, $a0, 0
136+ ; LA32-NEXT: movgr2fr.w $fa1, $a2
137+ ; LA32-NEXT: xvpermi.q $xr2, $xr0, 1
138+ ; LA32-NEXT: xvshuf.h $xr1, $xr2, $xr0
139+ ; LA32-NEXT: vpickve2gr.h $a0, $vr1, 0
140+ ; LA32-NEXT: st.h $a0, $a1, 0
141+ ; LA32-NEXT: ret
142+ ;
143+ ; LA64-LABEL: extract_16xi16_idx:
144+ ; LA64: # %bb.0:
145+ ; LA64-NEXT: xvld $xr0, $a0, 0
146+ ; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
147+ ; LA64-NEXT: movgr2fr.w $fa2, $a2
148+ ; LA64-NEXT: xvshuf.h $xr2, $xr1, $xr0
149+ ; LA64-NEXT: xvstelm.h $xr2, $a1, 0, 0
150+ ; LA64-NEXT: ret
100151 %v = load volatile <16 x i16 >, ptr %src
101152 %e = extractelement <16 x i16 > %v , i32 %idx
102153 store i16 %e , ptr %dst
103154 ret void
104155}
105156
; extract_8xi32_idx: volatile-loads a <8 x i32> vector from %src, extracts the
; element selected by the run-time i32 argument %idx, and stores that i32 to
; %dst.
; Expected code, per the check lines in the body: both prefixes use
; xvreplgr2vr.w on $a2 followed by xvperm.w. The LA64 prefix then stores
; element 0 with xvstelm.w, matching the removed CHECK-prefixed lines; the
; LA32 prefix instead expects xvpickve2gr.w into $a0 followed by st.w.
; NOTE(review): the leading digits and '-'/'+' markers on the lines below look
; like diff-view residue (old/new line numbers) -- confirm against the real file.
106157define void @extract_8xi32_idx (ptr %src , ptr %dst , i32 %idx ) nounwind {
107- ; CHECK-LABEL: extract_8xi32_idx:
108- ; CHECK: # %bb.0:
109- ; CHECK-NEXT: xvld $xr0, $a0, 0
110- ; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
111- ; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1
112- ; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
113- ; CHECK-NEXT: ret
158+ ; LA32-LABEL: extract_8xi32_idx:
159+ ; LA32: # %bb.0:
160+ ; LA32-NEXT: xvld $xr0, $a0, 0
161+ ; LA32-NEXT: xvreplgr2vr.w $xr1, $a2
162+ ; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1
163+ ; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
164+ ; LA32-NEXT: st.w $a0, $a1, 0
165+ ; LA32-NEXT: ret
166+ ;
167+ ; LA64-LABEL: extract_8xi32_idx:
168+ ; LA64: # %bb.0:
169+ ; LA64-NEXT: xvld $xr0, $a0, 0
170+ ; LA64-NEXT: xvreplgr2vr.w $xr1, $a2
171+ ; LA64-NEXT: xvperm.w $xr0, $xr0, $xr1
172+ ; LA64-NEXT: xvstelm.w $xr0, $a1, 0, 0
173+ ; LA64-NEXT: ret
114174 %v = load volatile <8 x i32 >, ptr %src
115175 %e = extractelement <8 x i32 > %v , i32 %idx
116176 store i32 %e , ptr %dst
117177 ret void
118178}
119179
120180define void @extract_4xi64_idx (ptr %src , ptr %dst , i32 %idx ) nounwind {
121- ; CHECK-LABEL: extract_4xi64_idx:
122- ; CHECK: # %bb.0:
123- ; CHECK-NEXT: xvld $xr0, $a0, 0
124- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
125- ; CHECK-NEXT: movgr2fr.w $fa2, $a2
126- ; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
127- ; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0
128- ; CHECK-NEXT: ret
181+ ; LA32-LABEL: extract_4xi64_idx:
182+ ; LA32: # %bb.0:
183+ ; LA32-NEXT: xvld $xr0, $a0, 0
184+ ; LA32-NEXT: add.w $a0, $a2, $a2
185+ ; LA32-NEXT: addi.w $a2, $a0, 1
186+ ; LA32-NEXT: xvreplgr2vr.w $xr1, $a2
187+ ; LA32-NEXT: xvperm.w $xr1, $xr0, $xr1
188+ ; LA32-NEXT: xvpickve2gr.w $a2, $xr1, 0
189+ ; LA32-NEXT: xvreplgr2vr.w $xr1, $a0
190+ ; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1
191+ ; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
192+ ; LA32-NEXT: st.w $a0, $a1, 0
193+ ; LA32-NEXT: st.w $a2, $a1, 4
194+ ; LA32-NEXT: ret
195+ ;
196+ ; LA64-LABEL: extract_4xi64_idx:
197+ ; LA64: # %bb.0:
198+ ; LA64-NEXT: xvld $xr0, $a0, 0
199+ ; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
200+ ; LA64-NEXT: movgr2fr.w $fa2, $a2
201+ ; LA64-NEXT: xvshuf.d $xr2, $xr1, $xr0
202+ ; LA64-NEXT: xvstelm.d $xr2, $a1, 0, 0
203+ ; LA64-NEXT: ret
129204 %v = load volatile <4 x i64 >, ptr %src
130205 %e = extractelement <4 x i64 > %v , i32 %idx
131206 store i64 %e , ptr %dst
@@ -147,28 +222,45 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
147222}
148223
; extract_4xdouble_idx: volatile-loads a <4 x double> vector from %src,
; extracts the element selected by the run-time i32 argument %idx, and stores
; that double to %dst.
; Expected code, per the check lines in the body: both prefixes expect the
; same four operations after the load (xvpermi.q, movgr2fr.w, xvshuf.d, then
; xvstelm.d of element 0). They differ only in ordering and register choice:
; the LA32 prefix has movgr2fr.w first and uses $fa1/$xr2/$xr1, while the LA64
; prefix keeps the order and registers of the removed CHECK-prefixed lines.
; NOTE(review): the leading digits and '-'/'+' markers on the lines below look
; like diff-view residue (old/new line numbers) -- confirm against the real file.
149224define void @extract_4xdouble_idx (ptr %src , ptr %dst , i32 %idx ) nounwind {
150- ; CHECK-LABEL: extract_4xdouble_idx:
151- ; CHECK: # %bb.0:
152- ; CHECK-NEXT: xvld $xr0, $a0, 0
153- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
154- ; CHECK-NEXT: movgr2fr.w $fa2, $a2
155- ; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
156- ; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0
157- ; CHECK-NEXT: ret
225+ ; LA32-LABEL: extract_4xdouble_idx:
226+ ; LA32: # %bb.0:
227+ ; LA32-NEXT: xvld $xr0, $a0, 0
228+ ; LA32-NEXT: movgr2fr.w $fa1, $a2
229+ ; LA32-NEXT: xvpermi.q $xr2, $xr0, 1
230+ ; LA32-NEXT: xvshuf.d $xr1, $xr2, $xr0
231+ ; LA32-NEXT: xvstelm.d $xr1, $a1, 0, 0
232+ ; LA32-NEXT: ret
233+ ;
234+ ; LA64-LABEL: extract_4xdouble_idx:
235+ ; LA64: # %bb.0:
236+ ; LA64-NEXT: xvld $xr0, $a0, 0
237+ ; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
238+ ; LA64-NEXT: movgr2fr.w $fa2, $a2
239+ ; LA64-NEXT: xvshuf.d $xr2, $xr1, $xr0
240+ ; LA64-NEXT: xvstelm.d $xr2, $a1, 0, 0
241+ ; LA64-NEXT: ret
158242 %v = load volatile <4 x double >, ptr %src
159243 %e = extractelement <4 x double > %v , i32 %idx
160244 store double %e , ptr %dst
161245 ret void
162246}
163247
164248define void @eliminate_frame_index (<8 x i32 > %a ) nounwind {
165- ; CHECK-LABEL: eliminate_frame_index:
166- ; CHECK: # %bb.0:
167- ; CHECK-NEXT: addi.d $sp, $sp, -1040
168- ; CHECK-NEXT: addi.d $a0, $sp, 524
169- ; CHECK-NEXT: xvstelm.w $xr0, $a0, 0, 1
170- ; CHECK-NEXT: addi.d $sp, $sp, 1040
171- ; CHECK-NEXT: ret
249+ ; LA32-LABEL: eliminate_frame_index:
250+ ; LA32: # %bb.0:
251+ ; LA32-NEXT: addi.w $sp, $sp, -1040
252+ ; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1
253+ ; LA32-NEXT: st.w $a0, $sp, 524
254+ ; LA32-NEXT: addi.w $sp, $sp, 1040
255+ ; LA32-NEXT: ret
256+ ;
257+ ; LA64-LABEL: eliminate_frame_index:
258+ ; LA64: # %bb.0:
259+ ; LA64-NEXT: addi.d $sp, $sp, -1040
260+ ; LA64-NEXT: addi.d $a0, $sp, 524
261+ ; LA64-NEXT: xvstelm.w $xr0, $a0, 0, 1
262+ ; LA64-NEXT: addi.d $sp, $sp, 1040
263+ ; LA64-NEXT: ret
172264 %1 = alloca [32 x [8 x i32 ]]
173265 %2 = getelementptr i8 , ptr %1 , i64 508
174266 %b = extractelement <8 x i32 > %a , i64 1
0 commit comments