@@ -5,11 +5,7 @@ define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) nounwind {
55; CHECK-LABEL: load_trunc_2i64_to_2i32:
66; CHECK: # %bb.0:
77; CHECK-NEXT: vld $vr0, $a0, 0
8- ; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
9- ; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
10- ; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
11- ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
12- ; CHECK-NEXT: vpackev.w $vr0, $vr0, $vr1
8+ ; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 8
139; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
1410; CHECK-NEXT: st.d $a0, $a1, 0
1511; CHECK-NEXT: ret
@@ -23,12 +19,10 @@ define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) nounwind {
2319; CHECK-LABEL: load_trunc_2i64_to_2i16:
2420; CHECK: # %bb.0:
2521; CHECK-NEXT: vld $vr0, $a0, 0
26- ; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
27- ; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
28- ; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
29- ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
30- ; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1
31- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
22+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
23+ ; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
24+ ; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
25+ ; CHECK-NEXT: vpickve2gr.w $a0, $vr1, 0
3226; CHECK-NEXT: st.w $a0, $a1, 0
3327; CHECK-NEXT: ret
3428 %a = load <2 x i64 >, ptr %ptr
@@ -41,11 +35,9 @@ define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) nounwind {
4135; CHECK-LABEL: load_trunc_2i64_to_2i8:
4236; CHECK: # %bb.0:
4337; CHECK-NEXT: vld $vr0, $a0, 0
44- ; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
45- ; CHECK-NEXT: vinsgr2vr.b $vr1, $a0, 0
46- ; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
47- ; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
48- ; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1
38+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
39+ ; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
40+ ; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
4941; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
5042; CHECK-NEXT: st.h $a0, $a1, 0
5143; CHECK-NEXT: ret
@@ -58,19 +50,10 @@ define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) nounwind {
5850define void @load_trunc_4i32_to_4i16 (ptr %ptr , ptr %dst ) nounwind {
5951; CHECK-LABEL: load_trunc_4i32_to_4i16:
6052; CHECK: # %bb.0:
61- ; CHECK-NEXT: addi.d $sp, $sp, -16
6253; CHECK-NEXT: vld $vr0, $a0, 0
63- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3
64- ; CHECK-NEXT: st.h $a0, $sp, 6
65- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2
66- ; CHECK-NEXT: st.h $a0, $sp, 4
67- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
68- ; CHECK-NEXT: st.h $a0, $sp, 2
69- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
70- ; CHECK-NEXT: st.h $a0, $sp, 0
71- ; CHECK-NEXT: ld.d $a0, $sp, 0
54+ ; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr0
55+ ; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
7256; CHECK-NEXT: st.d $a0, $a1, 0
73- ; CHECK-NEXT: addi.d $sp, $sp, 16
7457; CHECK-NEXT: ret
7558 %a = load <4 x i32 >, ptr %ptr
7659 %trunc = trunc <4 x i32 > %a to <4 x i16 >
@@ -81,20 +64,12 @@ define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) nounwind {
8164define void @load_trunc_4i32_to_4i8 (ptr %ptr , ptr %dst ) nounwind {
8265; CHECK-LABEL: load_trunc_4i32_to_4i8:
8366; CHECK: # %bb.0:
84- ; CHECK-NEXT: addi.d $sp, $sp, -16
8567; CHECK-NEXT: vld $vr0, $a0, 0
86- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3
87- ; CHECK-NEXT: st.b $a0, $sp, 3
88- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 2
89- ; CHECK-NEXT: st.b $a0, $sp, 2
90- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
91- ; CHECK-NEXT: st.b $a0, $sp, 1
92- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
93- ; CHECK-NEXT: st.b $a0, $sp, 0
94- ; CHECK-NEXT: vld $vr0, $sp, 0
68+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
69+ ; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
70+ ; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
9571; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
9672; CHECK-NEXT: st.w $a0, $a1, 0
97- ; CHECK-NEXT: addi.d $sp, $sp, 16
9873; CHECK-NEXT: ret
9974 %a = load <4 x i32 >, ptr %ptr
10075 %trunc = trunc <4 x i32 > %a to <4 x i8 >
@@ -105,27 +80,10 @@ define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
10580define void @load_trunc_8i16_to_8i8 (ptr %ptr , ptr %dst ) nounwind {
10681; CHECK-LABEL: load_trunc_8i16_to_8i8:
10782; CHECK: # %bb.0:
108- ; CHECK-NEXT: addi.d $sp, $sp, -16
10983; CHECK-NEXT: vld $vr0, $a0, 0
110- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
111- ; CHECK-NEXT: st.b $a0, $sp, 7
112- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
113- ; CHECK-NEXT: st.b $a0, $sp, 6
114- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
115- ; CHECK-NEXT: st.b $a0, $sp, 5
116- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
117- ; CHECK-NEXT: st.b $a0, $sp, 4
118- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
119- ; CHECK-NEXT: st.b $a0, $sp, 3
120- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
121- ; CHECK-NEXT: st.b $a0, $sp, 2
122- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
123- ; CHECK-NEXT: st.b $a0, $sp, 1
124- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
125- ; CHECK-NEXT: st.b $a0, $sp, 0
126- ; CHECK-NEXT: ld.d $a0, $sp, 0
84+ ; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
85+ ; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
12786; CHECK-NEXT: st.d $a0, $a1, 0
128- ; CHECK-NEXT: addi.d $sp, $sp, 16
12987; CHECK-NEXT: ret
13088 %a = load <8 x i16 >, ptr %ptr
13189 %trunc = trunc <8 x i16 > %a to <8 x i8 >
@@ -138,11 +96,7 @@ define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) nounwind {
13896; CHECK: # %bb.0:
13997; CHECK-NEXT: ld.d $a0, $a0, 0
14098; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
141- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
142- ; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
143- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
144- ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
145- ; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1
99+ ; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 8
146100; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
147101; CHECK-NEXT: st.w $a0, $a1, 0
148102; CHECK-NEXT: ret
@@ -156,12 +110,10 @@ define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
156110; CHECK-LABEL: load_trunc_2i32_to_2i8:
157111; CHECK: # %bb.0:
158112; CHECK-NEXT: ld.d $a0, $a0, 0
159- ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
160- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
161- ; CHECK-NEXT: vinsgr2vr.b $vr1, $a0, 0
162- ; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
163- ; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
164- ; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1
113+ ; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
114+ ; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI7_0)
115+ ; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
116+ ; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
165117; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
166118; CHECK-NEXT: st.h $a0, $a1, 0
167119; CHECK-NEXT: ret
@@ -174,21 +126,11 @@ define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
174126define void @load_trunc_4i16_to_4i8 (ptr %ptr , ptr %dst ) nounwind {
175127; CHECK-LABEL: load_trunc_4i16_to_4i8:
176128; CHECK: # %bb.0:
177- ; CHECK-NEXT: addi.d $sp, $sp, -16
178129; CHECK-NEXT: ld.d $a0, $a0, 0
179130; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
180- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
181- ; CHECK-NEXT: st.b $a0, $sp, 3
182- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
183- ; CHECK-NEXT: st.b $a0, $sp, 2
184- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
185- ; CHECK-NEXT: st.b $a0, $sp, 1
186- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
187- ; CHECK-NEXT: st.b $a0, $sp, 0
188- ; CHECK-NEXT: vld $vr0, $sp, 0
131+ ; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
189132; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
190133; CHECK-NEXT: st.w $a0, $a1, 0
191- ; CHECK-NEXT: addi.d $sp, $sp, 16
192134; CHECK-NEXT: ret
193135 %a = load <4 x i16 >, ptr %ptr
194136 %trunc = trunc <4 x i16 > %a to <4 x i8 >
@@ -201,11 +143,7 @@ define void @load_trunc_2i16_to_2i8(ptr %ptr, ptr %dst) nounwind {
201143; CHECK: # %bb.0:
202144; CHECK-NEXT: ld.w $a0, $a0, 0
203145; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
204- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
205- ; CHECK-NEXT: vinsgr2vr.b $vr1, $a0, 0
206- ; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
207- ; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
208- ; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1
146+ ; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 8
209147; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
210148; CHECK-NEXT: st.h $a0, $a1, 0
211149; CHECK-NEXT: ret
0 commit comments