; Test: single-source shufflevector on <32 x i8> (the second operand is poison).
; Mask, as written on the shufflevector line below: result lanes 0-1 take
; source lanes 16-17, result lanes 2-15 take source lanes 0-13, and result
; lanes 16-31 keep source lanes 16-31.
; NOTE(review): the expectation lines carrying a '-' marker look like the
; sequence removed by a patch (stack realignment, xvst spill, then ld.h /
; vpickve2gr.h / vinsgr2vr.h element moves), and the lines carrying a '+'
; marker look like its replacement (two xvld loads from .LCPI0_0/.LCPI0_1,
; then xvpermi.d, xvshuf.d and xvshuf.h) -- confirm against the patch itself.
44define <32 x i8 > @shuffle_v32i8 (<32 x i8 > %a ) {
55; CHECK-LABEL: shuffle_v32i8:
66; CHECK: # %bb.0:
7- ; CHECK-NEXT: addi.d $sp, $sp, -64
8- ; CHECK-NEXT: .cfi_def_cfa_offset 64
9- ; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
10- ; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
11- ; CHECK-NEXT: .cfi_offset 1, -8
12- ; CHECK-NEXT: .cfi_offset 22, -16
13- ; CHECK-NEXT: addi.d $fp, $sp, 64
14- ; CHECK-NEXT: .cfi_def_cfa 22, 0
15- ; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
16- ; CHECK-NEXT: xvori.b $xr1, $xr0, 0
17- ; CHECK-NEXT: xvst $xr0, $sp, 0
18- ; CHECK-NEXT: ld.h $a0, $sp, 16
19- ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
20- ; CHECK-NEXT: vpickve2gr.h $a1, $vr1, 0
21- ; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
22- ; CHECK-NEXT: vpickve2gr.h $a1, $vr1, 1
23- ; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
24- ; CHECK-NEXT: vpickve2gr.h $a1, $vr1, 2
25- ; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3
26- ; CHECK-NEXT: vpickve2gr.h $a1, $vr1, 3
27- ; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4
28- ; CHECK-NEXT: vpickve2gr.h $a1, $vr1, 4
29- ; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5
30- ; CHECK-NEXT: vpickve2gr.h $a1, $vr1, 5
31- ; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6
32- ; CHECK-NEXT: vpickve2gr.h $a1, $vr1, 6
33- ; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
34- ; CHECK-NEXT: xvori.b $xr1, $xr0, 0
35- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
36- ; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
37- ; CHECK-NEXT: ld.h $a0, $sp, 18
38- ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
39- ; CHECK-NEXT: xvori.b $xr1, $xr0, 0
40- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
41- ; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1
42- ; CHECK-NEXT: ld.h $a0, $sp, 20
43- ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
44- ; CHECK-NEXT: xvori.b $xr1, $xr0, 0
45- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
46- ; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 2
47- ; CHECK-NEXT: ld.h $a0, $sp, 22
48- ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
49- ; CHECK-NEXT: xvori.b $xr1, $xr0, 0
50- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
51- ; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 3
52- ; CHECK-NEXT: ld.h $a0, $sp, 24
53- ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
54- ; CHECK-NEXT: xvori.b $xr1, $xr0, 0
55- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
56- ; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 4
57- ; CHECK-NEXT: ld.h $a0, $sp, 26
58- ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
59- ; CHECK-NEXT: xvori.b $xr1, $xr0, 0
60- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
61- ; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 5
62- ; CHECK-NEXT: ld.h $a0, $sp, 28
63- ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
64- ; CHECK-NEXT: xvori.b $xr1, $xr0, 0
65- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
66- ; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 6
67- ; CHECK-NEXT: ld.h $a0, $sp, 30
68- ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
69- ; CHECK-NEXT: xvori.b $xr1, $xr0, 0
70- ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
71- ; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 7
72- ; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
73- ; CHECK-NEXT: addi.d $sp, $fp, -64
74- ; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
75- ; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
76- ; CHECK-NEXT: addi.d $sp, $sp, 64
7+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
8+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
9+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1)
10+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
11+ ; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
12+ ; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
13+ ; CHECK-NEXT: xvshuf.h $xr1, $xr2, $xr0
14+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
7715; CHECK-NEXT: ret
7816 %shuffle = shufflevector <32 x i8 > %a , <32 x i8 > poison, <32 x i32 > <i32 16 , i32 17 , i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
7917 ret <32 x i8 > %shuffle
@@ -83,21 +21,13 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
; Test: single-source shufflevector on <16 x i16> (the second operand is poison).
; Mask, as written on the shufflevector line below: result lanes 0-1 take
; source lanes 8-9, result lanes 2-7 take source lanes 0-5, and result lanes
; 8-15 keep source lanes 8-15.
; NOTE(review): the '-' marked expectation lines (xvpickve2gr.w /
; xvinsgr2vr.w element moves) look like the sequence removed by a patch and
; the '+' marked lines (two xvld loads from .LCPI1_0/.LCPI1_1, then
; xvpermi.d, xvshuf.d and xvshuf.w) like its replacement -- confirm against
; the patch itself.
8321define <16 x i16 > @shuffle_v16i16 (<16 x i16 > %a ) {
8422; CHECK-LABEL: shuffle_v16i16:
8523; CHECK: # %bb.0:
86- ; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4
87- ; CHECK-NEXT: xvinsgr2vr.w $xr1, $a0, 0
88- ; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 0
89- ; CHECK-NEXT: xvinsgr2vr.w $xr1, $a1, 1
90- ; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 1
91- ; CHECK-NEXT: xvinsgr2vr.w $xr1, $a1, 2
92- ; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 2
93- ; CHECK-NEXT: xvinsgr2vr.w $xr1, $a1, 3
94- ; CHECK-NEXT: xvinsgr2vr.w $xr1, $a0, 4
95- ; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5
96- ; CHECK-NEXT: xvinsgr2vr.w $xr1, $a0, 5
97- ; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6
98- ; CHECK-NEXT: xvinsgr2vr.w $xr1, $a0, 6
99- ; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
100- ; CHECK-NEXT: xvinsgr2vr.w $xr1, $a0, 7
24+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
25+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI1_0)
26+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_1)
27+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI1_1)
28+ ; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
29+ ; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
30+ ; CHECK-NEXT: xvshuf.w $xr1, $xr2, $xr0
10131; CHECK-NEXT: xvori.b $xr0, $xr1, 0
10232; CHECK-NEXT: ret
10333 %shuffle = shufflevector <16 x i16 > %a , <16 x i16 > poison, <16 x i32 > <i32 8 , i32 9 , i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
@@ -107,13 +37,13 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) {
; Test: single-source shufflevector on <8 x i32> (the second operand is poison).
; Mask, as written on the shufflevector line below: result lanes 0-1 take
; source lanes 4-5, result lanes 2-3 take source lanes 0-1, and result lanes
; 4-7 keep source lanes 4-7.
; NOTE(review): the '-' marked expectation lines (xvpickve2gr.d /
; xvinsgr2vr.d element moves) look like the sequence removed by a patch and
; the '+' marked lines (two xvld loads from .LCPI2_0/.LCPI2_1, then
; xvpermi.d and two xvshuf.d) like its replacement -- confirm against the
; patch itself.
; Mnemonics and operand lists are written without embedded spaces
; ("xvshuf.d", "$xr2, $a0") so each pattern matches the emitted text exactly.
10737define <8 x i32 > @shuffle_v8i32 (<8 x i32 > %a ) {
10838; CHECK-LABEL: shuffle_v8i32:
10939; CHECK: # %bb.0:
110- ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
111- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0
112- ; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 0
113- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a1, 1
114- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2
115- ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
116- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3
40+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
41+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
42+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1)
43+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_1)
44+ ; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
45+ ; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
46+ ; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
11747; CHECK-NEXT: xvori.b $xr0, $xr1, 0
11848; CHECK-NEXT: ret
11949 %shuffle = shufflevector <8 x i32 > %a , <8 x i32 > poison, <8 x i32 > <i32 4 , i32 5 , i32 0 , i32 1 , i32 4 , i32 5 , i32 6 , i32 7 >
@@ -123,14 +53,13 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
; Test: single-source shufflevector on <4 x i64> (the second operand is poison).
; Mask <3, 1, 2, 0>, as written on the shufflevector line below: result lanes
; 0 and 3 take source lanes 3 and 0 (swapped), while lanes 1 and 2 keep their
; source positions.
; NOTE(review): the '-' marked expectation lines (xvpickve2gr.d /
; xvinsgr2vr.d element moves) look like the sequence removed by a patch and
; the '+' marked lines (two xvld loads from .LCPI3_0/.LCPI3_1, then
; xvpermi.d and two xvshuf.d) like its replacement -- confirm against the
; patch itself.
12353define <4 x i64 > @shuffle_v4i64 (<4 x i64 > %a ) {
12454; CHECK-LABEL: shuffle_v4i64:
12555; CHECK: # %bb.0:
126- ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
127- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0
128- ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
129- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 1
130- ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
131- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2
132- ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
133- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3
56+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
57+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI3_0)
58+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
59+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_1)
60+ ; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
61+ ; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
62+ ; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
13463; CHECK-NEXT: xvori.b $xr0, $xr1, 0
13564; CHECK-NEXT: ret
13665 %shuffle = shufflevector <4 x i64 > %a , <4 x i64 > poison, <4 x i32 > <i32 3 , i32 1 , i32 2 , i32 0 >
@@ -140,19 +69,13 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
; Test: single-source shufflevector on <8 x float> (the second operand is
; poison).
; Mask, as written on the shufflevector line below: result lanes 0-1 take
; source lanes 4-5, result lanes 2-3 take source lanes 0-1, and result lanes
; 4-7 keep source lanes 4-7.
; NOTE(review): the '-' marked expectation lines (xvpickve2gr.d, movgr2fr.d /
; movfr2gr.d round trips, xvinsgr2vr.d) look like the sequence removed by a
; patch and the '+' marked lines (two xvld loads from .LCPI4_0/.LCPI4_1, then
; xvpermi.d and two xvshuf.d) like its replacement -- confirm against the
; patch itself.
14069define <8 x float > @shuffle_v8f32 (<8 x float > %a ) {
14170; CHECK-LABEL: shuffle_v8f32:
14271; CHECK: # %bb.0:
143- ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
144- ; CHECK-NEXT: movgr2fr.d $fa1, $a0
145- ; CHECK-NEXT: movfr2gr.d $a0, $fa1
146- ; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 0
147- ; CHECK-NEXT: movgr2fr.d $fa2, $a1
148- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0
149- ; CHECK-NEXT: movfr2gr.d $a1, $fa2
150- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a1, 1
151- ; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 3
152- ; CHECK-NEXT: movgr2fr.d $fa0, $a1
153- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2
154- ; CHECK-NEXT: movfr2gr.d $a0, $fa0
155- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3
72+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
73+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI4_0)
74+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_1)
75+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI4_1)
76+ ; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
77+ ; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
78+ ; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
15679; CHECK-NEXT: xvori.b $xr0, $xr1, 0
15780; CHECK-NEXT: ret
15881 %shuffle = shufflevector <8 x float > %a , <8 x float > poison, <8 x i32 > <i32 4 , i32 5 , i32 0 , i32 1 , i32 4 , i32 5 , i32 6 , i32 7 >
@@ -162,22 +85,13 @@ define <8 x float> @shuffle_v8f32(<8 x float> %a) {
; Test: single-source shufflevector on <4 x double> (the second operand is
; poison).
; Mask <3, 1, 2, 0>, as written on the shufflevector line below: result lanes
; 0 and 3 take source lanes 3 and 0 (swapped), while lanes 1 and 2 keep their
; source positions.
; NOTE(review): the '-' marked expectation lines (xvpickve2gr.d, movgr2fr.d /
; movfr2gr.d round trips, xvinsgr2vr.d) look like the sequence removed by a
; patch and the '+' marked lines (two xvld loads from .LCPI5_0/.LCPI5_1, then
; xvpermi.d and two xvshuf.d) like its replacement -- confirm against the
; patch itself.
16285define <4 x double > @shuffle_v4f64 (<4 x double > %a ) {
16386; CHECK-LABEL: shuffle_v4f64:
16487; CHECK: # %bb.0:
165- ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
166- ; CHECK-NEXT: movgr2fr.d $fa1, $a0
167- ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
168- ; CHECK-NEXT: movgr2fr.d $fa2, $a0
169- ; CHECK-NEXT: movfr2gr.d $a0, $fa1
170- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0
171- ; CHECK-NEXT: movfr2gr.d $a0, $fa2
172- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 1
173- ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
174- ; CHECK-NEXT: movgr2fr.d $fa2, $a0
175- ; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
176- ; CHECK-NEXT: movgr2fr.d $fa0, $a0
177- ; CHECK-NEXT: movfr2gr.d $a0, $fa2
178- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2
179- ; CHECK-NEXT: movfr2gr.d $a0, $fa0
180- ; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3
88+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
89+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI5_0)
90+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1)
91+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_1)
92+ ; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
93+ ; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
94+ ; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
18195; CHECK-NEXT: xvori.b $xr0, $xr1, 0
18296; CHECK-NEXT: ret
18397 %shuffle = shufflevector <4 x double > %a , <4 x double > poison, <4 x i32 > <i32 3 , i32 1 , i32 2 , i32 0 >
0 commit comments