@@ -17,14 +17,25 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
1717 ret <32 x i8 > %shuffle
1818}
1919
; Test: shuffle of a single <32 x i8> input (the second shufflevector operand is poison).
; Mask: result elements 0..15 use source indices 14,15,0..13 (all < 16) and result
; elements 16..31 use indices 16..31 unchanged, so every result element is taken from
; the same 16-element half of %a that it is written to.
; The expected output below loads one constant (.LCPI1_0) and contains a single
; xvshuf.h followed by xvori.b with immediate 0 and ret; no xvpermi.d appears.
; NOTE(review): xvshuf.h (not a .b form) is presumably usable because the mask moves
; bytes in adjacent pairs (14,15 / 0,1 / 2,3 ...) - confirm against the lowering code.
; NOTE(review): xvori.b with immediate 0 presumably just copies $xr1 into $xr0 - confirm.
20+ define <32 x i8 > @shuffle_v32i8_same_lane (<32 x i8 > %a ) {
21+ ; CHECK-LABEL: shuffle_v32i8_same_lane:
22+ ; CHECK: # %bb.0:
23+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
24+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI1_0)
25+ ; CHECK-NEXT: xvshuf.h $xr1, $xr0, $xr0
26+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
27+ ; CHECK-NEXT: ret
28+ %shuffle = shufflevector <32 x i8 > %a , <32 x i8 > poison, <32 x i32 > <i32 14 , i32 15 , i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
29+ ret <32 x i8 > %shuffle
30+ }
2031
2132define <16 x i16 > @shuffle_v16i16 (<16 x i16 > %a ) {
2233; CHECK-LABEL: shuffle_v16i16:
2334; CHECK: # %bb.0:
24- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0 )
25- ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI1_0 )
26- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_1 )
27- ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI1_1 )
35+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0 )
36+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0 )
37+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1 )
38+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_1 )
2839; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
2940; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
3041; CHECK-NEXT: xvshuf.w $xr1, $xr2, $xr0
@@ -34,13 +45,25 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) {
3445 ret <16 x i16 > %shuffle
3546}
3647
; Test: shuffle of a single <16 x i16> input (the second shufflevector operand is poison).
; Mask: result elements 0..7 use source indices 6,7,0,5,2,3,6,5 (all < 8) and result
; elements 8..15 use indices 8,9,10,13,12,15,13,15 (all in 8..15), so every result
; element is taken from the same 8-element half of %a that it is written to.
; The expected output below loads one constant (.LCPI3_0) and contains a single
; xvshuf.h followed by xvori.b with immediate 0 and ret; no xvpermi.d appears.
; NOTE(review): xvori.b with immediate 0 presumably just copies $xr1 into $xr0 - confirm.
48+ define <16 x i16 > @shuffle_v16i16_same_lane (<16 x i16 > %a ) {
49+ ; CHECK-LABEL: shuffle_v16i16_same_lane:
50+ ; CHECK: # %bb.0:
51+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
52+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
53+ ; CHECK-NEXT: xvshuf.h $xr1, $xr0, $xr0
54+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
55+ ; CHECK-NEXT: ret
56+ %shuffle = shufflevector <16 x i16 > %a , <16 x i16 > poison, <16 x i32 > <i32 6 , i32 7 , i32 0 , i32 5 , i32 2 , i32 3 , i32 6 , i32 5 , i32 8 , i32 9 , i32 10 , i32 13 , i32 12 , i32 15 , i32 13 , i32 15 >
57+ ret <16 x i16 > %shuffle
58+ }
59+
3760define <8 x i32 > @shuffle_v8i32 (<8 x i32 > %a ) {
3861; CHECK-LABEL: shuffle_v8i32:
3962; CHECK: # %bb.0:
40- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0 )
41- ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0 )
42- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1 )
43- ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_1 )
63+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0 )
64+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI4_0 )
65+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_1 )
66+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI4_1 )
4467; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
4568; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
4669; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
@@ -50,13 +73,25 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
5073 ret <8 x i32 > %shuffle
5174}
5275
; Test: shuffle of a single <8 x i32> input (the second shufflevector operand is poison).
; Mask <2,3,0,1,4,5,6,7>: result elements 0..3 use source indices below 4 and result
; elements 4..7 keep indices 4..7, so every result element is taken from the same
; 4-element half of %a that it is written to.
; The expected output below loads one constant (.LCPI5_0) and contains a single
; xvshuf.d followed by xvori.b with immediate 0 and ret; no xvpermi.d appears.
; NOTE(review): xvshuf.d (not a .w form) is presumably usable because the mask moves
; i32 elements in adjacent pairs (2,3 / 0,1 / 4,5 / 6,7) - confirm against the lowering code.
76+ define <8 x i32 > @shuffle_v8i32_same_lane (<8 x i32 > %a ) {
77+ ; CHECK-LABEL: shuffle_v8i32_same_lane:
78+ ; CHECK: # %bb.0:
79+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
80+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
81+ ; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
82+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
83+ ; CHECK-NEXT: ret
84+ %shuffle = shufflevector <8 x i32 > %a , <8 x i32 > poison, <8 x i32 > <i32 2 , i32 3 , i32 0 , i32 1 , i32 4 , i32 5 , i32 6 , i32 7 >
85+ ret <8 x i32 > %shuffle
86+ }
87+
5388define <4 x i64 > @shuffle_v4i64 (<4 x i64 > %a ) {
5489; CHECK-LABEL: shuffle_v4i64:
5590; CHECK: # %bb.0:
56- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0 )
57- ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI3_0 )
58- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1 )
59- ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_1 )
91+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0 )
92+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI6_0 )
93+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_1 )
94+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI6_1 )
6095; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
6196; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
6297; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
@@ -66,13 +101,25 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
66101 ret <4 x i64 > %shuffle
67102}
68103
; Test: shuffle of a single <4 x i64> input (the second shufflevector operand is poison).
; Mask <1,0,2,3>: result elements 0 and 1 are source elements 1 and 0 (swapped), and
; result elements 2 and 3 are unchanged, so every result element is taken from the
; same 2-element half of %a that it is written to.
; The expected output below loads one constant (.LCPI7_0) and contains a single
; xvshuf.d followed by xvori.b with immediate 0 and ret; no xvpermi.d appears.
; NOTE(review): xvori.b with immediate 0 presumably just copies $xr1 into $xr0 - confirm.
104+ define <4 x i64 > @shuffle_v4i64_same_lane (<4 x i64 > %a ) {
105+ ; CHECK-LABEL: shuffle_v4i64_same_lane:
106+ ; CHECK: # %bb.0:
107+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
108+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI7_0)
109+ ; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
110+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
111+ ; CHECK-NEXT: ret
112+ %shuffle = shufflevector <4 x i64 > %a , <4 x i64 > poison, <4 x i32 > <i32 1 , i32 0 , i32 2 , i32 3 >
113+ ret <4 x i64 > %shuffle
114+ }
115+
69116define <8 x float > @shuffle_v8f32 (<8 x float > %a ) {
70117; CHECK-LABEL: shuffle_v8f32:
71118; CHECK: # %bb.0:
72- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0 )
73- ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI4_0 )
74- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_1 )
75- ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI4_1 )
119+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0 )
120+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI8_0 )
121+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_1 )
122+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI8_1 )
76123; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
77124; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
78125; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
@@ -82,13 +129,26 @@ define <8 x float> @shuffle_v8f32(<8 x float> %a) {
82129 ret <8 x float > %shuffle
83130}
84131
; Test: shuffle of a single <8 x float> input (the second shufflevector operand is poison).
; Mask <3,2,0,2,3,1,2,3>: every index is below 4, so all eight result elements
; (including result elements 4..7) are taken from source elements 0..3 of %a.
; The expected output below loads one constant (.LCPI9_0), applies
; xvpermi.d $xr0, $xr0, 68 to the input, then a single xvshuf.w, then xvori.b with
; immediate 0 and ret.
; NOTE(review): the function name says "same_lane", but result elements 4..7 read
; indices 3,1,2,3 rather than indices in 4..7 - confirm the mask is intended.
; NOTE(review): xvpermi.d with immediate 68 presumably replicates the low half of the
; input into both halves before the shuffle - confirm against the ISA manual.
132+ define <8 x float > @shuffle_v8f32_same_lane (<8 x float > %a ) {
133+ ; CHECK-LABEL: shuffle_v8f32_same_lane:
134+ ; CHECK: # %bb.0:
135+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
136+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI9_0)
137+ ; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
138+ ; CHECK-NEXT: xvshuf.w $xr1, $xr0, $xr0
139+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
140+ ; CHECK-NEXT: ret
141+ %shuffle = shufflevector <8 x float > %a , <8 x float > poison, <8 x i32 > <i32 3 , i32 2 , i32 0 , i32 2 , i32 3 , i32 1 , i32 2 , i32 3 >
142+ ret <8 x float > %shuffle
143+ }
144+
85145define <4 x double > @shuffle_v4f64 (<4 x double > %a ) {
86146; CHECK-LABEL: shuffle_v4f64:
87147; CHECK: # %bb.0:
88- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0 )
89- ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI5_0 )
90- ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1 )
91- ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_1 )
148+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0 )
149+ ; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI10_0 )
150+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_1 )
151+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI10_1 )
92152; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
93153; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
94154; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
@@ -97,3 +157,16 @@ define <4 x double> @shuffle_v4f64(<4 x double> %a) {
97157 %shuffle = shufflevector <4 x double > %a , <4 x double > poison, <4 x i32 > <i32 3 , i32 1 , i32 2 , i32 0 >
98158 ret <4 x double > %shuffle
99159}
160+
; Test: shuffle of a single <4 x double> input (the second shufflevector operand is poison).
; Mask <3,2,0,1>: result elements 0 and 1 are source elements 3 and 2, and result
; elements 2 and 3 are source elements 0 and 1, so each 2-element half of the result
; is built entirely from the opposite half of %a.
; The expected output below loads one constant (.LCPI11_0), applies
; xvpermi.d $xr0, $xr0, 78 to the input, then a single xvshuf.d, then xvori.b with
; immediate 0 and ret.
; NOTE(review): the function name says "same_lane", but no result element reads from
; the half it is written to - confirm the mask is intended.
; NOTE(review): xvpermi.d with immediate 78 presumably swaps the two halves of the
; input before the shuffle - confirm against the ISA manual.
161+ define <4 x double > @shuffle_v4f64_same_lane (<4 x double > %a ) {
162+ ; CHECK-LABEL: shuffle_v4f64_same_lane:
163+ ; CHECK: # %bb.0:
164+ ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
165+ ; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
166+ ; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
167+ ; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
168+ ; CHECK-NEXT: xvori.b $xr0, $xr1, 0
169+ ; CHECK-NEXT: ret
170+ %shuffle = shufflevector <4 x double > %a , <4 x double > poison, <4 x i32 > <i32 3 , i32 2 , i32 0 , i32 1 >
171+ ret <4 x double > %shuffle
172+ }
0 commit comments