@@ -7,11 +7,8 @@ define void @rotl_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
77; CHECK: # %bb.0:
88; CHECK-NEXT: xvld $xr0, $a1, 0
99; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
10- ; CHECK-NEXT: xvrepli.b $xr2, 8
11- ; CHECK-NEXT: xvsub.b $xr2, $xr2, $xr1
12- ; CHECK-NEXT: xvsll.b $xr1, $xr0, $xr1
13- ; CHECK-NEXT: xvsrl.b $xr0, $xr0, $xr2
14- ; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
10+ ; CHECK-NEXT: xvneg.b $xr1, $xr1
11+ ; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1
1512; CHECK-NEXT: xvst $xr0, $a0, 0
1613; CHECK-NEXT: ret
1714 %v0 = load <32 x i8 >, ptr %src
@@ -30,11 +27,7 @@ define void @rotr_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
3027; CHECK: # %bb.0:
3128; CHECK-NEXT: xvld $xr0, $a1, 0
3229; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
33- ; CHECK-NEXT: xvrepli.b $xr2, 8
34- ; CHECK-NEXT: xvsub.b $xr2, $xr2, $xr1
35- ; CHECK-NEXT: xvsrl.b $xr1, $xr0, $xr1
36- ; CHECK-NEXT: xvsll.b $xr0, $xr0, $xr2
37- ; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
30+ ; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1
3831; CHECK-NEXT: xvst $xr0, $a0, 0
3932; CHECK-NEXT: ret
4033 %v0 = load <32 x i8 >, ptr %src
@@ -52,9 +45,7 @@ define void @rotr_v32i8_imm(ptr %dst, ptr %src) nounwind {
5245; CHECK-LABEL: rotr_v32i8_imm:
5346; CHECK: # %bb.0:
5447; CHECK-NEXT: xvld $xr0, $a1, 0
55- ; CHECK-NEXT: xvsrli.b $xr1, $xr0, 2
56- ; CHECK-NEXT: xvslli.b $xr0, $xr0, 6
57- ; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
48+ ; CHECK-NEXT: xvrotri.b $xr0, $xr0, 2
5849; CHECK-NEXT: xvst $xr0, $a0, 0
5950; CHECK-NEXT: ret
6051 %v0 = load <32 x i8 >, ptr %src
@@ -70,11 +61,8 @@ define void @rotl_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
7061; CHECK: # %bb.0:
7162; CHECK-NEXT: xvld $xr0, $a1, 0
7263; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
73- ; CHECK-NEXT: xvrepli.h $xr2, 16
74- ; CHECK-NEXT: xvsub.h $xr2, $xr2, $xr1
75- ; CHECK-NEXT: xvsll.h $xr1, $xr0, $xr1
76- ; CHECK-NEXT: xvsrl.h $xr0, $xr0, $xr2
77- ; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
64+ ; CHECK-NEXT: xvneg.h $xr1, $xr1
65+ ; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1
7866; CHECK-NEXT: xvst $xr0, $a0, 0
7967; CHECK-NEXT: ret
8068 %v0 = load <16 x i16 >, ptr %src
@@ -93,11 +81,7 @@ define void @rotr_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
9381; CHECK: # %bb.0:
9482; CHECK-NEXT: xvld $xr0, $a1, 0
9583; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
96- ; CHECK-NEXT: xvrepli.h $xr2, 16
97- ; CHECK-NEXT: xvsub.h $xr2, $xr2, $xr1
98- ; CHECK-NEXT: xvsrl.h $xr1, $xr0, $xr1
99- ; CHECK-NEXT: xvsll.h $xr0, $xr0, $xr2
100- ; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
84+ ; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1
10185; CHECK-NEXT: xvst $xr0, $a0, 0
10286; CHECK-NEXT: ret
10387 %v0 = load <16 x i16 >, ptr %src
@@ -115,9 +99,7 @@ define void @rotr_v16i16_imm(ptr %dst, ptr %src) nounwind {
11599; CHECK-LABEL: rotr_v16i16_imm:
116100; CHECK: # %bb.0:
117101; CHECK-NEXT: xvld $xr0, $a1, 0
118- ; CHECK-NEXT: xvsrli.h $xr1, $xr0, 2
119- ; CHECK-NEXT: xvslli.h $xr0, $xr0, 14
120- ; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
102+ ; CHECK-NEXT: xvrotri.h $xr0, $xr0, 2
121103; CHECK-NEXT: xvst $xr0, $a0, 0
122104; CHECK-NEXT: ret
123105 %v0 = load <16 x i16 >, ptr %src
@@ -133,11 +115,8 @@ define void @rotl_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
133115; CHECK: # %bb.0:
134116; CHECK-NEXT: xvld $xr0, $a1, 0
135117; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
136- ; CHECK-NEXT: xvrepli.w $xr2, 32
137- ; CHECK-NEXT: xvsub.w $xr2, $xr2, $xr1
138- ; CHECK-NEXT: xvsll.w $xr1, $xr0, $xr1
139- ; CHECK-NEXT: xvsrl.w $xr0, $xr0, $xr2
140- ; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
118+ ; CHECK-NEXT: xvneg.w $xr1, $xr1
119+ ; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1
141120; CHECK-NEXT: xvst $xr0, $a0, 0
142121; CHECK-NEXT: ret
143122 %v0 = load <8 x i32 >, ptr %src
@@ -156,11 +135,7 @@ define void @rotr_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
156135; CHECK: # %bb.0:
157136; CHECK-NEXT: xvld $xr0, $a1, 0
158137; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
159- ; CHECK-NEXT: xvrepli.w $xr2, 32
160- ; CHECK-NEXT: xvsub.w $xr2, $xr2, $xr1
161- ; CHECK-NEXT: xvsrl.w $xr1, $xr0, $xr1
162- ; CHECK-NEXT: xvsll.w $xr0, $xr0, $xr2
163- ; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
138+ ; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1
164139; CHECK-NEXT: xvst $xr0, $a0, 0
165140; CHECK-NEXT: ret
166141 %v0 = load <8 x i32 >, ptr %src
@@ -178,9 +153,7 @@ define void @rotr_v8i32_imm(ptr %dst, ptr %src) nounwind {
178153; CHECK-LABEL: rotr_v8i32_imm:
179154; CHECK: # %bb.0:
180155; CHECK-NEXT: xvld $xr0, $a1, 0
181- ; CHECK-NEXT: xvsrli.w $xr1, $xr0, 2
182- ; CHECK-NEXT: xvslli.w $xr0, $xr0, 30
183- ; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
156+ ; CHECK-NEXT: xvrotri.w $xr0, $xr0, 2
184157; CHECK-NEXT: xvst $xr0, $a0, 0
185158; CHECK-NEXT: ret
186159 %v0 = load <8 x i32 >, ptr %src
@@ -196,25 +169,19 @@ define void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
196169; LA32: # %bb.0:
197170; LA32-NEXT: xvld $xr0, $a1, 0
198171; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
199- ; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
200- ; LA32-NEXT: xvreplve0.d $xr1, $xr1
201- ; LA32-NEXT: xvrepli.d $xr2, 64
202- ; LA32-NEXT: xvsub.d $xr2, $xr2, $xr1
203- ; LA32-NEXT: xvsll.d $xr1, $xr0, $xr1
204- ; LA32-NEXT: xvsrl.d $xr0, $xr0, $xr2
205- ; LA32-NEXT: xvor.v $xr0, $xr1, $xr0
172+ ; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
173+ ; LA32-NEXT: xvpermi.q $xr1, $xr1, 2
174+ ; LA32-NEXT: xvneg.d $xr1, $xr1
175+ ; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
206176; LA32-NEXT: xvst $xr0, $a0, 0
207177; LA32-NEXT: ret
208178;
209179; LA64-LABEL: rotl_v4i64:
210180; LA64: # %bb.0:
211181; LA64-NEXT: xvld $xr0, $a1, 0
212182; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
213- ; LA64-NEXT: xvrepli.d $xr2, 64
214- ; LA64-NEXT: xvsub.d $xr2, $xr2, $xr1
215- ; LA64-NEXT: xvsll.d $xr1, $xr0, $xr1
216- ; LA64-NEXT: xvsrl.d $xr0, $xr0, $xr2
217- ; LA64-NEXT: xvor.v $xr0, $xr1, $xr0
183+ ; LA64-NEXT: xvneg.d $xr1, $xr1
184+ ; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1
218185; LA64-NEXT: xvst $xr0, $a0, 0
219186; LA64-NEXT: ret
220187 %v0 = load <4 x i64 >, ptr %src
@@ -233,25 +200,17 @@ define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
233200; LA32: # %bb.0:
234201; LA32-NEXT: xvld $xr0, $a1, 0
235202; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
236- ; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
237- ; LA32-NEXT: xvreplve0.d $xr1, $xr1
238- ; LA32-NEXT: xvrepli.d $xr2, 64
239- ; LA32-NEXT: xvsub.d $xr2, $xr2, $xr1
240- ; LA32-NEXT: xvsrl.d $xr1, $xr0, $xr1
241- ; LA32-NEXT: xvsll.d $xr0, $xr0, $xr2
242- ; LA32-NEXT: xvor.v $xr0, $xr1, $xr0
203+ ; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
204+ ; LA32-NEXT: xvpermi.q $xr1, $xr1, 2
205+ ; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
243206; LA32-NEXT: xvst $xr0, $a0, 0
244207; LA32-NEXT: ret
245208;
246209; LA64-LABEL: rotr_v4i64:
247210; LA64: # %bb.0:
248211; LA64-NEXT: xvld $xr0, $a1, 0
249212; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
250- ; LA64-NEXT: xvrepli.d $xr2, 64
251- ; LA64-NEXT: xvsub.d $xr2, $xr2, $xr1
252- ; LA64-NEXT: xvsrl.d $xr1, $xr0, $xr1
253- ; LA64-NEXT: xvsll.d $xr0, $xr0, $xr2
254- ; LA64-NEXT: xvor.v $xr0, $xr1, $xr0
213+ ; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1
255214; LA64-NEXT: xvst $xr0, $a0, 0
256215; LA64-NEXT: ret
257216 %v0 = load <4 x i64 >, ptr %src
@@ -266,14 +225,20 @@ define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
266225}
267226
268227define void @rotr_v4i64_imm (ptr %dst , ptr %src ) nounwind {
269- ; CHECK-LABEL: rotr_v4i64_imm:
270- ; CHECK: # %bb.0:
271- ; CHECK-NEXT: xvld $xr0, $a1, 0
272- ; CHECK-NEXT: xvsrli.d $xr1, $xr0, 2
273- ; CHECK-NEXT: xvslli.d $xr0, $xr0, 62
274- ; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
275- ; CHECK-NEXT: xvst $xr0, $a0, 0
276- ; CHECK-NEXT: ret
228+ ; LA32-LABEL: rotr_v4i64_imm:
229+ ; LA32: # %bb.0:
230+ ; LA32-NEXT: xvld $xr0, $a1, 0
231+ ; LA32-NEXT: xvrepli.w $xr1, -62
232+ ; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
233+ ; LA32-NEXT: xvst $xr0, $a0, 0
234+ ; LA32-NEXT: ret
235+ ;
236+ ; LA64-LABEL: rotr_v4i64_imm:
237+ ; LA64: # %bb.0:
238+ ; LA64-NEXT: xvld $xr0, $a1, 0
239+ ; LA64-NEXT: xvrotri.d $xr0, $xr0, 2
240+ ; LA64-NEXT: xvst $xr0, $a0, 0
241+ ; LA64-NEXT: ret
277242 %v0 = load <4 x i64 >, ptr %src
278243 %b = lshr <4 x i64 > %v0 , splat (i64 2 )
279244 %c = shl <4 x i64 > %v0 , splat (i64 62 )
0 commit comments