Skip to content

Commit c0ac0c4

Browse files
authored
[PowerPC] Add intrinsic support for xvrlw (#167349)
1 parent a3058d5 commit c0ac0c4

File tree

6 files changed

+187
-2
lines changed

6 files changed

+187
-2
lines changed

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,12 @@ class PowerPC_VSX_Sca_DDD_Intrinsic<string GCCIntSuffix>
387387
[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
388388
[IntrNoMem]>;
389389

390+
/// PowerPC_VSX_WWW_Intrinsic - A PowerPC VSX intrinsic that takes two v4i32
391+
/// vectors and returns one v4i32 vector. GCCIntSuffix is forwarded unchanged
/// to PowerPC_VSX_Intrinsic. These intrinsics are marked IntrNoMem and have
/// no side effects.
392+
class PowerPC_VSX_WWW_Intrinsic<string GCCIntSuffix>
393+
: PowerPC_VSX_Intrinsic<GCCIntSuffix,
394+
[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
395+
[IntrNoMem]>;
390396
//===----------------------------------------------------------------------===//
391397
// PowerPC Altivec Intrinsic Definitions.
392398

@@ -1214,6 +1220,7 @@ def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
12141220
def int_ppc_altivec_vrlb : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
12151221
def int_ppc_altivec_vrlh : PowerPC_Vec_HHH_Intrinsic<"vrlh">;
12161222
def int_ppc_altivec_vrlw : PowerPC_Vec_WWW_Intrinsic<"vrlw">;
1223+
// VSX word rotate (xvrlw): two v4i32 operands, one v4i32 result. Declared
// next to the Altivec vrlb/vrlh/vrlw/vrld rotate intrinsics.
def int_ppc_vsx_xvrlw : PowerPC_VSX_WWW_Intrinsic<"xvrlw">;
12171224
def int_ppc_altivec_vrld : PowerPC_Vec_DDD_Intrinsic<"vrld">;
12181225

12191226
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".

llvm/lib/Target/PowerPC/PPCInstrAltivec.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,7 @@ def : Pat<(v16i8 (rotl v16i8:$vA, v16i8:$vB)),
889889
(v16i8 (VRLB v16i8:$vA, v16i8:$vB))>;
890890
def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
891891
(v8i16 (VRLH v8i16:$vA, v8i16:$vB))>;
892+
// Select VRLW for a generic v4i32 rotate-left, but only on subtargets that
// are not ISA Future.
// NOTE(review): a nested `let Predicates` replaces, rather than extends, an
// enclosing `let Predicates` -- confirm no outer predicate is dropped for
// this one pattern (the enclosing scope is not visible in this hunk).
// NOTE(review): presumably ISA Future is meant to use the XVRLW pattern in
// PPCInstrFuture.td, which also requires HasVSX -- confirm that an ISA Future
// subtarget without VSX still has a pattern for v4i32 rotl.
let Predicates = [IsNotISAFuture] in
892893
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
893894
(v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
894895

llvm/lib/Target/PowerPC/PPCInstrFuture.td

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,10 @@ let Predicates = [HasVSX, IsISAFuture] in {
420420
: VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
421421
"vucmprlh $VRT, $VRA, $VRB", []>;
422422

423-
def XVRLW: XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
424-
"xvrlw $XT, $XA, $XB", []>;
423+
// xvrlw XT, XA, XB. The selection pattern maps a call to the
// int_ppc_vsx_xvrlw intrinsic on two v4i32 operands directly onto this
// instruction, writing the v4i32 result to XT.
def XVRLW : XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
424+
"xvrlw $XT, $XA, $XB",
425+
[(set v4i32:$XT, (int_ppc_vsx_xvrlw v4i32:$XA,
426+
v4i32:$XB))]>;
425427

426428
// AES Acceleration Instructions
427429
def XXAESENCP : XX3Form_XTABp5_M2<194, (outs vsrprc:$XTp),
@@ -550,6 +552,10 @@ def : Pat<(int_ppc_vsx_stxvprl v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRL $XTp,
550552
$RA, $RB)>;
551553
def : Pat<(int_ppc_vsx_stxvprll v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRLL $XTp,
552554
$RA, $RB)>;
555+
// When both VSX and ISA Future are available, lower the generic v4i32
// rotate-left node to XVRLW.
let Predicates = [HasVSX, IsISAFuture] in {
556+
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (XVRLW v4i32:$vA,
557+
v4i32:$vB))>;
558+
}
553559

554560
//---------------------------- Instruction aliases ---------------------------//
555561
// Predicate combinations available:

llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
33
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
44
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64
5+
; RUN: llc < %s -mcpu=future -mtriple=powerpc64le-- | FileCheck %s --check-prefix=FUTURE
56

67
declare i8 @llvm.fshl.i8(i8, i8, i8)
78
declare i16 @llvm.fshl.i16(i16, i16, i16)
@@ -24,6 +25,13 @@ define i8 @rotl_i8_const_shift(i8 %x) {
2425
; CHECK-NEXT: rlwimi 4, 3, 3, 0, 28
2526
; CHECK-NEXT: mr 3, 4
2627
; CHECK-NEXT: blr
28+
;
29+
; FUTURE-LABEL: rotl_i8_const_shift:
30+
; FUTURE: # %bb.0:
31+
; FUTURE-NEXT: rotlwi 4, 3, 27
32+
; FUTURE-NEXT: rlwimi 4, 3, 3, 0, 28
33+
; FUTURE-NEXT: mr 3, 4
34+
; FUTURE-NEXT: blr
2735
%f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
2836
ret i8 %f
2937
}
@@ -43,6 +51,11 @@ define i64 @rotl_i64_const_shift(i64 %x) {
4351
; CHECK64: # %bb.0:
4452
; CHECK64-NEXT: rotldi 3, 3, 3
4553
; CHECK64-NEXT: blr
54+
;
55+
; FUTURE-LABEL: rotl_i64_const_shift:
56+
; FUTURE: # %bb.0:
57+
; FUTURE-NEXT: rotldi 3, 3, 3
58+
; FUTURE-NEXT: blr
4659
%f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
4760
ret i64 %f
4861
}
@@ -60,6 +73,17 @@ define i16 @rotl_i16(i16 %x, i16 %z) {
6073
; CHECK-NEXT: srw 4, 5, 4
6174
; CHECK-NEXT: or 3, 3, 4
6275
; CHECK-NEXT: blr
76+
;
77+
; FUTURE-LABEL: rotl_i16:
78+
; FUTURE: # %bb.0:
79+
; FUTURE-NEXT: clrlwi 6, 4, 28
80+
; FUTURE-NEXT: neg 4, 4
81+
; FUTURE-NEXT: clrlwi 5, 3, 16
82+
; FUTURE-NEXT: clrlwi 4, 4, 28
83+
; FUTURE-NEXT: slw 3, 3, 6
84+
; FUTURE-NEXT: srw 4, 5, 4
85+
; FUTURE-NEXT: or 3, 3, 4
86+
; FUTURE-NEXT: blr
6387
%f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
6488
ret i16 %f
6589
}
@@ -69,6 +93,11 @@ define i32 @rotl_i32(i32 %x, i32 %z) {
6993
; CHECK: # %bb.0:
7094
; CHECK-NEXT: rotlw 3, 3, 4
7195
; CHECK-NEXT: blr
96+
;
97+
; FUTURE-LABEL: rotl_i32:
98+
; FUTURE: # %bb.0:
99+
; FUTURE-NEXT: rotlw 3, 3, 4
100+
; FUTURE-NEXT: blr
72101
%f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
73102
ret i32 %f
74103
}
@@ -100,6 +129,11 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
100129
; CHECK64: # %bb.0:
101130
; CHECK64-NEXT: rotld 3, 3, 4
102131
; CHECK64-NEXT: blr
132+
;
133+
; FUTURE-LABEL: rotl_i64:
134+
; FUTURE: # %bb.0:
135+
; FUTURE-NEXT: rotld 3, 3, 4
136+
; FUTURE-NEXT: blr
103137
%f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
104138
ret i64 %f
105139
}
@@ -124,6 +158,11 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
124158
; CHECK64: # %bb.0:
125159
; CHECK64-NEXT: vrlw 2, 2, 3
126160
; CHECK64-NEXT: blr
161+
;
162+
; FUTURE-LABEL: rotl_v4i32:
163+
; FUTURE: # %bb.0:
164+
; FUTURE-NEXT: xvrlw 34, 34, 35
165+
; FUTURE-NEXT: blr
127166
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
128167
ret <4 x i32> %f
129168
}
@@ -150,6 +189,12 @@ define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) {
150189
; CHECK64-NEXT: vspltisw 3, 3
151190
; CHECK64-NEXT: vrlw 2, 2, 3
152191
; CHECK64-NEXT: blr
192+
;
193+
; FUTURE-LABEL: rotl_v4i32_const_shift:
194+
; FUTURE: # %bb.0:
195+
; FUTURE-NEXT: vspltisw 3, 3
196+
; FUTURE-NEXT: xvrlw 34, 34, 35
197+
; FUTURE-NEXT: blr
153198
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
154199
ret <4 x i32> %f
155200
}
@@ -163,6 +208,13 @@ define i8 @rotr_i8_const_shift(i8 %x) {
163208
; CHECK-NEXT: rlwimi 4, 3, 5, 0, 26
164209
; CHECK-NEXT: mr 3, 4
165210
; CHECK-NEXT: blr
211+
;
212+
; FUTURE-LABEL: rotr_i8_const_shift:
213+
; FUTURE: # %bb.0:
214+
; FUTURE-NEXT: rotlwi 4, 3, 29
215+
; FUTURE-NEXT: rlwimi 4, 3, 5, 0, 26
216+
; FUTURE-NEXT: mr 3, 4
217+
; FUTURE-NEXT: blr
166218
%f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
167219
ret i8 %f
168220
}
@@ -172,6 +224,11 @@ define i32 @rotr_i32_const_shift(i32 %x) {
172224
; CHECK: # %bb.0:
173225
; CHECK-NEXT: rotlwi 3, 3, 29
174226
; CHECK-NEXT: blr
227+
;
228+
; FUTURE-LABEL: rotr_i32_const_shift:
229+
; FUTURE: # %bb.0:
230+
; FUTURE-NEXT: rotlwi 3, 3, 29
231+
; FUTURE-NEXT: blr
175232
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
176233
ret i32 %f
177234
}
@@ -189,6 +246,17 @@ define i16 @rotr_i16(i16 %x, i16 %z) {
189246
; CHECK-NEXT: slw 3, 3, 4
190247
; CHECK-NEXT: or 3, 5, 3
191248
; CHECK-NEXT: blr
249+
;
250+
; FUTURE-LABEL: rotr_i16:
251+
; FUTURE: # %bb.0:
252+
; FUTURE-NEXT: clrlwi 6, 4, 28
253+
; FUTURE-NEXT: neg 4, 4
254+
; FUTURE-NEXT: clrlwi 5, 3, 16
255+
; FUTURE-NEXT: clrlwi 4, 4, 28
256+
; FUTURE-NEXT: srw 5, 5, 6
257+
; FUTURE-NEXT: slw 3, 3, 4
258+
; FUTURE-NEXT: or 3, 5, 3
259+
; FUTURE-NEXT: blr
192260
%f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
193261
ret i16 %f
194262
}
@@ -199,6 +267,12 @@ define i32 @rotr_i32(i32 %x, i32 %z) {
199267
; CHECK-NEXT: neg 4, 4
200268
; CHECK-NEXT: rotlw 3, 3, 4
201269
; CHECK-NEXT: blr
270+
;
271+
; FUTURE-LABEL: rotr_i32:
272+
; FUTURE: # %bb.0:
273+
; FUTURE-NEXT: neg 4, 4
274+
; FUTURE-NEXT: rotlw 3, 3, 4
275+
; FUTURE-NEXT: blr
202276
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
203277
ret i32 %f
204278
}
@@ -231,6 +305,12 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
231305
; CHECK64-NEXT: neg 4, 4
232306
; CHECK64-NEXT: rotld 3, 3, 4
233307
; CHECK64-NEXT: blr
308+
;
309+
; FUTURE-LABEL: rotr_i64:
310+
; FUTURE: # %bb.0:
311+
; FUTURE-NEXT: neg 4, 4
312+
; FUTURE-NEXT: rotld 3, 3, 4
313+
; FUTURE-NEXT: blr
234314
%f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
235315
ret i64 %f
236316
}
@@ -263,6 +343,12 @@ define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
263343
; CHECK64-NEXT: vsubuwm 3, 4, 3
264344
; CHECK64-NEXT: vrlw 2, 2, 3
265345
; CHECK64-NEXT: blr
346+
;
347+
; FUTURE-LABEL: rotr_v4i32:
348+
; FUTURE: # %bb.0:
349+
; FUTURE-NEXT: vnegw 3, 3
350+
; FUTURE-NEXT: xvrlw 34, 34, 35
351+
; FUTURE-NEXT: blr
266352
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
267353
ret <4 x i32> %f
268354
}
@@ -293,6 +379,12 @@ define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
293379
; CHECK64-NEXT: vsubuwm 3, 4, 3
294380
; CHECK64-NEXT: vrlw 2, 2, 3
295381
; CHECK64-NEXT: blr
382+
;
383+
; FUTURE-LABEL: rotr_v4i32_const_shift:
384+
; FUTURE: # %bb.0:
385+
; FUTURE-NEXT: xxspltiw 0, 29
386+
; FUTURE-NEXT: xvrlw 34, 34, 0
387+
; FUTURE-NEXT: blr
296388
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
297389
ret <4 x i32> %f
298390
}
@@ -301,6 +393,10 @@ define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
301393
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
302394
; CHECK: # %bb.0:
303395
; CHECK-NEXT: blr
396+
;
397+
; FUTURE-LABEL: rotl_i32_shift_by_bitwidth:
398+
; FUTURE: # %bb.0:
399+
; FUTURE-NEXT: blr
304400
%f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
305401
ret i32 %f
306402
}
@@ -309,6 +405,10 @@ define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
309405
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
310406
; CHECK: # %bb.0:
311407
; CHECK-NEXT: blr
408+
;
409+
; FUTURE-LABEL: rotr_i32_shift_by_bitwidth:
410+
; FUTURE: # %bb.0:
411+
; FUTURE-NEXT: blr
312412
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
313413
ret i32 %f
314414
}
@@ -317,6 +417,10 @@ define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
317417
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
318418
; CHECK: # %bb.0:
319419
; CHECK-NEXT: blr
420+
;
421+
; FUTURE-LABEL: rotl_v4i32_shift_by_bitwidth:
422+
; FUTURE: # %bb.0:
423+
; FUTURE-NEXT: blr
320424
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
321425
ret <4 x i32> %f
322426
}
@@ -325,6 +429,10 @@ define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
325429
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
326430
; CHECK: # %bb.0:
327431
; CHECK-NEXT: blr
432+
;
433+
; FUTURE-LABEL: rotr_v4i32_shift_by_bitwidth:
434+
; FUTURE: # %bb.0:
435+
; FUTURE-NEXT: blr
328436
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
329437
ret <4 x i32> %f
330438
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3+
; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
4+
; RUN: FileCheck %s
5+
6+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
7+
; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
8+
; RUN: FileCheck %s
9+
10+
; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
11+
; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
12+
; RUN: FileCheck %s
13+
14+
; A direct call to llvm.ppc.vsx.xvrlw on two <4 x i32> arguments is expected
; to select a single xvrlw followed by the return.
; NOTE(review): the function name says VRLWMI but the intrinsic under test is
; xvrlw -- the name looks carried over from another test; consider renaming
; and regenerating the assertions.
define <4 x i32> @testVRLWMI(<4 x i32> %a, <4 x i32> %b) {
15+
; CHECK-LABEL: testVRLWMI:
16+
; CHECK: # %bb.0: # %entry
17+
; CHECK-NEXT: xvrlw v2, v2, v3
18+
; CHECK-NEXT: blr
19+
entry:
20+
%0 = tail call <4 x i32> @llvm.ppc.vsx.xvrlw(<4 x i32> %a, <4 x i32> %b)
21+
ret <4 x i32> %0
22+
}

llvm/test/CodeGen/PowerPC/vector-rotates.ll

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
; RUN: llc -O3 -mtriple=powerpc64-unknown-unknown -ppc-asm-full-reg-names \
66
; RUN: -verify-machineinstrs -mcpu=pwr7 < %s | \
77
; RUN: FileCheck --check-prefix=CHECK-P7 %s
8+
; RUN: llc -O3 -mtriple=powerpc64-unknown-unknown -ppc-asm-full-reg-names \
9+
; RUN: -verify-machineinstrs -mcpu=future < %s | \
10+
; RUN: FileCheck --check-prefix=CHECK-FUTURE %s
811

912
define <16 x i8> @rotl_v16i8(<16 x i8> %a) {
1013
; CHECK-P8-LABEL: rotl_v16i8:
@@ -23,6 +26,14 @@ define <16 x i8> @rotl_v16i8(<16 x i8> %a) {
2326
; CHECK-P7-NEXT: lxvw4x vs35, 0, r3
2427
; CHECK-P7-NEXT: vrlb v2, v2, v3
2528
; CHECK-P7-NEXT: blr
29+
;
30+
; CHECK-FUTURE-LABEL: rotl_v16i8:
31+
; CHECK-FUTURE: # %bb.0: # %entry
32+
; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
33+
; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI0_0@toc@l
34+
; CHECK-FUTURE-NEXT: lxv vs35, 0(r3)
35+
; CHECK-FUTURE-NEXT: vrlb v2, v2, v3
36+
; CHECK-FUTURE-NEXT: blr
2637
entry:
2738
%b = shl <16 x i8> %a, <i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4, i8 5, i8 5, i8 6, i8 6, i8 7, i8 7, i8 8, i8 8>
2839
%c = lshr <16 x i8> %a, <i8 7, i8 7, i8 6, i8 6, i8 5, i8 5, i8 4, i8 4, i8 3, i8 3, i8 2, i8 2, i8 1, i8 1, i8 0, i8 0>
@@ -47,6 +58,14 @@ define <8 x i16> @rotl_v8i16(<8 x i16> %a) {
4758
; CHECK-P7-NEXT: lxvw4x vs35, 0, r3
4859
; CHECK-P7-NEXT: vrlh v2, v2, v3
4960
; CHECK-P7-NEXT: blr
61+
;
62+
; CHECK-FUTURE-LABEL: rotl_v8i16:
63+
; CHECK-FUTURE: # %bb.0: # %entry
64+
; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
65+
; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI1_0@toc@l
66+
; CHECK-FUTURE-NEXT: lxv vs35, 0(r3)
67+
; CHECK-FUTURE-NEXT: vrlh v2, v2, v3
68+
; CHECK-FUTURE-NEXT: blr
5069
entry:
5170
%b = shl <8 x i16> %a, <i16 1, i16 2, i16 3, i16 5, i16 7, i16 11, i16 13, i16 16>
5271
%c = lshr <8 x i16> %a, <i16 15, i16 14, i16 13, i16 11, i16 9, i16 5, i16 3, i16 0>
@@ -71,6 +90,14 @@ define <4 x i32> @rotl_v4i32_0(<4 x i32> %a) {
7190
; CHECK-P7-NEXT: lxvw4x vs35, 0, r3
7291
; CHECK-P7-NEXT: vrlw v2, v2, v3
7392
; CHECK-P7-NEXT: blr
93+
;
94+
; CHECK-FUTURE-LABEL: rotl_v4i32_0:
95+
; CHECK-FUTURE: # %bb.0: # %entry
96+
; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
97+
; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI2_0@toc@l
98+
; CHECK-FUTURE-NEXT: lxv vs0, 0(r3)
99+
; CHECK-FUTURE-NEXT: xvrlw vs34, vs34, vs0
100+
; CHECK-FUTURE-NEXT: blr
74101
entry:
75102
%b = shl <4 x i32> %a, <i32 29, i32 19, i32 17, i32 11>
76103
%c = lshr <4 x i32> %a, <i32 3, i32 13, i32 15, i32 21>
@@ -94,6 +121,12 @@ define <4 x i32> @rotl_v4i32_1(<4 x i32> %a) {
94121
; CHECK-P7-NEXT: vsubuwm v3, v4, v3
95122
; CHECK-P7-NEXT: vrlw v2, v2, v3
96123
; CHECK-P7-NEXT: blr
124+
;
125+
; CHECK-FUTURE-LABEL: rotl_v4i32_1:
126+
; CHECK-FUTURE: # %bb.0: # %entry
127+
; CHECK-FUTURE-NEXT: xxspltiw vs0, 23
128+
; CHECK-FUTURE-NEXT: xvrlw vs34, vs34, vs0
129+
; CHECK-FUTURE-NEXT: blr
97130
entry:
98131
%b = shl <4 x i32> %a, <i32 23, i32 23, i32 23, i32 23>
99132
%c = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
@@ -124,6 +157,14 @@ define <2 x i64> @rotl_v2i64(<2 x i64> %a) {
124157
; CHECK-P7-NEXT: addi r3, r1, -16
125158
; CHECK-P7-NEXT: lxvd2x vs34, 0, r3
126159
; CHECK-P7-NEXT: blr
160+
;
161+
; CHECK-FUTURE-LABEL: rotl_v2i64:
162+
; CHECK-FUTURE: # %bb.0: # %entry
163+
; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
164+
; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI4_0@toc@l
165+
; CHECK-FUTURE-NEXT: lxv vs35, 0(r3)
166+
; CHECK-FUTURE-NEXT: vrld v2, v2, v3
167+
; CHECK-FUTURE-NEXT: blr
127168
entry:
128169
%b = shl <2 x i64> %a, <i64 41, i64 53>
129170
%c = lshr <2 x i64> %a, <i64 23, i64 11>

0 commit comments

Comments
 (0)