Skip to content

Commit c113f2b

Browse files
committed
Changes according to review
1 parent 9f773b8 commit c113f2b

File tree

5 files changed

+38
-46
lines changed

5 files changed

+38
-46
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 16 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -30976,24 +30976,13 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
3097630976
// can efficiently be merged together using a masked move.
3097730977
MVT ExtVT = MVT::v32i16;
3097830978

30979-
// When used in a vectorshuffle, selects even-index lanes from the first
30980-
// vector and odd index lanes from the second vector.
30981-
SmallVector<int, 64> InterleaveIndices;
30982-
for (unsigned i = 0; i < 64; ++i) {
30983-
unsigned offset = (i % 2 == 0) ? 0 : 64;
30984-
InterleaveIndices.push_back(i + offset);
30985-
}
30986-
30987-
SDValue zero = DAG.getConstant(0, dl, VT);
30988-
SDValue eight = DAG.getTargetConstant(8, dl, MVT::i8);
3098930979
SDValue RLo, RHi;
30990-
30991-
// Isolate lower and upper lanes of Amt by shuffling zeros into AmtLo and
30980+
// Isolate lower and upper lanes of Amt by masking odd lanes in AmtLo and
3099230981
// right shifting AmtHi.
30993-
SDValue AmtLo = DAG.getBitcast(
30994-
ExtVT, DAG.getVectorShuffle(VT, dl, Amt, zero, InterleaveIndices));
30995-
SDValue AmtHi = DAG.getNode(X86ISD::VSRLI, dl, ExtVT,
30996-
DAG.getBitcast(ExtVT, Amt), eight);
30982+
SDValue AmtLo = DAG.getNode(ISD::AND, dl, ExtVT, DAG.getBitcast(ExtVT, Amt),
30983+
DAG.getConstant(0x00ff, dl, ExtVT));
30984+
SDValue AmtHi = getTargetVShiftByConstNode(
30985+
X86ISD::VSRLI, dl, ExtVT, DAG.getBitcast(ExtVT, Amt), 8, DAG);
3099730986
unsigned int ShiftOp;
3099830987
switch (Opc) {
3099930988
case ISD::SHL:
@@ -31002,16 +30991,16 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
3100230991
// prevent high bits of an even lane overflowing into low bits of an odd
3100330992
// lane.
3100430993
RLo = DAG.getBitcast(ExtVT, R);
31005-
RHi = DAG.getBitcast(
31006-
ExtVT, DAG.getVectorShuffle(VT, dl, zero, R, InterleaveIndices));
30994+
RHi = DAG.getNode(ISD::AND, dl, ExtVT, RLo,
30995+
DAG.getConstant(0xff00, dl, ExtVT));
3100730996
ShiftOp = X86ISD::VSHLV;
3100830997
break;
3100930998
case ISD::SRL:
3101030999
// Same idea as above, but this time we need to make sure no low bits of
3101131000
// an odd lane can overflow into high bits of an even lane.
31012-
RLo = DAG.getBitcast(
31013-
ExtVT, DAG.getVectorShuffle(VT, dl, R, zero, InterleaveIndices));
3101431001
RHi = DAG.getBitcast(ExtVT, R);
31002+
RLo = DAG.getNode(ISD::AND, dl, ExtVT, RHi,
31003+
DAG.getConstant(0x00ff, dl, ExtVT));
3101531004
ShiftOp = X86ISD::VSRLV;
3101631005
break;
3101731006
case ISD::SRA:
@@ -31020,8 +31009,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
3102031009
// depending on the sign bit of the original lane. We do this using 2
3102131010
// immediate shifts.
3102231011
RHi = DAG.getBitcast(ExtVT, R);
31023-
RLo = DAG.getNode(X86ISD::VSHLI, dl, ExtVT, RHi, eight);
31024-
RLo = DAG.getNode(X86ISD::VSRAI, dl, ExtVT, RLo, eight);
31012+
RLo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, RHi, 8, DAG);
31013+
RLo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExtVT, RLo, 8, DAG);
3102531014
ShiftOp = X86ISD::VSRAV;
3102631015
break;
3102731016
default:
@@ -31034,8 +31023,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
3103431023
SDValue ShiftedHi =
3103531024
DAG.getBitcast(VT, DAG.getNode(ShiftOp, dl, ExtVT, RHi, AmtHi));
3103631025

31037-
return DAG.getVectorShuffle(VT, dl, ShiftedLo, ShiftedHi,
31038-
InterleaveIndices);
31026+
// To merge the shifted vectors back together, we select even lanes
31027+
// from ShiftedLo and odd lanes from ShiftedHi.
31028+
SDValue SelectMask = DAG.getBitcast(
31029+
MVT::v64i1, DAG.getConstant(0x5555555555555555, dl, MVT::i64));
31030+
return DAG.getSelect(dl, VT, SelectMask, ShiftedLo, ShiftedHi);
3103931031
}
3104031032

3104131033
if (VT == MVT::v16i8 ||

llvm/test/CodeGen/X86/gfni-shifts.ll

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1684,12 +1684,12 @@ define <64 x i8> @var_shl_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
16841684
;
16851685
; GFNIAVX512BW-LABEL: var_shl_v64i8:
16861686
; GFNIAVX512BW: # %bb.0:
1687-
; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm2
1688-
; GFNIAVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm3
1689-
; GFNIAVX512BW-NEXT: vpsllvw %zmm2, %zmm3, %zmm2
1690-
; GFNIAVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
1687+
; GFNIAVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm2
1688+
; GFNIAVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm2
1689+
; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
1690+
; GFNIAVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
16911691
; GFNIAVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
1692-
; GFNIAVX512BW-NEXT: movabsq $-6148914691236517206, %rax # imm = 0xAAAAAAAAAAAAAAAA
1692+
; GFNIAVX512BW-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
16931693
; GFNIAVX512BW-NEXT: kmovq %rax, %k1
16941694
; GFNIAVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
16951695
; GFNIAVX512BW-NEXT: retq
@@ -1875,16 +1875,15 @@ define <64 x i8> @var_lshr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
18751875
;
18761876
; GFNIAVX512BW-LABEL: var_lshr_v64i8:
18771877
; GFNIAVX512BW: # %bb.0:
1878-
; GFNIAVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
1878+
; GFNIAVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
18791879
; GFNIAVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
18801880
; GFNIAVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm2
18811881
; GFNIAVX512BW-NEXT: vpsrlvw %zmm3, %zmm2, %zmm2
18821882
; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
18831883
; GFNIAVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
1884-
; GFNIAVX512BW-NEXT: movabsq $-6148914691236517206, %rax # imm = 0xAAAAAAAAAAAAAAAA
1884+
; GFNIAVX512BW-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
18851885
; GFNIAVX512BW-NEXT: kmovq %rax, %k1
1886-
; GFNIAVX512BW-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1}
1887-
; GFNIAVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
1886+
; GFNIAVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
18881887
; GFNIAVX512BW-NEXT: retq
18891888
%shift = lshr <64 x i8> %a, %b
18901889
ret <64 x i8> %shift
@@ -2238,9 +2237,10 @@ define <64 x i8> @var_ashr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
22382237
; GFNIAVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0
22392238
; GFNIAVX512BW-NEXT: vpsraw $8, %zmm0, %zmm0
22402239
; GFNIAVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
2241-
; GFNIAVX512BW-NEXT: movabsq $-6148914691236517206, %rax # imm = 0xAAAAAAAAAAAAAAAA
2240+
; GFNIAVX512BW-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
22422241
; GFNIAVX512BW-NEXT: kmovq %rax, %k1
2243-
; GFNIAVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
2242+
; GFNIAVX512BW-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1}
2243+
; GFNIAVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
22442244
; GFNIAVX512BW-NEXT: retq
22452245
%shift = ashr <64 x i8> %a, %b
22462246
ret <64 x i8> %shift

llvm/test/CodeGen/X86/vector-shift-ashr-512.ll

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -112,9 +112,10 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
112112
; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0
113113
; AVX512BW-NEXT: vpsraw $8, %zmm0, %zmm0
114114
; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
115-
; AVX512BW-NEXT: movabsq $-6148914691236517206, %rax # imm = 0xAAAAAAAAAAAAAAAA
115+
; AVX512BW-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
116116
; AVX512BW-NEXT: kmovq %rax, %k1
117-
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
117+
; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1}
118+
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
118119
; AVX512BW-NEXT: retq
119120
%shift = ashr <64 x i8> %a, %b
120121
ret <64 x i8> %shift

llvm/test/CodeGen/X86/vector-shift-lshr-512.ll

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -85,16 +85,15 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
8585
;
8686
; AVX512BW-LABEL: var_shift_v64i8:
8787
; AVX512BW: # %bb.0:
88-
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
88+
; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
8989
; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3
9090
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm2
9191
; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm2, %zmm2
9292
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
9393
; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
94-
; AVX512BW-NEXT: movabsq $-6148914691236517206, %rax # imm = 0xAAAAAAAAAAAAAAAA
94+
; AVX512BW-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
9595
; AVX512BW-NEXT: kmovq %rax, %k1
96-
; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1}
97-
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
96+
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
9897
; AVX512BW-NEXT: retq
9998
%shift = lshr <64 x i8> %a, %b
10099
ret <64 x i8> %shift

llvm/test/CodeGen/X86/vector-shift-shl-512.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -82,12 +82,12 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
8282
;
8383
; AVX512BW-LABEL: var_shift_v64i8:
8484
; AVX512BW: # %bb.0:
85-
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm2
86-
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm3
87-
; AVX512BW-NEXT: vpsllvw %zmm2, %zmm3, %zmm2
88-
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
85+
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm2
86+
; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm2
87+
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
88+
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
8989
; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
90-
; AVX512BW-NEXT: movabsq $-6148914691236517206, %rax # imm = 0xAAAAAAAAAAAAAAAA
90+
; AVX512BW-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555
9191
; AVX512BW-NEXT: kmovq %rax, %k1
9292
; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
9393
; AVX512BW-NEXT: retq

0 commit comments

Comments
 (0)