Skip to content

Commit 24989a2

Browse files
Fix reviews
1 parent 4b44048 commit 24989a2

File tree

2 files changed

+53
-42
lines changed

2 files changed

+53
-42
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 26 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -17350,24 +17350,23 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1735017350
return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, Subtarget, DAG);
1735117351
}
1735217352

17353-
static SDValue lowerShuffleAsVSELECT(const SDLoc &DL,
17354-
ArrayRef<int> RepeatedMask, SDValue V1,
17355-
SDValue V2, SelectionDAG &DAG) {
17353+
static SDValue lowerShuffleAsVSELECT(const SDLoc &DL, ArrayRef<int> Mask,
17354+
SDValue V1, SDValue V2,
17355+
SelectionDAG &DAG) {
1735617356
if (V1.getOpcode() != ISD::BUILD_VECTOR &&
1735717357
V2.getOpcode() != ISD::BUILD_VECTOR)
1735817358
return SDValue();
17359-
SDValue BuildVector;
17360-
if (V1.getOpcode() == ISD::BUILD_VECTOR) {
17361-
BuildVector = V1;
17362-
if (V2.getOpcode() != ISD::BITCAST)
17363-
return SDValue();
17364-
} else {
17365-
BuildVector = V2;
17366-
if (V1.getOpcode() != ISD::BITCAST)
17367-
return SDValue();
17368-
}
17359+
17360+
bool IsV1BuildVector = V1.getOpcode() == ISD::BUILD_VECTOR;
17361+
SDValue BuildVector = IsV1BuildVector ? V1 : V2;
17362+
1736917363
if (!ISD::isBuildVectorAllZeros(BuildVector.getNode()))
1737017364
return SDValue();
17365+
17366+
// This relates to the lowering of the `_mm512_maskz_shuffle_epi32` intrinsic.
17367+
// The `BUILD_VECTOR` contains the zeroing mask. If the corresponding
17368+
// element is UNDEF, then the bit in the mask is set. If it is zero, the
17369+
// corresponding bit in the mask is zero.
1737117370
APInt DestMask(16, 0);
1737217371
for (unsigned i = 0; i < 16; ++i) {
1737317372
SDValue Op = BuildVector->getOperand(i);
@@ -17377,28 +17376,25 @@ static SDValue lowerShuffleAsVSELECT(const SDLoc &DL,
1737717376
if (DestMask.isZero())
1737817377
return SDValue();
1737917378

17380-
unsigned Imm8 = 0;
17381-
for (unsigned i = 0; i < 4; ++i) {
17382-
if (V1.getOpcode() != ISD::BUILD_VECTOR) {
17383-
if (RepeatedMask[i] >= 4) {
17384-
continue;
17385-
}
17386-
} else if (RepeatedMask[i] < 4) {
17387-
continue;
17388-
}
17389-
Imm8 += (RepeatedMask[i] % 4) << (2 * i);
17390-
}
17391-
1739217379
SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, MVT::v16i1,
1739317380
DAG.getConstant(DestMask, DL, MVT::i16));
1739417381

17395-
std::vector<SDValue> ZeroElements(16, DAG.getConstant(0, DL, MVT::i32));
17382+
SmallVector<SDValue, 16> ZeroElements(16, DAG.getConstant(0, DL, MVT::i32));
1739617383
SDValue Zeros = DAG.getBuildVector(MVT::v16i32, DL, ZeroElements);
1739717384

17385+
SmallVector<int, 16> NewMask(16);
17386+
for (int I = 0; I < 16; ++I) {
17387+
if (IsV1BuildVector) {
17388+
NewMask[I] = Mask[I] >= 16 ? Mask[I] - 16 : Mask[I] + 16;
17389+
} else {
17390+
NewMask[I] = Mask[I];
17391+
}
17392+
}
17393+
1739817394
return DAG.getNode(ISD::VSELECT, DL, MVT::v16i32, Bitcast,
17399-
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32,
17400-
V1.getOpcode() != ISD::BUILD_VECTOR ? V1 : V2,
17401-
DAG.getTargetConstant(Imm8, DL, MVT::i8)),
17395+
DAG.getVectorShuffle(MVT::v16i32, DL,
17396+
IsV1BuildVector ? V2 : V1,
17397+
DAG.getUNDEF(MVT::v16i32), NewMask),
1740217398
Zeros);
1740317399
}
1740417400

@@ -17448,7 +17444,7 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
1744817444
if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i32, V1, V2, Mask, DAG))
1744917445
return V;
1745017446

17451-
if (SDValue V = lowerShuffleAsVSELECT(DL, RepeatedMask, V1, V2, DAG))
17447+
if (SDValue V = lowerShuffleAsVSELECT(DL, Mask, V1, V2, DAG))
1745217448
return V;
1745317449
}
1745417450

llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -990,23 +990,38 @@ bb:
990990
ret void
991991
}
992992

993-
define <8 x i64> @pr121147(<8 x i64> %a) {
994-
; AVX512F-LABEL: pr121147:
995-
; AVX512F: # %bb.0: # %entry
993+
define <16 x i32> @gen_VPSHUFD_AVX512_0(<16 x i32> %a) {
994+
; AVX512F-LABEL: gen_VPSHUFD_AVX512_0:
995+
; AVX512F: # %bb.0:
996996
; AVX512F-NEXT: movw $-21846, %ax # imm = 0xAAAA
997997
; AVX512F-NEXT: kmovw %eax, %k1
998-
; AVX512F-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,2,0,2,4,6,4,6,8,10,8,10,12,14,12,14]
998+
; AVX512F-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,2,2,2,6,6,6,6,10,10,10,10,14,14,14,14]
999999
; AVX512F-NEXT: retq
10001000
;
1001-
; AVX512BW-LABEL: pr121147:
1002-
; AVX512BW: # %bb.0: # %entry
1001+
; AVX512BW-LABEL: gen_VPSHUFD_AVX512_0:
1002+
; AVX512BW: # %bb.0:
10031003
; AVX512BW-NEXT: movw $-21846, %ax # imm = 0xAAAA
10041004
; AVX512BW-NEXT: kmovd %eax, %k1
1005-
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,2,0,2,4,6,4,6,8,10,8,10,12,14,12,14]
1005+
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,2,2,2,6,6,6,6,10,10,10,10,14,14,14,14]
10061006
; AVX512BW-NEXT: retq
1007-
entry:
1008-
%0 = bitcast <8 x i64> %a to <16 x i32>
1009-
%1 = shufflevector <16 x i32> <i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison>, <16 x i32> %0, <16 x i32> <i32 0, i32 18, i32 2, i32 18, i32 4, i32 22, i32 6, i32 22, i32 8, i32 26, i32 10, i32 26, i32 12, i32 30, i32 14, i32 30>
1010-
%2 = bitcast <16 x i32> %1 to <8 x i64>
1011-
ret <8 x i64> %2
1007+
%res = shufflevector <16 x i32> <i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison>, <16 x i32> %a, <16 x i32> <i32 0, i32 18, i32 2, i32 18, i32 4, i32 22, i32 6, i32 22, i32 8, i32 26, i32 10, i32 26, i32 12, i32 30, i32 14, i32 30>
1008+
ret <16 x i32> %res
1009+
}
1010+
1011+
define <16 x i32> @gen_VPSHUFD_AVX512_1(<16 x i32> %a) {
1012+
; AVX512F-LABEL: gen_VPSHUFD_AVX512_1:
1013+
; AVX512F: # %bb.0:
1014+
; AVX512F-NEXT: movw $-21846, %ax # imm = 0xAAAA
1015+
; AVX512F-NEXT: kmovw %eax, %k1
1016+
; AVX512F-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,2,2,2,6,6,6,6,10,10,10,10,14,14,14,14]
1017+
; AVX512F-NEXT: retq
1018+
;
1019+
; AVX512BW-LABEL: gen_VPSHUFD_AVX512_1:
1020+
; AVX512BW: # %bb.0:
1021+
; AVX512BW-NEXT: movw $-21846, %ax # imm = 0xAAAA
1022+
; AVX512BW-NEXT: kmovd %eax, %k1
1023+
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,2,2,2,6,6,6,6,10,10,10,10,14,14,14,14]
1024+
; AVX512BW-NEXT: retq
1025+
%res = shufflevector <16 x i32> %a , <16 x i32> <i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison, i32 0, i32 poison>, <16 x i32> <i32 16, i32 2, i32 18, i32 2, i32 20, i32 6, i32 22, i32 6, i32 24, i32 10, i32 26, i32 10, i32 28, i32 14, i32 30, i32 14>
1026+
ret <16 x i32> %res
10121027
}

0 commit comments

Comments
 (0)