Skip to content

Commit adb5d6a

Browse files
authored
[X86] use lowerShuffleWithPERMV helper to create VPERMV/VPERMV3 nodes (#129882)
This allows us to make use of the extra canonicalization that lowerShuffleWithPERMV performs.
1 parent 844a1d5 commit adb5d6a

File tree

2 files changed: +16 -27 lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -42610,7 +42610,6 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4261042610
// Combine VPERMV3 to widened VPERMV if the two source operands can be
4261142611
// freely concatenated.
4261242612
MVT WideVT = VT.getDoubleNumVectorElementsVT();
42613-
MVT MaskVT = N.getOperand(1).getSimpleValueType();
4261442613
bool CanConcat = VT.is128BitVector() ||
4261542614
(VT.is256BitVector() && Subtarget.useAVX512Regs());
4261642615
if (CanConcat) {
@@ -42634,12 +42633,10 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4263442633
DL, WideVT, {N.getOperand(2), N.getOperand(0)}, DAG, DCI,
4263542634
Subtarget)) {
4263642635
ShuffleVectorSDNode::commuteMask(Mask);
42637-
SDValue NewMask =
42638-
getConstVector(Mask, MaskVT, DAG, DL, /*IsMask=*/true);
42639-
NewMask = widenSubVector(NewMask, false, Subtarget, DAG, DL,
42640-
WideVT.getSizeInBits());
42636+
Mask.append(NumElts, SM_SentinelUndef);
4264142637
SDValue Perm =
42642-
DAG.getNode(X86ISD::VPERMV, DL, WideVT, NewMask, ConcatSrc);
42638+
lowerShuffleWithPERMV(DL, WideVT, Mask, ConcatSrc,
42639+
DAG.getUNDEF(WideVT), Subtarget, DAG);
4264342640
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
4264442641
DAG.getVectorIdxConstant(0, DL));
4264542642
}
@@ -42649,10 +42646,9 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4264942646
// Canonicalize to VPERMV if both sources are the same.
4265042647
if (V1 == V2) {
4265142648
for (int &M : Mask)
42652-
M = (M < 0 ? M : M & (Mask.size() - 1));
42653-
SDValue NewMask = getConstVector(Mask, MaskVT, DAG, DL,
42654-
/*IsMask=*/true);
42655-
return DAG.getNode(X86ISD::VPERMV, DL, VT, NewMask, N.getOperand(0));
42649+
M = (M < 0 ? M : (M & (NumElts - 1)));
42650+
return lowerShuffleWithPERMV(DL, VT, Mask, N.getOperand(0),
42651+
DAG.getUNDEF(VT), Subtarget, DAG);
4265642652
}
4265742653
// If sources are half width, then concat and use VPERMV with adjusted
4265842654
// mask.
@@ -42667,19 +42663,16 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4266742663
combineConcatVectorOps(DL, VT, Ops, DAG, DCI, Subtarget)) {
4266842664
for (int &M : Mask)
4266942665
M = (M < (int)NumElts ? M : (M - (NumElts / 2)));
42670-
SDValue NewMask = getConstVector(Mask, MaskVT, DAG, DL,
42671-
/*IsMask=*/true);
42672-
return DAG.getNode(X86ISD::VPERMV, DL, VT, NewMask, ConcatSrc);
42666+
return lowerShuffleWithPERMV(DL, VT, Mask, ConcatSrc,
42667+
DAG.getUNDEF(VT), Subtarget, DAG);
4267342668
}
4267442669
}
4267542670
// Commute foldable source to the RHS.
4267642671
if (isShuffleFoldableLoad(N.getOperand(0)) &&
4267742672
!isShuffleFoldableLoad(N.getOperand(2))) {
4267842673
ShuffleVectorSDNode::commuteMask(Mask);
42679-
SDValue NewMask =
42680-
getConstVector(Mask, MaskVT, DAG, DL, /*IsMask=*/true);
42681-
return DAG.getNode(X86ISD::VPERMV3, DL, VT, N.getOperand(2), NewMask,
42682-
N.getOperand(0));
42674+
return lowerShuffleWithPERMV(DL, VT, Mask, N.getOperand(2),
42675+
N.getOperand(0), Subtarget, DAG);
4268342676
}
4268442677
}
4268542678
return SDValue();
@@ -58048,10 +58041,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5804858041
if (ConcatMask.size() == (NumOps * NumSrcElts)) {
5804958042
SDValue Src = concatSubVectors(Ops[0].getOperand(1),
5805058043
Ops[1].getOperand(1), DAG, DL);
58051-
MVT IntMaskSVT = MVT::getIntegerVT(EltSizeInBits);
58052-
MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
58053-
SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
58054-
return DAG.getNode(X86ISD::VPERMV, DL, VT, Mask, Src);
58044+
return lowerShuffleWithPERMV(DL, VT, ConcatMask, Src,
58045+
DAG.getUNDEF(VT), Subtarget, DAG);
5805558046
}
5805658047
}
5805758048
break;
@@ -58080,10 +58071,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5808058071
if (ConcatMask.size() == (NumOps * NumSrcElts)) {
5808158072
SDValue Src0 = ConcatSubOperand(VT, Ops, 0);
5808258073
SDValue Src1 = ConcatSubOperand(VT, Ops, 2);
58083-
MVT IntMaskSVT = MVT::getIntegerVT(EltSizeInBits);
58084-
MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
58085-
SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
58086-
return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);
58074+
return lowerShuffleWithPERMV(DL, VT, ConcatMask, Src0, Src1,
58075+
Subtarget, DAG);
5808758076
}
5808858077
}
5808958078
break;

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3908,7 +3908,7 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
39083908
; AVX512BW: # %bb.0:
39093909
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
39103910
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
3911-
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,25,26,27,28,29,0,31,0,0,0,0,0,0,0,0]
3911+
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} xmm1 = [0,25,26,27,28,29,0,31]
39123912
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm1
39133913
; AVX512BW-NEXT: vpbroadcastw %xmm0, %ymm0
39143914
; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
@@ -4146,7 +4146,7 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
41464146
; AVX512BW: # %bb.0:
41474147
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
41484148
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
4149-
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,25,26,27,28,29,30,31,0,0,0,0,0,0,0,0]
4149+
; AVX512BW-NEXT: vpmovsxbw {{.*#+}} xmm1 = [0,25,26,27,28,29,30,31]
41504150
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
41514151
; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
41524152
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)

0 commit comments

Comments
 (0)