Skip to content

Commit 92083e8

Browse files
committed
[X86] Allow VPERMV3 -> VPERMV folds to handle extraction from a wider source vector (e.g. v16i32 -> v4i32)
We don't need to restrict this to double-width vectors, as long as we correctly bitcast the types. Improves the fix for #97968.
1 parent 8ac6b41 commit 92083e8

File tree

2 files changed

+18
-20
lines changed

2 files changed

+18
-20
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -41336,29 +41336,27 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4133641336
case X86ISD::VPERMV3: {
4133741337
// Combine VPERMV3 to widened VPERMV if the two source operands are split
4133841338
// from the same vector.
41339-
// TODO: Handle extraction from a wider source vector (e.g. v16i32 -> v4i32).
4134041339
SDValue V1 = peekThroughBitcasts(N.getOperand(0));
4134141340
SDValue V2 = peekThroughBitcasts(N.getOperand(2));
4134241341
MVT SVT = V1.getSimpleValueType();
41343-
MVT NVT = VT.getDoubleNumVectorElementsVT();
41344-
if ((NVT.is256BitVector() ||
41345-
(NVT.is512BitVector() && Subtarget.hasEVEX512())) &&
41346-
V1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
41342+
if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4134741343
V1.getConstantOperandVal(1) == 0 &&
4134841344
V2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4134941345
V2.getConstantOperandVal(1) == SVT.getVectorNumElements() &&
41350-
V1.getOperand(0) == V2.getOperand(0) &&
41351-
V1.getOperand(0).getValueSizeInBits() == NVT.getSizeInBits()) {
41352-
SDValue Mask =
41353-
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NVT, DAG.getUNDEF(NVT),
41354-
N.getOperand(1), DAG.getIntPtrConstant(0, DL));
41355-
return DAG.getNode(
41356-
ISD::EXTRACT_SUBVECTOR, DL, VT,
41357-
DAG.getNode(X86ISD::VPERMV, DL, NVT, Mask,
41358-
DAG.getBitcast(NVT, V1.getOperand(0))),
41359-
DAG.getIntPtrConstant(0, DL));
41346+
V1.getOperand(0) == V2.getOperand(0)) {
41347+
EVT NVT = V1.getOperand(0).getValueType();
41348+
if (NVT.is256BitVector() ||
41349+
(NVT.is512BitVector() && Subtarget.hasEVEX512())) {
41350+
MVT WideVT = MVT::getVectorVT(
41351+
VT.getScalarType(), NVT.getSizeInBits() / VT.getScalarSizeInBits());
41352+
SDValue Mask = widenSubVector(N.getOperand(1), false, Subtarget, DAG,
41353+
DL, WideVT.getSizeInBits());
41354+
SDValue Perm = DAG.getNode(X86ISD::VPERMV, DL, WideVT, Mask,
41355+
DAG.getBitcast(WideVT, V1.getOperand(0)));
41356+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,
41357+
DAG.getIntPtrConstant(0, DL));
41358+
}
4136041359
}
41361-
4136241360
return SDValue();
4136341361
}
4136441362
default:

llvm/test/CodeGen/X86/pr97968.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
define <2 x i32> @PR97968(<16 x i32> %a0) {
55
; CHECK-LABEL: PR97968:
66
; CHECK: # %bb.0:
7-
; CHECK-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,7,2,7]
8-
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2
9-
; CHECK-NEXT: vpermi2d %xmm2, %xmm0, %xmm1
10-
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
7+
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = [2,7,2,7]
8+
; CHECK-NEXT: # xmm1 = mem[0,0]
9+
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
10+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1111
; CHECK-NEXT: vzeroupper
1212
; CHECK-NEXT: retq
1313
%sub0 = shufflevector <16 x i32> %a0, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

0 commit comments

Comments
 (0)