Skip to content

Commit a0551f8

Browse files
[X86] Generate vpmuludq instead of vpmullq
When lowering the `_mm512_mul_epu32` intrinsic, if the generated value is later used in a vector shuffle, we generate `vpmullq` instead of `vpmuludq` (https://godbolt.org/z/WbaGMqs8e) because `SimplifyDemandedVectorElts` simplifies the arguments first, and the combine to `PMULDQ` then fails. Added an override of `shouldSimplifyDemandedVectorElts` in `X86TargetLowering` that checks whether the `MUL` can be combined to `PMULDQ` first.
1 parent bc87a53 commit a0551f8

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60832,3 +60832,24 @@ Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
6083260832
return Align(1ULL << ExperimentalPrefInnermostLoopAlignment);
6083360833
return TargetLowering::getPrefLoopAlignment();
6083460834
}
60835+
60836+
/// Decide whether demanded-vector-elements simplification should run on \p Op.
///
/// For a VECTOR_SHUFFLE whose operand (looking through bitcasts) is an
/// ISD::MUL, this first attempts combineMulToPMULDQ on that multiply. If the
/// combine produces a value, all uses of the multiply are replaced with it and
/// the hook returns false so that the shuffle's operands are not simplified
/// in a way that would prevent the PMULDQ/PMULUDQ form from being selected.
///
/// Both shuffle operands are considered: if the first multiply cannot be
/// combined, the second one (when it is a different MUL node) is tried too.
///
/// NOTE(review): although this is a `const` query hook, it mutates the DAG via
/// ReplaceAllUsesWith on success — confirm that callers tolerate this.
///
/// \param Op  The node being considered for simplification.
/// \param TLO Lowering-optimization context; only its DAG is used here.
/// \returns false if a multiply operand was rewritten by combineMulToPMULDQ,
///          true otherwise.
bool X86TargetLowering::shouldSimplifyDemandedVectorElts(
    SDValue Op, const TargetLoweringOpt &TLO) const {
  if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
    return true;

  SelectionDAG &DAG = TLO.DAG;
  const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>();

  // Remember the last multiply we attempted so that a shuffle using the same
  // MUL node for both inputs does not run the combine twice.
  SDNode *Attempted = nullptr;
  for (unsigned OpIdx = 0; OpIdx != 2; ++OpIdx) {
    SDValue Src = peekThroughBitcasts(Op.getOperand(OpIdx));
    if (Src.getOpcode() != ISD::MUL || Src.getNode() == Attempted)
      continue;

    SDNode *Mul = Src.getNode();
    Attempted = Mul;
    const SDLoc DL(Mul);

    if (SDValue V = combineMulToPMULDQ(Mul, DL, DAG, Subtarget)) {
      DAG.ReplaceAllUsesWith(Mul, V.getNode());
      return false;
    }
  }
  return true;
}

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1207,6 +1207,9 @@ namespace llvm {
12071207

12081208
bool hasBitTest(SDValue X, SDValue Y) const override;
12091209

1210+
bool shouldSimplifyDemandedVectorElts(
1211+
SDValue Op, const TargetLoweringOpt &TLO) const override;
1212+
12101213
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
12111214
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
12121215
unsigned OldShiftOpcode, unsigned NewShiftOpcode,

0 commit comments

Comments
 (0)