diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5e7f6523cd86d..95512e6bc628d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4278,6 +4278,25 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { if (!VPI) continue; switch (VPI->getOpcode()) { + // Selects are only modelled in the legacy cost model for safe + // divisors. + case Instruction::Select: { + VPValue *VPV = VPI->getVPSingleValue(); + if (VPV->getNumUsers() == 1) { + if (auto *WR = dyn_cast(*VPV->user_begin())) { + switch (WR->getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + continue; + default: + break; + } + } + } + [[fallthrough]]; + } case VPInstruction::ActiveLaneMask: case VPInstruction::ExplicitVectorLength: C += VPI->cost(VF, CostCtx); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f8fde0500b77a..63ed2d44ed8ff 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1029,6 +1029,19 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, } switch (getOpcode()) { + case Instruction::Select: { + // TODO: It may be possible to improve this by analyzing where the + // condition operand comes from. + CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; + auto *CondTy = Ctx.Types.inferScalarType(getOperand(0)); + auto *VecTy = Ctx.Types.inferScalarType(getOperand(1)); + if (!vputils::onlyFirstLaneUsed(this)) { + CondTy = toVectorTy(CondTy, VF); + VecTy = toVectorTy(VecTy, VF); + } + return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy, Pred, + Ctx.CostKind); + } case Instruction::ExtractElement: case VPInstruction::ExtractLane: { // Add on the cost of extracting the element. @@ -2099,8 +2112,10 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF, case Instruction::SDiv: case Instruction::SRem: case Instruction::URem: - // More complex computation, let the legacy cost-model handle this for now. - return Ctx.getLegacyCost(cast(getUnderlyingValue()), VF); + // If the div/rem operation isn't safe to speculate and requires + // predication, then the only way we can even create a vplan is to insert + // a select on the second input operand to ensure we use the value of 1 + // for the inactive lanes. The select will be costed separately. case Instruction::FNeg: case Instruction::Add: case Instruction::FAdd: