diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 741392247c0d6..d89c05e22cc4c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -322,6 +322,17 @@ class VPBuilder { return createScalarCast(CastOp, Op, ResultTy, DL); } + /* Cast scalar Op from SrcTy to ResultTy: returns Op itself when the two types are identical, creates a Trunc when ResultTy has fewer scalar bits than SrcTy, and an SExt in every other case (including distinct types of equal width). Presumably callers only pass integer types here - TODO confirm. */ VPValue *createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, + DebugLoc DL) { + if (ResultTy == SrcTy) + return Op; + Instruction::CastOps CastOp = + ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits() + ? Instruction::Trunc + : Instruction::SExt; + return createScalarCast(CastOp, Op, ResultTy, DL); + } + VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) { VPIRFlags Flags; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fe60e97d44997..24a1d75b4d7fb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3783,9 +3783,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { }; /// A recipe for handling phi nodes of integer and floating-point inductions, -/// producing their scalar values. -class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, - public VPUnrollPartAccessor<3> { +/// producing their scalar values. Before unrolling the recipe has 3 operands: +/// IV, step and VF. Unrolling adds an extra operand StartIndex. +class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags { Instruction::BinaryOps InductionOpcode; public: @@ -3815,10 +3815,6 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, getDebugLoc()); } - /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that - /// this is only accurate after the VPlan has been unrolled. 
- bool isPart0() const { return getUnrollPart(*this) == 0; } - VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC) /// Generate the scalarized versions of the phi node as needed by their users. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1b1308c78c76e..47baa0f54c8ff 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2380,8 +2380,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { // iteration. bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this); // Compute the scalar steps and save the results in State. - Type *IntStepTy = - IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits()); unsigned StartLane = 0; unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue(); @@ -2390,20 +2388,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { EndLane = StartLane + 1; } Value *StartIdx0; - if (getUnrollPart(*this) == 0) - StartIdx0 = ConstantInt::get(IntStepTy, 0); - else { - StartIdx0 = State.get(getOperand(2), true); - if (getUnrollPart(*this) != 1) { - StartIdx0 = - Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(), - getUnrollPart(*this))); - } - StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy); - } - - if (BaseIVTy->isFloatingPointTy()) - StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy); + /* With exactly 3 operands no start index has been attached, so the start index is the constant returned by getSignedIntOrFpConstant(BaseIVTy, 0). Otherwise operand 3 is taken as the start index as-is: the multiply / sext-or-trunc / SIToFP that the removed lines performed here are no longer done in execute(), so operand 3 is presumably expected to already have BaseIVTy - TODO confirm. */ if (getNumOperands() == 3) { + StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0); + } else + StartIdx0 = State.get(getOperand(3), true); for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) { Value *StartIdx = Builder.CreateBinOp( diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index a59c8cf9ea1ef..8432cf210807d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1459,7 +1459,9 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, 
VPTypeAnalysis &TypeInfo) { // VPScalarIVSteps for part 0 can be replaced by their start value, if only // the first lane is demanded. if (auto *Steps = dyn_cast(Def)) { - if (Steps->isPart0() && vputils::onlyFirstLaneUsed(Steps)) { + /* The fold applies when no start-index operand is present (3 operands) or when operand 3 matches m_ZeroInt(). The operand count is tested first, so getOperand(3) is only evaluated when the count is not 3 (assumes the recipe never has fewer than 3 operands - TODO confirm). NOTE(review): the dyn_cast above appears without template arguments in this copy of the patch - likely an extraction artifact, verify against the original. */ if ((Steps->getNumOperands() == 3 || + match(Steps->getOperand(3), m_ZeroInt())) && + vputils::onlyFirstLaneUsed(Steps)) { Steps->replaceAllUsesWith(Steps->getOperand(0)); return; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index f215476b1e163..a1b6440a67c4e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -53,6 +53,9 @@ class UnrollState { /// Unroll replicate region \p VPR by cloning the region UF - 1 times. void unrollReplicateRegionByUF(VPRegionBlock *VPR); + /// Add a start index operand to \p Steps for \p Part. + void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part); + /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across /// all parts. 
void unrollRecipeByUF(VPRecipeBase &R); @@ -123,6 +126,33 @@ class UnrollState { }; } // namespace +void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, + unsigned Part) { + /* NOTE(review): for Part 0 the added operand comes from getConstantInt(Part), whereas the execute() change in this patch consumes operand 3 without any cast. If getConstantInt does not yield a value of the IV's type (e.g. for floating-point inductions) this branch would need the same conversion as the code below - confirm, or confirm that Part 0 is never passed (the replicate-region caller visible in this patch loops from Part = 1). */ if (Part == 0) { + Steps->addOperand(getConstantInt(Part)); + return; + } + + VPBuilder Builder(Steps); + Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0)); + Type *IntStepTy = + IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits()); + /* Start index is operand 2 (VF, per the operand order documented on VPScalarIVStepsRecipe in this patch) multiplied by Part, then sign-extended or truncated to an integer with BaseIVTy's scalar bit width, then converted with SIToFP when BaseIVTy is floating point. NOTE(review): the removed execute() code skipped the multiply when the part was 1; here it is created for every non-zero Part - presumably folded later, confirm. The new casts carry DebugLoc::getUnknown(). */ VPValue *StartIdx0 = Steps->getOperand(2); + StartIdx0 = Builder.createOverflowingOp( + Instruction::Mul, + {StartIdx0, + Plan.getConstantInt(TypeInfo.inferScalarType(StartIdx0), Part)}); + StartIdx0 = Builder.createScalarSExtOrTrunc( + StartIdx0, IntStepTy, TypeInfo.inferScalarType(StartIdx0), + DebugLoc::getUnknown()); + + if (BaseIVTy->isFloatingPointTy()) + StartIdx0 = Builder.createScalarCast(Instruction::SIToFP, StartIdx0, + BaseIVTy, DebugLoc::getUnknown()); + + Steps->addOperand(StartIdx0); +} + void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) { VPBlockBase *InsertPt = VPR->getSingleSuccessor(); for (unsigned Part = 1; Part != UF; ++Part) { @@ -136,9 +166,8 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) { VPBlockUtils::blocksOnly(Part0))) { for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) { remapOperands(&PartIR, Part); - if (auto *ScalarIVSteps = dyn_cast(&PartIR)) { - ScalarIVSteps->addOperand(getConstantInt(Part)); - } + /* Scalar IV steps in a cloned part now receive a computed start-index operand instead of the raw part constant that the removed lines added. */ if (auto *Steps = dyn_cast(&PartIR)) + addStartIndexForScalarSteps(Steps, Part); addRecipeForPart(&Part0R, &PartIR, Part); } @@ -311,10 +340,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { } remapOperands(Copy, Part); + /* Runs after remapOperands, so the start index is built from the copy's already-remapped operand 2. */ if (auto *ScalarIVSteps = dyn_cast(Copy)) + addStartIndexForScalarSteps(ScalarIVSteps, Part); + // Add operand indicating the part to generate code for, to recipes still // requiring it. - if (isa(Copy) || + if (isa(Copy) || match(Copy, m_VPInstruction())) Copy->addOperand(getConstantInt(Part));