-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[VPlan] Add ReductionStartVector VPInstruction. #142290
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
53ef5ca
3c2e248
3c59e31
7e2d7a0
a44ea79
ded12fd
67ff597
f56cfac
332661d
8f86901
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -89,6 +89,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { | |||||||||||||||
| inferScalarType(R->getOperand(1)) && | ||||||||||||||||
| "different types inferred for different operands"); | ||||||||||||||||
| return IntegerType::get(Ctx, 1); | ||||||||||||||||
| case VPInstruction::ReductionStartVector: | ||||||||||||||||
| return inferScalarType(R->getOperand(0)); | ||||||||||||||||
| case VPInstruction::ComputeAnyOfResult: | ||||||||||||||||
| case VPInstruction::ComputeFindLastIVResult: | ||||||||||||||||
| case VPInstruction::ComputeReductionResult: { | ||||||||||||||||
|
|
@@ -395,6 +397,10 @@ static unsigned getVFScaleFactor(VPRecipeBase *R) { | |||||||||||||||
| return RR->getVFScaleFactor(); | ||||||||||||||||
| if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R)) | ||||||||||||||||
| return RR->getVFScaleFactor(); | ||||||||||||||||
| if (auto *VPI = dyn_cast<VPInstruction>(R)) | ||||||||||||||||
| assert( | ||||||||||||||||
| VPI->getOpcode() != VPInstruction::ReductionStartVector && | ||||||||||||||||
| "getting scaling factor of reduction-start-vector not implemented yet"); | ||||||||||||||||
|
||||||||||||||||
| if (auto *VPI = dyn_cast<VPInstruction>(R)) | |
| assert( | |
| VPI->getOpcode() != VPInstruction::ReductionStartVector && | |
| "getting scaling factor of reduction-start-vector not implemented yet"); | |
| assert((!isa<VPInstruction>(R) || | |
| cast<VPInstruction>(R)->getOpcode() != VPInstruction::ReductionStartVector) && | |
| "getting scaling factor of reduction-start-vector not implemented yet"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks!
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -604,6 +604,20 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
| return Builder.CreateVectorSplat( | ||
| State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast"); | ||
| } | ||
| case VPInstruction::ReductionStartVector: { | ||
| if (State.VF.isScalar()) | ||
| return State.get(getOperand(0), true); | ||
| IRBuilderBase::FastMathFlagGuard FMFG(Builder); | ||
| Builder.setFastMathFlags(getFastMathFlags()); | ||
| // If this start vector is scaled then it should produce a vector with fewer | ||
| // elements than the VF. | ||
| ElementCount VF = State.VF.divideCoefficientBy( | ||
| cast<ConstantInt>(getOperand(2)->getLiveInIRValue())->getZExtValue()); | ||
| auto *Iden = Builder.CreateVectorSplat(VF, State.get(getOperand(1), true)); | ||
| Constant *Zero = Builder.getInt32(0); | ||
| return Builder.CreateInsertElement(Iden, State.get(getOperand(0), true), | ||
| Zero); | ||
| } | ||
| case VPInstruction::ComputeAnyOfResult: { | ||
| // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary | ||
| // and will be removed by breaking up the recipe further. | ||
|
|
@@ -900,6 +914,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { | |
| case VPInstruction::PtrAdd: | ||
| case VPInstruction::WideIVStep: | ||
| case VPInstruction::StepVector: | ||
| case VPInstruction::ReductionStartVector: | ||
| return false; | ||
| default: | ||
| return true; | ||
|
|
@@ -930,6 +945,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |
| case VPInstruction::CanonicalIVIncrementForPart: | ||
| case VPInstruction::BranchOnCount: | ||
| case VPInstruction::BranchOnCond: | ||
| case VPInstruction::ReductionStartVector: | ||
| return true; | ||
| case VPInstruction::PtrAdd: | ||
| return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); | ||
|
|
@@ -1035,6 +1051,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |
| case VPInstruction::FirstActiveLane: | ||
| O << "first-active-lane"; | ||
| break; | ||
| case VPInstruction::ReductionStartVector: | ||
| O << "reduction-start-vector"; | ||
| break; | ||
| default: | ||
| O << Instruction::getOpcodeName(getOpcode()); | ||
| } | ||
|
|
@@ -1618,6 +1637,7 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const { | |
| Opcode == Instruction::FDiv || Opcode == Instruction::FRem || | ||
| Opcode == Instruction::FCmp || Opcode == Instruction::Select || | ||
| Opcode == VPInstruction::WideIVStep || | ||
| Opcode == VPInstruction::ReductionStartVector || | ||
| Opcode == VPInstruction::ComputeReductionResult; | ||
| case OperationType::NonNegOp: | ||
| return Opcode == Instruction::ZExt; | ||
|
|
@@ -3848,17 +3868,19 @@ void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, | |
| #endif | ||
|
|
||
| void VPReductionPHIRecipe::execute(VPTransformState &State) { | ||
| // If this phi is fed by a scaled reduction then it should output a | ||
| // vector with fewer elements than the VF. | ||
| ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor); | ||
|
||
| // Reductions do not have to start at zero. They can start with | ||
| // any loop invariant values. | ||
| VPValue *StartVPV = getStartValue(); | ||
|
|
||
| // In order to support recurrences we need to be able to vectorize Phi nodes. | ||
| // Phi nodes have cycles, so we need to vectorize them in two stages. This is | ||
| // stage #1: We create a new vector PHI node with no incoming edges. We'll use | ||
| // this value when we vectorize all of the instructions that use the PHI. | ||
| auto *ScalarTy = State.TypeAnalysis.inferScalarType(this); | ||
| BasicBlock *VectorPH = | ||
| State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0)); | ||
| bool ScalarPHI = State.VF.isScalar() || IsInLoop; | ||
| Type *VecTy = ScalarPHI ? ScalarTy : VectorType::get(ScalarTy, VF); | ||
| Value *StartV = State.get(StartVPV, ScalarPHI); | ||
| Type *VecTy = StartV->getType(); | ||
|
|
||
| BasicBlock *HeaderBB = State.CFG.PrevBB; | ||
| assert(State.CurrentParentLoop->getHeader() == HeaderBB && | ||
|
|
@@ -3867,49 +3889,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) { | |
| Phi->insertBefore(HeaderBB->getFirstInsertionPt()); | ||
| State.set(this, Phi, IsInLoop); | ||
|
|
||
| BasicBlock *VectorPH = | ||
| State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0)); | ||
| // Create start and identity vector values for the reduction in the preheader. | ||
| // TODO: Introduce recipes in VPlan preheader to create initial values. | ||
| IRBuilderBase::InsertPointGuard IPBuilder(State.Builder); | ||
| State.Builder.SetInsertPoint(VectorPH->getTerminator()); | ||
|
|
||
| // Reductions do not have to start at zero. They can start with | ||
| // any loop invariant values. | ||
| VPValue *StartVPV = getStartValue(); | ||
| RecurKind RK = RdxDesc.getRecurrenceKind(); | ||
| if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || | ||
| RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || | ||
| RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) { | ||
| // [I|F]FindLastIV will use a sentinel value to initialize the reduction | ||
| // phi or the resume value from the main vector loop when vectorizing the | ||
| // epilogue loop. In the exit block, ComputeReductionResult will generate | ||
| // checks to verify if the reduction result is the sentinel value. If the | ||
| // result is the sentinel value, it will be corrected back to the start | ||
| // value. | ||
| // TODO: The sentinel value is not always necessary. When the start value is | ||
| // a constant, and smaller than the start value of the induction variable, | ||
| // the start value can be directly used to initialize the reduction phi. | ||
| Phi->addIncoming(State.get(StartVPV, ScalarPHI), VectorPH); | ||
| return; | ||
| } | ||
|
|
||
| Value *Iden = getRecurrenceIdentity(RK, VecTy->getScalarType(), | ||
| RdxDesc.getFastMathFlags()); | ||
| unsigned CurrentPart = getUnrollPart(*this); | ||
| Value *StartV = StartVPV->getLiveInIRValue(); | ||
| if (!ScalarPHI) { | ||
| if (CurrentPart == 0) { | ||
| Iden = State.Builder.CreateVectorSplat(VF, Iden); | ||
| Constant *Zero = State.Builder.getInt32(0); | ||
| StartV = State.Builder.CreateInsertElement(Iden, StartV, Zero); | ||
| } else { | ||
| Iden = State.Builder.CreateVectorSplat(VF, Iden); | ||
| } | ||
| } | ||
|
|
||
| Value *StartVal = (CurrentPart == 0) ? StartV : Iden; | ||
| Phi->addIncoming(StartVal, VectorPH); | ||
| Phi->addIncoming(StartV, VectorPH); | ||
| } | ||
|
|
||
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code matches the handling of `Instruction::ExtractElement` and `Instruction::Freeze`; maybe we can combine them to avoid duplication?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make sense to move `case VPInstruction::ReductionStartVector:` below `case Instruction::Freeze:` so they can share the same `return inferScalarType(R->getOperand(0))` statement?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved, thanks!