diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 944a11b96325d..e595347d62bf5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -19,6 +19,7 @@ #include "VPlan.h" #include "LoopVectorizationPlanner.h" #include "VPlanCFG.h" +#include "VPlanDominatorTree.h" #include "VPlanHelpers.h" #include "VPlanPatternMatch.h" #include "VPlanTransforms.h" @@ -221,7 +222,7 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI, Loop *CurrentParentLoop, Type *CanonicalIVTy) : TTI(TTI), VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan), CurrentParentLoop(CurrentParentLoop), LVer(nullptr), - TypeAnalysis(CanonicalIVTy) {} + TypeAnalysis(CanonicalIVTy), VPDT(*Plan) {} Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) { if (Def->isLiveIn()) @@ -264,7 +265,11 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { return Data.VPV2Vector[Def]; auto GetBroadcastInstrs = [this, Def](Value *V) { - bool SafeToHoist = Def->isDefinedOutsideLoopRegions(); + bool SafeToHoist = + !Def->hasDefiningRecipe() || + VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(), + Plan->getVectorPreheader()); + if (VF.isScalar()) return V; // Place the code for broadcasting invariant variables in the new preheader. @@ -929,6 +934,10 @@ void VPlan::execute(VPTransformState *State) { State->CFG.PrevVPBB = nullptr; State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor(); + // Update VPDominatorTree since VPBasicBlock may be removed after State was + // constructed. + State->VPDT.recalculate(*this); + // Disconnect VectorPreHeader from ExitBB in both the CFG and DT. BasicBlock *VectorPreHeader = State->CFG.PrevBB; cast(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr); diff --git a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h index 787cd940c4e26..995c6b8b2c2fb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h +++ b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h @@ -39,8 +39,10 @@ class VPDominatorTree : public DominatorTreeBase { public: VPDominatorTree() = default; + explicit VPDominatorTree(VPlan &Plan) { recalculate(Plan); } /// Returns true if \p A properly dominates \p B. + using Base::properlyDominates; bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B); }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h index 8bdbf556efbb3..d53b72bb2258a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h @@ -16,6 +16,7 @@ #define LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H #include "VPlanAnalysis.h" +#include "VPlanDominatorTree.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -360,6 +361,9 @@ struct VPTransformState { /// VPlan-based type analysis. VPTypeAnalysis TypeAnalysis; + + /// VPlan-based dominator tree. + VPDominatorTree VPDT; }; /// Struct to hold various analysis needed for cost computations. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll index f969be80c3c00..ab21794baa924 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -23,9 +23,9 @@ ; Check that the extractvalue operands are actually free in vector code. ; FORCED: [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0 -; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1 ; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0 ; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer +; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1 ; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0 ; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer ; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2 @@ -75,9 +75,9 @@ declare float @powf(float, float) readnone nounwind ; FORCED-LABEL: define void @test_getVectorCallCost ; FORCED: [[E1:%.+]] = extractvalue { float, float } %sv, 0 -; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1 ; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0 ; FORCED-NEXT: %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer +; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1 ; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0 ; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll index 91dd6e475ec47..473fabfc9fecc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll @@ -5,9 +5,9 @@ target triple = "aarch64-unknown-linux-gnu" define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 { ; CHECK-LABEL: @widen_extractvalue( ; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0 -; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[EXTRACT0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1 ; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i64 [[EXTRACT1]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer ; CHECK: [[ADD:%.*]] = add [[DOTSPLAT]], [[DOTSPLAT2]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index 000c1570ef92d..36ae8493951bd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -276,10 +276,10 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq [[TMP7]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[X]] to i64 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP9]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement poison, ptr [[DST]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT5]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement poison, ptr [[TMP10]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT6]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, ptr [[DST]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]