diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 275b3d5678560..208cc7fa309e3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -488,8 +488,9 @@ class InnerLoopVectorizer { : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), AC(AC), ORE(ORE), VF(VecWidth), MinProfitableTripCount(MinProfitableTripCount), UF(UnrollFactor), - Builder(PSE.getSE()->getContext()), Cost(CM), BFI(BFI), PSI(PSI), - RTChecks(RTChecks), Plan(Plan), + Folder(PSE.getSE()->getDataLayout()), + Builder(PSE.getSE()->getContext(), Folder), Cost(CM), BFI(BFI), + PSI(PSI), RTChecks(RTChecks), Plan(Plan), VectorPHVPB(Plan.getEntry()->getSingleSuccessor()) {} virtual ~InnerLoopVectorizer() = default; @@ -597,8 +598,11 @@ class InnerLoopVectorizer { /// many different vector instructions. unsigned UF; - /// The builder that we use - IRBuilder<> Builder; + /// The folder that we use for the builder. + InstSimplifyFolder Folder; + + /// The builder that we use. + IRBuilder Builder; // --- Vectorization state --- diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index e634de1e17c69..43cfb3b28e417 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -775,29 +775,37 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { void applyFlags(Instruction &I) const { switch (OpType) { case OperationType::OverflowingBinOp: - I.setHasNoUnsignedWrap(WrapFlags.HasNUW); - I.setHasNoSignedWrap(WrapFlags.HasNSW); + if (isa(I) || isa(I)) { + I.setHasNoUnsignedWrap(WrapFlags.HasNUW); + I.setHasNoSignedWrap(WrapFlags.HasNSW); + } break; case OperationType::DisjointOp: - cast(&I)->setIsDisjoint(DisjointFlags.IsDisjoint); + if (auto *Disjoint = dyn_cast(&I)) + Disjoint->setIsDisjoint(DisjointFlags.IsDisjoint); break; case OperationType::PossiblyExactOp: - I.setIsExact(ExactFlags.IsExact); + if (isa(I)) + I.setIsExact(ExactFlags.IsExact); break; case OperationType::GEPOp: - cast(&I)->setNoWrapFlags(GEPFlags); + if (auto *GEP = dyn_cast(&I)) + GEP->setNoWrapFlags(GEPFlags); break; case OperationType::FPMathOp: - I.setHasAllowReassoc(FMFs.AllowReassoc); - I.setHasNoNaNs(FMFs.NoNaNs); - I.setHasNoInfs(FMFs.NoInfs); - I.setHasNoSignedZeros(FMFs.NoSignedZeros); - I.setHasAllowReciprocal(FMFs.AllowReciprocal); - I.setHasAllowContract(FMFs.AllowContract); - I.setHasApproxFunc(FMFs.ApproxFunc); + if (isa(I)) { + I.setHasAllowReassoc(FMFs.AllowReassoc); + I.setHasNoNaNs(FMFs.NoNaNs); + I.setHasNoInfs(FMFs.NoInfs); + I.setHasNoSignedZeros(FMFs.NoSignedZeros); + I.setHasAllowReciprocal(FMFs.AllowReciprocal); + I.setHasAllowContract(FMFs.AllowContract); + I.setHasApproxFunc(FMFs.ApproxFunc); + } break; case OperationType::NonNegOp: - I.setNonNeg(NonNegFlags.NonNeg); + if (isa(I)) + I.setNonNeg(NonNegFlags.NonNeg); break; case OperationType::Cmp: case OperationType::Other: diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll index 2f60fb4c1b07b..63b1829ffeeb1 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll @@ -7,19 +7,17 @@ define void @pr63340(ptr %A, ptr %B) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 1 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 1 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[B]], i8 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -; CHECK-NEXT: store <4 x ptr> [[DOTSPLAT]], ptr [[TMP3]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[B]], i8 [[OFFSET_IDX]] +; CHECK-NEXT: store <4 x ptr> [[DOTSPLAT]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 false, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll b/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll index 072ff23431e07..86187e3074285 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll @@ -12,13 +12,11 @@ define void @powi_only_first_lane_used_of_second_arg(ptr %p, i32 %pow) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[P]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[WIDE_LOAD]], i32 [[POW]]) -; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[TMP1]], align 4 +; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 false, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: