diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4a89f7dd8672e..0a8209ec3d9bf 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7306,6 +7306,10 @@ DenseMap LoopVectorizationPlanner::executePlan( BestVPlan); VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan); VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF); + VPlanTransforms::runPass(VPlanTransforms::unrollReplicateRegions, BestVPlan, + BestVF); + VPlanTransforms::runPass(VPlanTransforms::mergeBlocksIntoPredecessors, + BestVPlan); bool HasBranchWeights = hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()); if (HasBranchWeights) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 6ca750fc53279..11f46d11087f3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3788,7 +3788,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { /// A recipe for handling phi nodes of integer and floating-point inductions, /// producing their scalar values. class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, - public VPUnrollPartAccessor<3> { + public VPUnrollPartAccessor<4> { Instruction::BinaryOps InductionOpcode; public: @@ -3812,10 +3812,14 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, ~VPScalarIVStepsRecipe() override = default; VPScalarIVStepsRecipe *clone() override { - return new VPScalarIVStepsRecipe( + auto *NewR = new VPScalarIVStepsRecipe( getOperand(0), getOperand(1), getOperand(2), InductionOpcode, hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(), getDebugLoc()); + // Add lane/unroll-part operands, if present. + for (VPValue *Op : drop_begin(operands(), 3)) + NewR->addOperand(Op); + return NewR; } /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 6491a2ce6813b..68a8c0abf2682 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2368,7 +2368,16 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { if (State.Lane) { StartLane = State.Lane->getKnownLane(); EndLane = StartLane + 1; + } else if (getNumOperands() == 5) { + // Operand 3 is the Lane operand (when present after replicating by VF). + VPValue *Op3 = getOperand(3); + assert(Op3->isLiveIn() && "lane operand must be a live-in"); + auto *C = cast(Op3->getLiveInIRValue()); + unsigned Val = C->getZExtValue(); + StartLane = Val; + EndLane = Val + 1; } + Value *StartIdx0; if (getUnrollPart(*this) == 0) StartIdx0 = ConstantInt::get(IntStepTy, 0); @@ -2395,7 +2404,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { "scalable"); auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step); auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul); - State.set(this, Add, VPLane(Lane)); + if (State.Lane) + State.set(this, Add, VPLane(Lane)); + else + State.set(this, Add, VPLane(0)); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 827dd4b6439ae..7e750a3c13afa 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -481,7 +481,7 @@ static void addReplicateRegions(VPlan &Plan) { /// Remove redundant VPBasicBlocks by merging them into their predecessor if /// the predecessor has a single successor. -static bool mergeBlocksIntoPredecessors(VPlan &Plan) { +bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) { SmallVector WorkList; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_deep(Plan.getEntry()))) { @@ -1440,9 +1440,14 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) { } // VPScalarIVSteps for part 0 can be replaced by their start value, if only - // the first lane is demanded. + // the first lane is demanded and both Lane and UnrollPart operands are 0. if (auto *Steps = dyn_cast(Def)) { - if (Steps->isPart0() && vputils::onlyFirstLaneUsed(Steps)) { + bool LaneIsZero = Steps->getNumOperands() >= 4 && + match(Steps->getOperand(3), m_ZeroInt()); + bool PartIsZero = + Steps->getNumOperands() < 5 || match(Steps->getOperand(4), m_ZeroInt()); + if (Steps->isPart0() && LaneIsZero && PartIsZero && + vputils::onlyFirstLaneUsed(Steps)) { Steps->replaceAllUsesWith(Steps->getOperand(0)); return; } @@ -4314,9 +4319,9 @@ void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) { for (VPBasicBlock *VPBB : concat(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - if (!isa(&R)) + if (!isa(&R)) continue; - auto *DefR = cast(&R); + auto *DefR = cast(&R); auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) { VPRegionBlock *ParentRegion = cast(U)->getRegion(); return !U->usesScalars(DefR) || ParentRegion != LoopRegion; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index ae3797dee1f07..d9c4b1d96d6ea 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -178,6 +178,10 @@ struct VPlanTransforms { /// replicate regions, thereby dissolving the latter. static void replicateByVF(VPlan &Plan, ElementCount VF); + /// Replace replicate regions by explicitly replicating the regions' contents + /// \p VF times, each copy processing a single lane. + static void unrollReplicateRegions(VPlan &Plan, ElementCount VF); + /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the /// resulting plan to \p BestVF and \p BestUF. static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, @@ -189,6 +193,8 @@ struct VPlanTransforms { /// block merging. LLVM_ABI_FOR_TEST static void optimize(VPlan &Plan); + static bool mergeBlocksIntoPredecessors(VPlan &Plan); + /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then /// region block and remove the mask operand. Optimize the created regions by /// iteratively sinking scalar operands into the region, followed by merging diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index f215476b1e163..6673eaf9b67ae 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -24,6 +24,9 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "vplan" using namespace llvm; using namespace llvm::VPlanPatternMatch; @@ -121,6 +124,7 @@ class UnrollState { R->setOperand(OpIdx, getValueForPart(Op, Part)); } }; + } // namespace void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) { @@ -137,6 +141,7 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) { for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) { remapOperands(&PartIR, Part); if (auto *ScalarIVSteps = dyn_cast(&PartIR)) { + ScalarIVSteps->addOperand(getConstantInt(0)); ScalarIVSteps->addOperand(getConstantInt(Part)); } @@ -526,9 +531,21 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR, *RepR, RepR->getDebugLoc()); } else { - assert(isa(DefR) && + assert((isa(DefR)) && "DefR must be a VPReplicateRecipe or VPInstruction"); New = DefR->clone(); + if (isa(New)) { + // Add or update lane operand for VPScalarIVStepsRecipe. + if (NewOps.size() == 3) { + NewOps.push_back(Plan.getConstantInt(IdxTy, 0)); + New->addOperand(NewOps.back()); + } + NewOps.push_back(Plan.getConstantInt(IdxTy, Lane.getKnownLane())); + New->addOperand(NewOps.back()); + if (NewOps.size() == 5) + std::swap(NewOps[3], NewOps[4]); + } + for (const auto &[Idx, Op] : enumerate(NewOps)) { New->setOperand(Idx, Op); } @@ -558,7 +575,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { SmallVector ToRemove; for (VPBasicBlock *VPBB : VPBBsToUnroll) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - if (!isa(&R) || + if (!isa(&R) || (isa(&R) && cast(&R)->isSingleScalar()) || (isa(&R) && @@ -566,6 +583,19 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { cast(&R)->getOpcode() != VPInstruction::Unpack)) continue; + if (isa(&R) && Plan.hasScalarVFOnly()) { + // Add lane operand to VPScalarIVStepsRecipe only when the plan is + // scalar. + if (R.getNumOperands() == 4) { + R.addOperand(R.getOperand(3)); + R.setOperand(3, Plan.getConstantInt(IdxTy, 0)); + } else { + R.addOperand(Plan.getConstantInt(IdxTy, 0)); + R.addOperand(Plan.getConstantInt(IdxTy, 0)); + } + continue; + } + auto *DefR = cast(&R); VPBuilder Builder(DefR); if (DefR->getNumUsers() == 0) { @@ -608,3 +638,177 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { for (auto *R : reverse(ToRemove)) R->eraseFromParent(); } + +/// Process recipes in a single lane's blocks, updating them for lane-specific +/// operations. +static void processLane(VPlan &Plan, Type *IdxTy, unsigned Lane, + ElementCount VF, ArrayRef RegionBlocks, + DenseMap &Old2NewBlocks) { + DenseMap Old2NewVPValues; + for (VPBlockBase *OldVPB : RegionBlocks) { + auto *OldBB = cast(OldVPB); + auto *NewBB = cast(Old2NewBlocks.lookup(OldVPB)); + for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) { + for (const auto &[OldV, NewV] : + zip(OldR.definedValues(), NewR.definedValues())) + Old2NewVPValues[OldV] = NewV; + } + + // Update lane operands and remap operands to use copies for current lane. + for (VPRecipeBase &NewR : make_early_inc_range(*NewBB)) { + if (auto *Steps = dyn_cast(&NewR)) + Steps->setOperand(3, Plan.getConstantInt(IdxTy, Lane)); + else if (match(&NewR, m_ExtractElement(m_VPValue(), m_ZeroInt()))) + NewR.setOperand(1, Plan.getConstantInt(IdxTy, Lane)); + + // Remap operands to use lane-specific values. + for (const auto &[I, Op] : enumerate(NewR.operands())) { + // Use cloned value if operand was defined in the region. + if (auto *New = Old2NewVPValues.lookup(Op)) + NewR.setOperand(I, New); + } + } + } +} + +/// Process a single lane: clone blocks (or reuse original for lane 0), collect +/// value mappings, and process recipes for lane-specific operations. +static void processSingleLane( + VPlan &Plan, Type *IdxTy, unsigned Lane, ElementCount VF, + ArrayRef RegionBlocks, VPBlockBase *Entry, + VPBlockBase *Exiting, + SmallVectorImpl> &LaneClones) { + DenseMap Old2NewBlocks; + if (Lane == 0) { + // Lane 0 uses the original blocks, and the recipes are adjusted: + // VPReplicateRecipes are converted to single-scalar ones, branch-on-mask is + // converted into BranchOnCond and extracts are created as needed. + for (VPBlockBase *VPB : RegionBlocks) { + Old2NewBlocks[VPB] = VPB; + + for (VPRecipeBase &NewR : + make_early_inc_range(*cast(VPB))) { + VPBuilder Builder(&NewR); + for (const auto &[I, Op] : enumerate(NewR.operands())) { + // Skip operands that don't need extraction: scalar VF (no vectors), + // values defined in the same block (already scalar), or values that + // are already single scalars. + if (VF.isScalar() || + (Op->getDefiningRecipe() && + Op->getDefiningRecipe()->getParent() == VPB) || + vputils::isSingleScalar(Op)) + continue; + + // Extract the lane from values defined outside the region. + VPValue *Idx = Plan.getConstantInt(IdxTy, Lane); + VPValue *Extract = Builder.createNaryOp( + Instruction::ExtractElement, {Op, Idx}, NewR.getDebugLoc()); + NewR.setOperand(I, Extract); + } + + if (auto *RepR = dyn_cast(&NewR)) { + auto *New = new VPReplicateRecipe( + RepR->getUnderlyingInstr(), RepR->operands(), + /* IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR, *RepR, + RepR->getDebugLoc()); + New->insertBefore(RepR); + RepR->replaceAllUsesWith(New); + RepR->eraseFromParent(); + } else if (auto *BranchOnMask = dyn_cast(&NewR)) { + Builder.createNaryOp(VPInstruction::BranchOnCond, + {BranchOnMask->getOperand(0)}, + BranchOnMask->getDebugLoc()); + BranchOnMask->eraseFromParent(); + } else if (auto *Steps = dyn_cast(&NewR)) { + // Add lane operand (4th operand) for VPScalarIVStepsRecipe if not + // already present. + unsigned NumOps = Steps->getNumOperands(); + if (NumOps == 4) { + // Has UnrollPart at position 3, need to insert Lane before it. + VPValue *UnrollPart = Steps->getOperand(3); + Steps->setOperand(3, Plan.getConstantInt(IdxTy, Lane)); + Steps->addOperand(UnrollPart); + } else if (NumOps == 3) { + // Just BaseIV, Step, VF - add Lane. + Steps->addOperand(Plan.getConstantInt(IdxTy, Lane)); + Steps->addOperand(Plan.getConstantInt(IdxTy, 0)); + } + } + } + } + } else { + // Clone blocks and connect them according to original structure. + for (VPBlockBase *OrigBlock : RegionBlocks) { + VPBlockBase *ClonedBlock = OrigBlock->clone(); + Old2NewBlocks[OrigBlock] = ClonedBlock; + ClonedBlock->setParent(Entry->getParent()); + } + for (VPBlockBase *OrigBlock : RegionBlocks) { + if (OrigBlock == Exiting) + continue; + for (VPBlockBase *OrigSucc : OrigBlock->successors()) + VPBlockUtils::connectBlocks(Old2NewBlocks[OrigBlock], + Old2NewBlocks[OrigSucc]); + } + } + + processLane(Plan, IdxTy, Lane, VF, RegionBlocks, Old2NewBlocks); + LaneClones.push_back({Old2NewBlocks[Entry], Old2NewBlocks[Exiting]}); +} + +void VPlanTransforms::unrollReplicateRegions(VPlan &Plan, ElementCount VF) { + // Collect all replicate regions in the plan before modifying the CFG. + SmallVector ReplicateRegions; + for (VPBlockBase *Block : + vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) { + if (auto *Region = dyn_cast(Block)) { + if (Region->isReplicator()) + ReplicateRegions.push_back(Region); + } + } + + Type *IdxTy = IntegerType::get(Plan.getContext(), 32); + + for (VPRegionBlock *Region : ReplicateRegions) { + assert(!VF.isScalable() && "cannot replicate across scalable VFs"); + + VPBlockBase *Entry = Region->getEntry(); + VPBlockBase *Exiting = Region->getExiting(); + + // Skip regions with live-outs as packing scalar results back into vectors + // is not yet implemented. + if (any_of(*cast(Exiting), IsaPred)) + continue; + + // Get region context before dissolving. + VPBlockBase *Pred = Region->getSinglePredecessor(); + assert(Pred && "Replicate region must have a single predecessor"); + SmallVector Successors(Region->successors()); + + // Disconnect and dissolve the region. + VPBlockUtils::disconnectBlocks(Pred, Region); + for (VPBlockBase *Succ : Successors) + VPBlockUtils::disconnectBlocks(Region, Succ); + + SmallVector RegionBlocks(vp_depth_first_shallow(Entry)); + VPRegionBlock *ParentRegion = Region->getParent(); + for (VPBlockBase *Block : RegionBlocks) + Block->setParent(ParentRegion); + VPBlockUtils::connectBlocks(Pred, Entry); + + // Process each lane: clone blocks, collect value mappings, and process + // recipes for lane-specific operations. + SmallVector> LaneClones; + for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) { + processSingleLane(Plan, IdxTy, Lane, VF, RegionBlocks, Entry, Exiting, + LaneClones); + } + + // Connect lanes sequentially and connect last lane to successors. + for (unsigned Lane = 1; Lane < VF.getKnownMinValue(); ++Lane) + VPBlockUtils::connectBlocks(LaneClones[Lane - 1].second, + LaneClones[Lane].first); + for (VPBlockBase *Succ : Successors) + VPBlockUtils::connectBlocks(LaneClones.back().second, Succ); + } +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll index 8ab5723a52a11..1c2686b67331a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll @@ -199,8 +199,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[IV]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP72]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 0 ; CHECK-NEXT: store i8 [[TMP10]], ptr [[TMP9]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index b549a06f08f8c..7feb83e7d9cba 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -290,13 +290,12 @@ define void @latch_branch_cost(ptr %dst) { ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: br label %[[VECTOR_BODY:.*]] ; PRED: [[VECTOR_BODY]]: -; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ] +; PRED-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ] ; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ] ; PRED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 99) ; PRED-NEXT: [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0 ; PRED-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; PRED: [[PRED_STORE_IF]]: -; PRED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; PRED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]] ; PRED-NEXT: store i8 0, ptr [[TMP3]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -304,7 +303,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1 ; PRED-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] ; PRED: [[PRED_STORE_IF1]]: -; PRED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 +; PRED-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1 ; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]] ; PRED-NEXT: store i8 0, ptr [[TMP6]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE2]] @@ -312,7 +311,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2 ; PRED-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] ; PRED: [[PRED_STORE_IF3]]: -; PRED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 2 +; PRED-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], 2 ; PRED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]] ; PRED-NEXT: store i8 0, ptr [[TMP9]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE4]] @@ -320,7 +319,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3 ; PRED-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] ; PRED: [[PRED_STORE_IF5]]: -; PRED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 3 +; PRED-NEXT: [[TMP11:%.*]] = add i64 [[TMP2]], 3 ; PRED-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]] ; PRED-NEXT: store i8 0, ptr [[TMP12]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE6]] @@ -328,7 +327,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4 ; PRED-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] ; PRED: [[PRED_STORE_IF7]]: -; PRED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 4 +; PRED-NEXT: [[TMP14:%.*]] = add i64 [[TMP2]], 4 ; PRED-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]] ; PRED-NEXT: store i8 0, ptr [[TMP15]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE8]] @@ -336,7 +335,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5 ; PRED-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] ; PRED: [[PRED_STORE_IF9]]: -; PRED-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 5 +; PRED-NEXT: [[TMP17:%.*]] = add i64 [[TMP2]], 5 ; PRED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]] ; PRED-NEXT: store i8 0, ptr [[TMP18]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE10]] @@ -344,7 +343,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6 ; PRED-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] ; PRED: [[PRED_STORE_IF11]]: -; PRED-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 6 +; PRED-NEXT: [[TMP20:%.*]] = add i64 [[TMP2]], 6 ; PRED-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP20]] ; PRED-NEXT: store i8 0, ptr [[TMP21]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE12]] @@ -352,12 +351,12 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7 ; PRED-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]] ; PRED: [[PRED_STORE_IF13]]: -; PRED-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 7 +; PRED-NEXT: [[TMP23:%.*]] = add i64 [[TMP2]], 7 ; PRED-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]] ; PRED-NEXT: store i8 0, ptr [[TMP24]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE14]] ; PRED: [[PRED_STORE_CONTINUE14]]: -; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 8 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104 ; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -710,121 +709,97 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP22]], i32 0 ; DEFAULT-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; DEFAULT: [[PRED_STORE_IF]]: -; DEFAULT-NEXT: [[TMP25:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP25]], align 4 ; DEFAULT-NEXT: [[TMP26:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP26]], align 4 ; DEFAULT-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i64 4 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP27]], align 4 -; DEFAULT-NEXT: [[TMP28:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0 -; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP28]], i64 8 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP29]], align 4 -; DEFAULT-NEXT: [[TMP30:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 0 +; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[TMP26]], i64 8 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP30]], align 4 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP26]], align 4 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]] ; DEFAULT: [[PRED_STORE_CONTINUE]]: ; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <8 x i1> [[TMP22]], i32 1 ; DEFAULT-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] ; DEFAULT: [[PRED_STORE_IF14]]: -; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP32]], align 4 ; DEFAULT-NEXT: [[TMP33:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP33]], align 4 ; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP33]], i64 4 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP34]], align 4 -; DEFAULT-NEXT: [[TMP35:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1 -; DEFAULT-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[TMP35]], i64 8 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4 -; DEFAULT-NEXT: [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1 +; DEFAULT-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP33]], i64 8 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP37]], align 4 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP33]], align 4 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE15]] ; DEFAULT: [[PRED_STORE_CONTINUE15]]: ; DEFAULT-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP22]], i32 2 ; DEFAULT-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]] ; DEFAULT: [[PRED_STORE_IF16]]: -; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP39]], align 4 ; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP40]], align 4 ; DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[TMP40]], i64 4 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP41]], align 4 -; DEFAULT-NEXT: [[TMP42:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2 -; DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP42]], i64 8 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4 -; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2 +; DEFAULT-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[TMP40]], i64 8 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP44]], align 4 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP40]], align 4 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE17]] ; DEFAULT: [[PRED_STORE_CONTINUE17]]: ; DEFAULT-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP22]], i32 3 ; DEFAULT-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] ; DEFAULT: [[PRED_STORE_IF18]]: -; DEFAULT-NEXT: [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP46]], align 4 ; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP47]], align 4 ; DEFAULT-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP47]], i64 4 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP48]], align 4 -; DEFAULT-NEXT: [[TMP49:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3 -; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[TMP49]], i64 8 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4 -; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3 +; DEFAULT-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[TMP47]], i64 8 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP51]], align 4 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP47]], align 4 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE19]] ; DEFAULT: [[PRED_STORE_CONTINUE19]]: ; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <8 x i1> [[TMP22]], i32 4 ; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]] ; DEFAULT: [[PRED_STORE_IF20]]: -; DEFAULT-NEXT: [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP53]], align 4 ; DEFAULT-NEXT: [[TMP54:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP54]], align 4 ; DEFAULT-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP54]], i64 4 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP55]], align 4 -; DEFAULT-NEXT: [[TMP56:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4 -; DEFAULT-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr [[TMP56]], i64 8 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4 -; DEFAULT-NEXT: [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4 +; DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[TMP54]], i64 8 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP58]], align 4 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP54]], align 4 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE21]] ; DEFAULT: [[PRED_STORE_CONTINUE21]]: ; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <8 x i1> [[TMP22]], i32 5 ; DEFAULT-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]] ; DEFAULT: [[PRED_STORE_IF22]]: -; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP60]], align 4 ; DEFAULT-NEXT: [[TMP61:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP61]], align 4 ; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[TMP61]], i64 4 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP62]], align 4 -; DEFAULT-NEXT: [[TMP63:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5 -; DEFAULT-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[TMP63]], i64 8 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4 -; DEFAULT-NEXT: [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5 +; DEFAULT-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr [[TMP61]], i64 8 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP65]], align 4 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP61]], align 4 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE23]] ; DEFAULT: [[PRED_STORE_CONTINUE23]]: ; DEFAULT-NEXT: [[TMP66:%.*]] = extractelement <8 x i1> [[TMP22]], i32 6 ; DEFAULT-NEXT: br i1 [[TMP66]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]] ; DEFAULT: [[PRED_STORE_IF24]]: -; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP67]], align 4 ; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP68]], align 4 ; DEFAULT-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[TMP68]], i64 4 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP69]], align 4 -; DEFAULT-NEXT: [[TMP70:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6 -; DEFAULT-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[TMP70]], i64 8 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4 -; DEFAULT-NEXT: [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6 +; DEFAULT-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr [[TMP68]], i64 8 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP72]], align 4 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP68]], align 4 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE25]] ; DEFAULT: [[PRED_STORE_CONTINUE25]]: ; DEFAULT-NEXT: [[TMP73:%.*]] = extractelement <8 x i1> [[TMP22]], i32 7 ; DEFAULT-NEXT: br i1 [[TMP73]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27]] ; DEFAULT: [[PRED_STORE_IF26]]: -; DEFAULT-NEXT: [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP74]], align 4 ; DEFAULT-NEXT: [[TMP75:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP75]], align 4 ; DEFAULT-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr [[TMP75]], i64 4 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP76]], align 4 -; DEFAULT-NEXT: [[TMP77:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7 -; DEFAULT-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[TMP77]], i64 8 -; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4 -; DEFAULT-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7 +; DEFAULT-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[TMP75]], i64 8 ; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP79]], align 4 +; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP75]], align 4 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE27]] ; DEFAULT: [[PRED_STORE_CONTINUE27]]: ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -896,121 +871,97 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP26]], i32 0 ; PRED-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; PRED: [[PRED_STORE_IF]]: -; PRED-NEXT: [[TMP29:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP29]], align 4 ; PRED-NEXT: [[TMP30:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP30]], align 4 ; PRED-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[TMP30]], i64 4 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP31]], align 4 -; PRED-NEXT: [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0 -; PRED-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[TMP32]], i64 8 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP33]], align 4 -; PRED-NEXT: [[TMP34:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 0 +; PRED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP30]], i64 8 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP34]], align 4 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP30]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] ; PRED: [[PRED_STORE_CONTINUE]]: ; PRED-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP26]], i32 1 ; PRED-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] ; PRED: [[PRED_STORE_IF14]]: -; PRED-NEXT: [[TMP36:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4 ; PRED-NEXT: [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP37]], align 4 ; PRED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[TMP37]], i64 4 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP38]], align 4 -; PRED-NEXT: [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1 -; PRED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[TMP39]], i64 8 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP40]], align 4 -; PRED-NEXT: [[TMP41:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1 +; PRED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[TMP37]], i64 8 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP41]], align 4 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP37]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE15]] ; PRED: [[PRED_STORE_CONTINUE15]]: ; PRED-NEXT: [[TMP42:%.*]] = extractelement <8 x i1> [[TMP26]], i32 2 ; PRED-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]] ; PRED: [[PRED_STORE_IF16]]: -; PRED-NEXT: [[TMP43:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4 ; PRED-NEXT: [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP44]], align 4 ; PRED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[TMP44]], i64 4 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP45]], align 4 -; PRED-NEXT: [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2 -; PRED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP46]], i64 8 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP47]], align 4 -; PRED-NEXT: [[TMP48:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2 +; PRED-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP44]], i64 8 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP48]], align 4 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP44]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE17]] ; PRED: [[PRED_STORE_CONTINUE17]]: ; PRED-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP26]], i32 3 ; PRED-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] ; PRED: [[PRED_STORE_IF18]]: -; PRED-NEXT: [[TMP50:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4 ; PRED-NEXT: [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP51]], align 4 ; PRED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[TMP51]], i64 4 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP52]], align 4 -; PRED-NEXT: [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3 -; PRED-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[TMP53]], i64 8 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP54]], align 4 -; PRED-NEXT: [[TMP55:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3 +; PRED-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP51]], i64 8 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP55]], align 4 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP51]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE19]] ; PRED: [[PRED_STORE_CONTINUE19]]: ; PRED-NEXT: [[TMP56:%.*]] = extractelement <8 x i1> [[TMP26]], i32 4 ; PRED-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]] ; PRED: [[PRED_STORE_IF20]]: -; PRED-NEXT: [[TMP57:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4 ; PRED-NEXT: [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP58]], align 4 ; PRED-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr [[TMP58]], i64 4 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP59]], align 4 -; PRED-NEXT: [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4 -; PRED-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr [[TMP60]], i64 8 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP61]], align 4 -; PRED-NEXT: [[TMP62:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4 +; PRED-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[TMP58]], i64 8 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP62]], align 4 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP58]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE21]] ; PRED: [[PRED_STORE_CONTINUE21]]: ; PRED-NEXT: [[TMP63:%.*]] = extractelement <8 x i1> [[TMP26]], i32 5 ; PRED-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]] ; PRED: [[PRED_STORE_IF22]]: -; PRED-NEXT: [[TMP64:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4 ; PRED-NEXT: [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP65]], align 4 ; PRED-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[TMP65]], i64 4 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP66]], align 4 -; PRED-NEXT: [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5 -; PRED-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[TMP67]], i64 8 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP68]], align 4 -; PRED-NEXT: [[TMP69:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5 +; PRED-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[TMP65]], i64 8 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP69]], align 4 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP65]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE23]] ; PRED: [[PRED_STORE_CONTINUE23]]: ; PRED-NEXT: [[TMP70:%.*]] = extractelement <8 x i1> [[TMP26]], i32 6 ; PRED-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]] ; PRED: [[PRED_STORE_IF24]]: -; PRED-NEXT: [[TMP71:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4 ; PRED-NEXT: [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP72]], align 4 ; PRED-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr [[TMP72]], i64 4 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP73]], align 4 -; PRED-NEXT: [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6 -; PRED-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr [[TMP74]], i64 8 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP75]], align 4 -; PRED-NEXT: [[TMP76:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6 +; PRED-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr [[TMP72]], i64 8 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP76]], align 4 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP72]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE25]] ; PRED: [[PRED_STORE_CONTINUE25]]: ; PRED-NEXT: [[TMP77:%.*]] = extractelement <8 x i1> [[TMP26]], i32 7 ; PRED-NEXT: br i1 [[TMP77]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27]] ; PRED: [[PRED_STORE_IF26]]: -; PRED-NEXT: [[TMP78:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4 ; PRED-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP79]], align 4 ; PRED-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr [[TMP79]], i64 4 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP80]], align 4 -; PRED-NEXT: [[TMP81:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7 -; PRED-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr [[TMP81]], i64 8 -; PRED-NEXT: store float 0.000000e+00, ptr [[TMP82]], align 4 -; PRED-NEXT: [[TMP83:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7 +; PRED-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[TMP79]], i64 8 ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP83]], align 4 +; PRED-NEXT: store float 0.000000e+00, ptr [[TMP79]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE27]] ; PRED: [[PRED_STORE_CONTINUE27]]: ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll index 14f5dd7d41691..d070451187cb0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll @@ -16,43 +16,38 @@ define void @low_trip_count_small(i32 %x, ptr %dst) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 0 ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2 ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[NEXT_GEP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP3]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP4]], i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <4 x i64> , [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 1 ; CHECK-NEXT: store i8 0, ptr [[TMP7]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 -; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] -; CHECK: [[PRED_STORE_IF5]]: +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] +; CHECK: [[PRED_STORE_IF4]]: ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 1 ; CHECK-NEXT: store i8 0, ptr [[TMP9]], align 1 -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] -; CHECK: [[PRED_STORE_CONTINUE6]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]] +; CHECK: [[PRED_STORE_CONTINUE5]]: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 -; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] -; CHECK: [[PRED_STORE_IF7]]: +; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] +; CHECK: [[PRED_STORE_IF6]]: ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP3]], i64 1 ; CHECK-NEXT: store i8 0, ptr [[TMP11]], align 1 -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]] -; CHECK: [[PRED_STORE_CONTINUE8]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] +; CHECK: [[PRED_STORE_CONTINUE7]]: ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 -; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] -; CHECK: [[PRED_STORE_IF9]]: +; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] +; CHECK: [[PRED_STORE_IF8]]: ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP4]], i64 1 ; CHECK-NEXT: store i8 0, ptr [[TMP13]], align 1 -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]] -; CHECK: [[PRED_STORE_CONTINUE10]]: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] +; CHECK: [[PRED_STORE_CONTINUE9]]: ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll index 26a9545764091..33be739be4718 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll @@ -20,10 +20,9 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ , [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP4]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 4b097ba2422e4..760fbaea52fbe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -610,8 +610,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 ; PRED-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; PRED: [[PRED_STORE_IF]]: -; PRED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; PRED-NEXT: [[TMP9:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2 +; PRED-NEXT: [[TMP9:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[INDEX]], i32 2 ; PRED-NEXT: store i32 0, ptr [[TMP9]], align 8 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]] ; PRED: [[PRED_STORE_CONTINUE]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll index 2557ae55d2c85..b396d584a8497 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll @@ -21,31 +21,30 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1 -; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0:![0-9]+]] -; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]] -; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope [[META0]] -; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]] +; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP8]], align 4, !alias.scope [[META0]] +; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0]] +; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]] ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i24> poison, i24 [[TMP13]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i24> [[TMP17]], i24 [[TMP14]], i32 1 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i24> [[TMP18]], i24 [[TMP15]], i32 2 ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i24> [[TMP19]], i24 [[TMP16]], i32 3 ; CHECK-NEXT: [[TMP21:%.*]] = zext <4 x i24> [[TMP20]] to <4 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP23]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll index 5b4bb70e6a479..2108c15b7f838 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll @@ -174,11 +174,10 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks( ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[GEP_J]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16> @@ -186,14 +185,14 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks( ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]] ; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP6]], align 2 -; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP7]], align 2 -; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP8]], align 2 -; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2 +; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP15]], align 2 +; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP16]], align 2 +; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP17]], align 2 ; CHECK-NEXT: store i64 0, ptr [[A]], align 8 ; CHECK-NEXT: store i64 0, ptr [[B]], align 8 ; CHECK-NEXT: store i64 0, ptr [[C]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll index 3311cbc11881b..e667ed1d32946 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll @@ -64,7 +64,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] ; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR2:[0-9]+]] +; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]] ; TFCOMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 ; TFCOMMON-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer @@ -80,7 +80,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1 ; TFCOMMON-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE6]] ; TFCOMMON: pred.store.if1: -; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0 +; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1 ; TFCOMMON-NEXT: store double [[TMP19]], ptr [[P]], align 8 ; TFCOMMON-NEXT: br label [[PRED_STORE_CONTINUE6]] ; TFCOMMON: pred.store.continue2: @@ -106,7 +106,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR2:[0-9]+]] +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]] ; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 ; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer @@ -122,7 +122,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] ; TFA_INTERLEAVE: pred.store.if3: -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 ; TFA_INTERLEAVE-NEXT: store double [[TMP22]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE5]] ; TFA_INTERLEAVE: pred.store.continue4: @@ -136,7 +136,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] ; TFA_INTERLEAVE: pred.store.if7: -; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0 +; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 ; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]] ; TFA_INTERLEAVE: pred.store.continue8: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index bdbf08aecf6b3..e70bf7f01ee64 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -196,7 +196,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT5]], <16 x i8> poison, <16 x i32> zeroinitializer ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] ; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ] +; DEFAULT-NEXT: [[TMP9:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ] ; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE35]] ] ; DEFAULT-NEXT: [[VEC_IND1:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT36:%.*]], %[[PRED_STORE_CONTINUE35]] ] ; DEFAULT-NEXT: [[TMP0:%.*]] = icmp ule <16 x i8> [[VEC_IND]], splat (i8 14) @@ -210,7 +210,6 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0 ; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; DEFAULT: [[PRED_STORE_IF]]: -; DEFAULT-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]] ; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <16 x i8> [[TMP7]], i32 0 ; DEFAULT-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 1 @@ -219,7 +218,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1 ; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] ; DEFAULT: [[PRED_STORE_IF6]]: -; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 1 +; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[TMP9]], 1 ; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP13]] ; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <16 x i8> [[TMP7]], i32 1 ; DEFAULT-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1 @@ -228,7 +227,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2 ; DEFAULT-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] ; DEFAULT: [[PRED_STORE_IF8]]: -; DEFAULT-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 +; DEFAULT-NEXT: [[TMP17:%.*]] = add i64 [[TMP9]], 2 ; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP17]] ; DEFAULT-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP7]], i32 2 ; DEFAULT-NEXT: store i8 [[TMP19]], ptr [[TMP18]], align 1 @@ -237,7 +236,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3 ; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11:.*]] ; DEFAULT: [[PRED_STORE_IF10]]: -; DEFAULT-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 +; DEFAULT-NEXT: [[TMP21:%.*]] = add i64 [[TMP9]], 3 ; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP21]] ; DEFAULT-NEXT: [[TMP23:%.*]] = extractelement <16 x i8> [[TMP7]], i32 3 ; DEFAULT-NEXT: store i8 [[TMP23]], ptr [[TMP22]], align 1 @@ -246,7 +245,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4 ; DEFAULT-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] ; DEFAULT: [[PRED_STORE_IF12]]: -; DEFAULT-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], 4 +; DEFAULT-NEXT: [[TMP25:%.*]] = add i64 [[TMP9]], 4 ; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP25]] ; DEFAULT-NEXT: [[TMP27:%.*]] = extractelement <16 x i8> [[TMP7]], i32 4 ; DEFAULT-NEXT: store i8 [[TMP27]], ptr [[TMP26]], align 1 @@ -255,7 +254,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5 ; DEFAULT-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] ; DEFAULT: [[PRED_STORE_IF14]]: -; DEFAULT-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 5 +; DEFAULT-NEXT: [[TMP29:%.*]] = add i64 [[TMP9]], 5 ; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP29]] ; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <16 x i8> [[TMP7]], i32 5 ; DEFAULT-NEXT: store i8 [[TMP31]], ptr [[TMP30]], align 1 @@ -264,7 +263,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6 ; DEFAULT-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]] ; DEFAULT: [[PRED_STORE_IF16]]: -; DEFAULT-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 6 +; DEFAULT-NEXT: [[TMP33:%.*]] = add i64 [[TMP9]], 6 ; DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP33]] ; DEFAULT-NEXT: [[TMP35:%.*]] = extractelement <16 x i8> [[TMP7]], i32 6 ; DEFAULT-NEXT: store i8 [[TMP35]], ptr [[TMP34]], align 1 @@ -273,7 +272,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7 ; DEFAULT-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] ; DEFAULT: [[PRED_STORE_IF18]]: -; DEFAULT-NEXT: [[TMP37:%.*]] = add i64 [[INDEX]], 7 +; DEFAULT-NEXT: [[TMP37:%.*]] = add i64 [[TMP9]], 7 ; DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP37]] ; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <16 x i8> [[TMP7]], i32 7 ; DEFAULT-NEXT: store i8 [[TMP39]], ptr [[TMP38]], align 1 @@ -282,7 +281,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8 ; DEFAULT-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]] ; DEFAULT: [[PRED_STORE_IF20]]: -; DEFAULT-NEXT: [[TMP41:%.*]] = add i64 [[INDEX]], 8 +; DEFAULT-NEXT: [[TMP41:%.*]] = add i64 [[TMP9]], 8 ; DEFAULT-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP41]] ; DEFAULT-NEXT: [[TMP43:%.*]] = extractelement <16 x i8> [[TMP7]], i32 8 ; DEFAULT-NEXT: store i8 [[TMP43]], ptr [[TMP42]], align 1 @@ -291,7 +290,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9 ; DEFAULT-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]] ; DEFAULT: [[PRED_STORE_IF22]]: -; DEFAULT-NEXT: [[TMP45:%.*]] = add i64 [[INDEX]], 9 +; DEFAULT-NEXT: [[TMP45:%.*]] = add i64 [[TMP9]], 9 ; DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP45]] ; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <16 x i8> [[TMP7]], i32 9 ; DEFAULT-NEXT: store i8 [[TMP47]], ptr [[TMP46]], align 1 @@ -300,7 +299,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP48:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10 ; DEFAULT-NEXT: br i1 [[TMP48]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]] ; DEFAULT: [[PRED_STORE_IF24]]: -; DEFAULT-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], 10 +; DEFAULT-NEXT: [[TMP49:%.*]] = add i64 [[TMP9]], 10 ; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP49]] ; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <16 x i8> [[TMP7]], i32 10 ; DEFAULT-NEXT: store i8 [[TMP51]], ptr [[TMP50]], align 1 @@ -309,7 +308,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11 ; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27:.*]] ; DEFAULT: [[PRED_STORE_IF26]]: -; DEFAULT-NEXT: [[TMP53:%.*]] = add i64 [[INDEX]], 11 +; DEFAULT-NEXT: [[TMP53:%.*]] = add i64 [[TMP9]], 11 ; DEFAULT-NEXT: [[TMP54:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP53]] ; DEFAULT-NEXT: [[TMP55:%.*]] = extractelement <16 x i8> [[TMP7]], i32 11 ; DEFAULT-NEXT: store i8 [[TMP55]], ptr [[TMP54]], align 1 @@ -318,7 +317,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP56:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12 ; DEFAULT-NEXT: br i1 [[TMP56]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]] ; DEFAULT: [[PRED_STORE_IF28]]: -; DEFAULT-NEXT: [[TMP57:%.*]] = add i64 [[INDEX]], 12 +; DEFAULT-NEXT: [[TMP57:%.*]] = add i64 [[TMP9]], 12 ; DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP57]] ; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <16 x i8> [[TMP7]], i32 12 ; DEFAULT-NEXT: store i8 [[TMP59]], ptr [[TMP58]], align 1 @@ -327,7 +326,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13 ; DEFAULT-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]] ; DEFAULT: [[PRED_STORE_IF30]]: -; DEFAULT-NEXT: [[TMP61:%.*]] = add i64 [[INDEX]], 13 +; DEFAULT-NEXT: [[TMP61:%.*]] = add i64 [[TMP9]], 13 ; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP61]] ; DEFAULT-NEXT: [[TMP63:%.*]] = extractelement <16 x i8> [[TMP7]], i32 13 ; DEFAULT-NEXT: store i8 [[TMP63]], ptr [[TMP62]], align 1 @@ -336,7 +335,7 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14 ; DEFAULT-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]] ; DEFAULT: [[PRED_STORE_IF32]]: -; DEFAULT-NEXT: [[TMP65:%.*]] = add i64 [[INDEX]], 14 +; DEFAULT-NEXT: [[TMP65:%.*]] = add i64 [[TMP9]], 14 ; DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP65]] ; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <16 x i8> [[TMP7]], i32 14 ; DEFAULT-NEXT: store i8 [[TMP67]], ptr [[TMP66]], align 1 @@ -345,13 +344,13 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15 ; DEFAULT-NEXT: br i1 [[TMP68]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35]] ; DEFAULT: [[PRED_STORE_IF34]]: -; DEFAULT-NEXT: [[TMP69:%.*]] = add i64 [[INDEX]], 15 +; DEFAULT-NEXT: [[TMP69:%.*]] = add i64 [[TMP9]], 15 ; DEFAULT-NEXT: [[TMP70:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP69]] ; DEFAULT-NEXT: [[TMP71:%.*]] = extractelement <16 x i8> [[TMP7]], i32 15 ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]] ; DEFAULT: [[PRED_STORE_CONTINUE35]]: -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP9]], 16 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) ; DEFAULT-NEXT: [[VEC_IND_NEXT36]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll index b2be0e1d7a442..3b6405fac3050 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll @@ -127,57 +127,56 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVE1: vector.body: ; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ] -; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 -; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 -; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 -; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 -; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 -; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 -; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 -; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 -; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 -; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 -; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3 +; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4 +; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5 +; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6 +; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7 +; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8 +; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9 +; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10 +; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11 +; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12 +; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13 +; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14 +; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15 +; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] -; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] -; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] -; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] -; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] -; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] -; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] -; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] -; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] -; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] -; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] -; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] -; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] -; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] -; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] -; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]] +; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] +; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] +; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] +; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] +; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] +; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] +; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] +; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] +; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] +; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] +; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] +; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] +; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] ; CHECK-INTERLEAVE1-NEXT: [[TMP35:%.*]] = load i16, ptr [[TMP19]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP20]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP21]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP22]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP39:%.*]] = load i16, ptr [[TMP23]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP40:%.*]] = load i16, ptr [[TMP24]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP25]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP26]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP27]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP28]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP29]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP30]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP31]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP32]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP33]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP34]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP33]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP34]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP20]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP39:%.*]] = load i16, ptr [[TMP21]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP40:%.*]] = load i16, ptr [[TMP22]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP23]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP24]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP25]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP26]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP27]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP28]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP29]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP30]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP31]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP32]], align 2 ; CHECK-INTERLEAVE1-NEXT: [[TMP51:%.*]] = insertelement <16 x i16> poison, i16 [[TMP35]], i32 0 ; CHECK-INTERLEAVE1-NEXT: [[TMP52:%.*]] = insertelement <16 x i16> [[TMP51]], i16 [[TMP36]], i32 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP53:%.*]] = insertelement <16 x i16> [[TMP52]], i16 [[TMP37]], i32 2 @@ -216,22 +215,21 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP137:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 -; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 -; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 -; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 -; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 -; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 -; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 -; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 -; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 +; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6 +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7 +; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9 +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10 +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11 +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12 +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13 +; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14 +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15 ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 16 ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 17 ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 18 @@ -248,28 +246,28 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> -; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] -; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] -; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] -; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] -; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] -; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] -; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] -; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] -; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] -; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] -; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] -; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] -; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] -; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] -; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] -; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]] +; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] +; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] +; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] +; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] +; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] +; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] +; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] +; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] +; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] +; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] +; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] +; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] +; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] ; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP16]] ; CHECK-INTERLEAVED-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP17]] ; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP18]] @@ -287,21 +285,21 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP30]] ; CHECK-INTERLEAVED-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP31]] ; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = load i16, ptr [[TMP37]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = load i16, ptr [[TMP38]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = load i16, ptr [[TMP39]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = load i16, ptr [[TMP40]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP41]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i16, ptr [[TMP42]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = load i16, ptr [[TMP43]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = load i16, ptr [[TMP44]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = load i16, ptr [[TMP45]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP78:%.*]] = load i16, ptr [[TMP46]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP47]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i16, ptr [[TMP48]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = load i16, ptr [[TMP49]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = load i16, ptr [[TMP50]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = load i16, ptr [[TMP51]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i16, ptr [[TMP52]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = load i16, ptr [[TMP51]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = load i16, ptr [[TMP52]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = load i16, ptr [[TMP38]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP39]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i16, ptr [[TMP40]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = load i16, ptr [[TMP41]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = load i16, ptr [[TMP42]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = load i16, ptr [[TMP43]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP78:%.*]] = load i16, ptr [[TMP44]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP45]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i16, ptr [[TMP46]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = load i16, ptr [[TMP47]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = load i16, ptr [[TMP48]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = load i16, ptr [[TMP49]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i16, ptr [[TMP50]], align 2 ; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = insertelement <16 x i16> poison, i16 [[TMP69]], i32 0 ; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = insertelement <16 x i16> [[TMP85]], i16 [[TMP70]], i32 1 ; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <16 x i16> [[TMP86]], i16 [[TMP71]], i32 2 @@ -375,57 +373,56 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-MAXBW: vector.body: ; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ] -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 -; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 -; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 -; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 -; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 -; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 -; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 -; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 -; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 -; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 -; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4 +; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5 +; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6 +; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7 +; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8 +; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9 +; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10 +; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11 +; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12 +; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13 +; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14 +; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15 +; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1 ; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] -; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] -; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] -; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] -; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] -; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] -; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] -; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] -; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] -; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] -; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] -; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] -; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] -; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] -; CHECK-MAXBW-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] -; CHECK-MAXBW-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]] +; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] +; CHECK-MAXBW-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-MAXBW-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] +; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] +; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] +; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] +; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] +; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] +; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] +; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] +; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] +; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] +; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] +; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] ; CHECK-MAXBW-NEXT: [[TMP35:%.*]] = load i16, ptr [[TMP19]], align 2 -; CHECK-MAXBW-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP20]], align 2 -; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP21]], align 2 -; CHECK-MAXBW-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP22]], align 2 -; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = load i16, ptr [[TMP23]], align 2 -; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = load i16, ptr [[TMP24]], align 2 -; CHECK-MAXBW-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP25]], align 2 -; CHECK-MAXBW-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP26]], align 2 -; CHECK-MAXBW-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP27]], align 2 -; CHECK-MAXBW-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP28]], align 2 -; CHECK-MAXBW-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP29]], align 2 -; CHECK-MAXBW-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP30]], align 2 -; CHECK-MAXBW-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP31]], align 2 -; CHECK-MAXBW-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP32]], align 2 -; CHECK-MAXBW-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP33]], align 2 -; CHECK-MAXBW-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP34]], align 2 +; CHECK-MAXBW-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP33]], align 2 +; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP34]], align 2 +; CHECK-MAXBW-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP20]], align 2 +; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = load i16, ptr [[TMP21]], align 2 +; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = load i16, ptr [[TMP22]], align 2 +; CHECK-MAXBW-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP23]], align 2 +; CHECK-MAXBW-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP24]], align 2 +; CHECK-MAXBW-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP25]], align 2 +; CHECK-MAXBW-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP26]], align 2 +; CHECK-MAXBW-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP27]], align 2 +; CHECK-MAXBW-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP28]], align 2 +; CHECK-MAXBW-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP29]], align 2 +; CHECK-MAXBW-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP30]], align 2 +; CHECK-MAXBW-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP31]], align 2 +; CHECK-MAXBW-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP32]], align 2 ; CHECK-MAXBW-NEXT: [[TMP51:%.*]] = insertelement <16 x i16> poison, i16 [[TMP35]], i32 0 ; CHECK-MAXBW-NEXT: [[TMP52:%.*]] = insertelement <16 x i16> [[TMP51]], i16 [[TMP36]], i32 1 ; CHECK-MAXBW-NEXT: [[TMP53:%.*]] = insertelement <16 x i16> [[TMP52]], i16 [[TMP37]], i32 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 2bea0733f65b0..e8cd68bca8eec 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -415,57 +415,56 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1: vector.body: ; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ] -; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 -; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 -; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 -; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 -; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 -; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 -; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 -; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 -; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 -; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 -; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3 +; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4 +; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5 +; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6 +; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7 +; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8 +; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9 +; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10 +; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11 +; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12 +; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13 +; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14 +; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15 +; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] -; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] -; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] -; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] -; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] -; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] -; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] -; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] -; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] -; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] -; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] -; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] -; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] -; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] -; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] -; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]] +; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] +; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] +; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] +; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] +; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] +; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] +; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] +; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] +; CHECK-INTERLEAVE1-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] +; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] +; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] +; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] +; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] ; CHECK-INTERLEAVE1-NEXT: [[TMP35:%.*]] = load i16, ptr [[TMP19]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP20]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP21]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP22]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP23]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP24]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP25]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP26]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP27]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP28]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP29]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP56:%.*]] = load i16, ptr [[TMP30]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP31]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP32]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP33]], align 2 -; CHECK-INTERLEAVE1-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP34]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = load i16, ptr [[TMP33]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP34]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = load i16, ptr [[TMP20]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP41:%.*]] = load i16, ptr [[TMP21]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = load i16, ptr [[TMP22]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP23]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = load i16, ptr [[TMP24]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = load i16, ptr [[TMP25]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = load i16, ptr [[TMP26]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP27]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP56:%.*]] = load i16, ptr [[TMP28]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP47:%.*]] = load i16, ptr [[TMP29]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP48:%.*]] = load i16, ptr [[TMP30]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP31]], align 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP50:%.*]] = load i16, ptr [[TMP32]], align 2 ; CHECK-INTERLEAVE1-NEXT: [[TMP51:%.*]] = insertelement <16 x i16> poison, i16 [[TMP35]], i32 0 ; CHECK-INTERLEAVE1-NEXT: [[TMP52:%.*]] = insertelement <16 x i16> [[TMP51]], i16 [[TMP36]], i32 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP53:%.*]] = insertelement <16 x i16> [[TMP52]], i16 [[TMP37]], i32 2 @@ -504,22 +503,21 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP137:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 -; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 -; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 -; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 -; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 -; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 -; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 -; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 -; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 +; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6 +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7 +; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9 +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10 +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11 +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12 +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13 +; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14 +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15 ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 16 ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 17 ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 18 @@ -536,28 +534,28 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> -; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] -; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] -; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] -; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] -; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] -; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] -; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] -; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] -; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] -; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] -; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] -; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] -; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] -; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] -; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] -; CHECK-INTERLEAVED-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]] +; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] +; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] +; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] +; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] +; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] +; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] +; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] +; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] +; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] +; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] +; CHECK-INTERLEAVED-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] +; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] +; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] ; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP16]] ; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP17]] ; CHECK-INTERLEAVED-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP18]] @@ -575,21 +573,21 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP30]] ; CHECK-INTERLEAVED-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP31]] ; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = load i16, ptr [[TMP39]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = load i16, ptr [[TMP40]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = load i16, ptr [[TMP41]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = load i16, ptr [[TMP42]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP43]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i16, ptr [[TMP44]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = load i16, ptr [[TMP45]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = load i16, ptr [[TMP46]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = load i16, ptr [[TMP47]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP78:%.*]] = load i16, ptr [[TMP48]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP49]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i16, ptr [[TMP50]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = load i16, ptr [[TMP51]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = load i16, ptr [[TMP52]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = load i16, ptr [[TMP53]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i16, ptr [[TMP54]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP70:%.*]] = load i16, ptr [[TMP51]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = load i16, ptr [[TMP37]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP72:%.*]] = load i16, ptr [[TMP38]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP52]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i16, ptr [[TMP40]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = load i16, ptr [[TMP41]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = load i16, ptr [[TMP42]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = load i16, ptr [[TMP43]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP78:%.*]] = load i16, ptr [[TMP44]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP45]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i16, ptr [[TMP46]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = load i16, ptr [[TMP47]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP82:%.*]] = load i16, ptr [[TMP48]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = load i16, ptr [[TMP49]], align 2 +; CHECK-INTERLEAVED-NEXT: [[TMP84:%.*]] = load i16, ptr [[TMP50]], align 2 ; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = insertelement <16 x i16> poison, i16 [[TMP69]], i32 0 ; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = insertelement <16 x i16> [[TMP85]], i16 [[TMP70]], i32 1 ; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <16 x i16> [[TMP86]], i16 [[TMP71]], i32 2 @@ -663,57 +661,56 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-MAXBW: vector.body: ; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-MAXBW-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ] -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 -; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 -; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 -; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 -; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 -; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 -; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 -; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 -; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 -; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 -; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3 +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4 +; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5 +; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6 +; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7 +; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 8 +; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 9 +; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 10 +; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 11 +; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 12 +; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 13 +; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 14 +; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 15 +; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-MAXBW-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> -; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] -; CHECK-MAXBW-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] -; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] -; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] -; CHECK-MAXBW-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] -; CHECK-MAXBW-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] -; CHECK-MAXBW-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] -; CHECK-MAXBW-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] -; CHECK-MAXBW-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] -; CHECK-MAXBW-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] -; CHECK-MAXBW-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] -; CHECK-MAXBW-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] -; CHECK-MAXBW-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] -; CHECK-MAXBW-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] -; CHECK-MAXBW-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] -; CHECK-MAXBW-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]] +; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] +; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] +; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] +; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] +; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]] +; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]] +; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]] +; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]] +; CHECK-MAXBW-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] +; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]] +; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP10]] +; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]] +; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP12]] +; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]] +; CHECK-MAXBW-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]] ; CHECK-MAXBW-NEXT: [[TMP101:%.*]] = load i16, ptr [[TMP37]], align 2 -; CHECK-MAXBW-NEXT: [[TMP102:%.*]] = load i16, ptr [[TMP38]], align 2 -; CHECK-MAXBW-NEXT: [[TMP103:%.*]] = load i16, ptr [[TMP39]], align 2 -; CHECK-MAXBW-NEXT: [[TMP104:%.*]] = load i16, ptr [[TMP40]], align 2 -; CHECK-MAXBW-NEXT: [[TMP105:%.*]] = load i16, ptr [[TMP41]], align 2 -; CHECK-MAXBW-NEXT: [[TMP106:%.*]] = load i16, ptr [[TMP42]], align 2 -; CHECK-MAXBW-NEXT: [[TMP107:%.*]] = load i16, ptr [[TMP43]], align 2 -; CHECK-MAXBW-NEXT: [[TMP108:%.*]] = load i16, ptr [[TMP44]], align 2 -; CHECK-MAXBW-NEXT: [[TMP109:%.*]] = load i16, ptr [[TMP45]], align 2 -; CHECK-MAXBW-NEXT: [[TMP110:%.*]] = load i16, ptr [[TMP46]], align 2 -; CHECK-MAXBW-NEXT: [[TMP111:%.*]] = load i16, ptr [[TMP47]], align 2 -; CHECK-MAXBW-NEXT: [[TMP112:%.*]] = load i16, ptr [[TMP48]], align 2 -; CHECK-MAXBW-NEXT: [[TMP113:%.*]] = load i16, ptr [[TMP49]], align 2 -; CHECK-MAXBW-NEXT: [[TMP114:%.*]] = load i16, ptr [[TMP50]], align 2 -; CHECK-MAXBW-NEXT: [[TMP115:%.*]] = load i16, ptr [[TMP51]], align 2 -; CHECK-MAXBW-NEXT: [[TMP116:%.*]] = load i16, ptr [[TMP52]], align 2 +; CHECK-MAXBW-NEXT: [[TMP102:%.*]] = load i16, ptr [[TMP18]], align 2 +; CHECK-MAXBW-NEXT: [[TMP103:%.*]] = load i16, ptr [[TMP19]], align 2 +; CHECK-MAXBW-NEXT: [[TMP104:%.*]] = load i16, ptr [[TMP20]], align 2 +; CHECK-MAXBW-NEXT: [[TMP105:%.*]] = load i16, ptr [[TMP21]], align 2 +; CHECK-MAXBW-NEXT: [[TMP106:%.*]] = load i16, ptr [[TMP22]], align 2 +; CHECK-MAXBW-NEXT: [[TMP107:%.*]] = load i16, ptr [[TMP23]], align 2 +; CHECK-MAXBW-NEXT: [[TMP108:%.*]] = load i16, ptr [[TMP24]], align 2 +; CHECK-MAXBW-NEXT: [[TMP109:%.*]] = load i16, ptr [[TMP25]], align 2 +; CHECK-MAXBW-NEXT: [[TMP110:%.*]] = load i16, ptr [[TMP26]], align 2 +; CHECK-MAXBW-NEXT: [[TMP111:%.*]] = load i16, ptr [[TMP27]], align 2 +; CHECK-MAXBW-NEXT: [[TMP112:%.*]] = load i16, ptr [[TMP28]], align 2 +; CHECK-MAXBW-NEXT: [[TMP113:%.*]] = load i16, ptr [[TMP29]], align 2 +; CHECK-MAXBW-NEXT: [[TMP114:%.*]] = load i16, ptr [[TMP30]], align 2 +; CHECK-MAXBW-NEXT: [[TMP115:%.*]] = load i16, ptr [[TMP31]], align 2 +; CHECK-MAXBW-NEXT: [[TMP116:%.*]] = load i16, ptr [[TMP33]], align 2 ; CHECK-MAXBW-NEXT: [[TMP117:%.*]] = insertelement <16 x i16> poison, i16 [[TMP101]], i32 0 ; CHECK-MAXBW-NEXT: [[TMP118:%.*]] = insertelement <16 x i16> [[TMP117]], i16 [[TMP102]], i32 1 ; CHECK-MAXBW-NEXT: [[TMP119:%.*]] = insertelement <16 x i16> [[TMP118]], i16 [[TMP103]], i32 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index 131b3d1b02727..9cff5e5b77e92 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -360,7 +360,6 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -376,7 +375,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i64 [[N:%.*]], [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i64 [[N:%.*]], [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i64 [[N]], [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = sub nsw i64 [[N]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = sub nsw i64 [[N]], [[TMP3]] @@ -409,7 +408,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x i64> [[TMP45]], i64 [[TMP30]], i32 14 ; CHECK-NEXT: [[TMP47:%.*]] = insertelement <16 x i64> [[TMP46]], i64 [[TMP31]], i32 15 ; CHECK-NEXT: [[TMP48:%.*]] = trunc <16 x i64> [[TMP47]] to <16 x i8> -; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[TMP0]], [[TMP16]] +; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP49]] ; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i8> [[TMP48]], i32 15 ; CHECK-NEXT: store i8 [[TMP51]], ptr [[TMP50]], align 1 @@ -425,7 +424,6 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[INDEX2]], 0 ; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[INDEX2]], 1 ; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[INDEX2]], 2 ; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[INDEX2]], 3 @@ -433,7 +431,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: [[TMP58:%.*]] = add i64 [[INDEX2]], 5 ; CHECK-NEXT: [[TMP59:%.*]] = add i64 [[INDEX2]], 6 ; CHECK-NEXT: [[TMP60:%.*]] = add i64 [[INDEX2]], 7 -; CHECK-NEXT: [[TMP61:%.*]] = sub nsw i64 [[N]], [[TMP53]] +; CHECK-NEXT: [[TMP61:%.*]] = sub nsw i64 [[N]], [[INDEX2]] ; CHECK-NEXT: [[TMP62:%.*]] = sub nsw i64 [[N]], [[TMP54]] ; CHECK-NEXT: [[TMP63:%.*]] = sub nsw i64 [[N]], [[TMP55]] ; CHECK-NEXT: [[TMP64:%.*]] = sub nsw i64 [[N]], [[TMP56]] @@ -450,7 +448,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: [[TMP75:%.*]] = insertelement <8 x i64> [[TMP74]], i64 [[TMP67]], i32 6 ; CHECK-NEXT: [[TMP76:%.*]] = insertelement <8 x i64> [[TMP75]], i64 [[TMP68]], i32 7 ; CHECK-NEXT: [[TMP77:%.*]] = trunc <8 x i64> [[TMP76]] to <8 x i8> -; CHECK-NEXT: [[TMP78:%.*]] = add i64 [[TMP53]], [[TMP61]] +; CHECK-NEXT: [[TMP78:%.*]] = add i64 [[INDEX2]], [[TMP61]] ; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]] ; CHECK-NEXT: [[TMP80:%.*]] = extractelement <8 x i8> [[TMP77]], i32 7 ; CHECK-NEXT: store i8 [[TMP80]], ptr [[TMP79]], align 1 @@ -484,7 +482,6 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: br label [[VECTOR_BODY:%.*]] ; IC2: vector.body: ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -516,7 +513,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; IC2-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; IC2-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; IC2-NEXT: [[TMP32:%.*]] = sub nsw i64 [[N:%.*]], [[TMP0]] +; IC2-NEXT: [[TMP32:%.*]] = sub nsw i64 [[N:%.*]], [[INDEX]] ; IC2-NEXT: [[TMP33:%.*]] = sub nsw i64 [[N]], [[TMP1]] ; IC2-NEXT: [[TMP34:%.*]] = sub nsw i64 [[N]], [[TMP2]] ; IC2-NEXT: [[TMP35:%.*]] = sub nsw i64 [[N]], [[TMP3]] @@ -582,7 +579,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: [[TMP95:%.*]] = insertelement <16 x i64> [[TMP94]], i64 [[TMP79]], i32 15 ; IC2-NEXT: [[TMP96:%.*]] = trunc <16 x i64> [[TMP63]] to <16 x i8> ; IC2-NEXT: [[TMP97:%.*]] = trunc <16 x i64> [[TMP95]] to <16 x i8> -; IC2-NEXT: [[TMP98:%.*]] = add i64 [[TMP0]], [[TMP32]] +; IC2-NEXT: [[TMP98:%.*]] = add i64 [[INDEX]], [[TMP32]] ; IC2-NEXT: [[TMP99:%.*]] = add i64 [[TMP16]], [[TMP64]] ; IC2-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP98]] ; IC2-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP99]] @@ -602,7 +599,6 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; IC2: vec.epilog.vector.body: ; IC2-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; IC2-NEXT: [[TMP105:%.*]] = add i64 [[INDEX1]], 0 ; IC2-NEXT: [[TMP106:%.*]] = add i64 [[INDEX1]], 1 ; IC2-NEXT: [[TMP107:%.*]] = add i64 [[INDEX1]], 2 ; IC2-NEXT: [[TMP108:%.*]] = add i64 [[INDEX1]], 3 @@ -610,7 +606,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: [[TMP110:%.*]] = add i64 [[INDEX1]], 5 ; IC2-NEXT: [[TMP111:%.*]] = add i64 [[INDEX1]], 6 ; IC2-NEXT: [[TMP112:%.*]] = add i64 [[INDEX1]], 7 -; IC2-NEXT: [[TMP113:%.*]] = sub nsw i64 [[N]], [[TMP105]] +; IC2-NEXT: [[TMP113:%.*]] = sub nsw i64 [[N]], [[INDEX1]] ; IC2-NEXT: [[TMP114:%.*]] = sub nsw i64 [[N]], [[TMP106]] ; IC2-NEXT: [[TMP115:%.*]] = sub nsw i64 [[N]], [[TMP107]] ; IC2-NEXT: [[TMP116:%.*]] = sub nsw i64 [[N]], [[TMP108]] @@ -627,7 +623,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: [[TMP127:%.*]] = insertelement <8 x i64> [[TMP126]], i64 [[TMP119]], i32 6 ; IC2-NEXT: [[TMP128:%.*]] = insertelement <8 x i64> [[TMP127]], i64 [[TMP120]], i32 7 ; IC2-NEXT: [[TMP129:%.*]] = trunc <8 x i64> [[TMP128]] to <8 x i8> -; IC2-NEXT: [[TMP130:%.*]] = add i64 [[TMP105]], [[TMP113]] +; IC2-NEXT: [[TMP130:%.*]] = add i64 [[INDEX1]], [[TMP113]] ; IC2-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP130]] ; IC2-NEXT: [[TMP132:%.*]] = extractelement <8 x i8> [[TMP129]], i32 7 ; IC2-NEXT: store i8 [[TMP132]], ptr [[TMP131]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll index edf7e280d7416..b6e24352326fe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll @@ -23,7 +23,8 @@ define i32 @pr70988(ptr %src, i32 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_LOAD_CONTINUE5]] ] ; CHECK-NEXT: br i1 [[ACTIVE_LANE_MASK]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll index f67a3d9be408a..447435c369869 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll @@ -239,7 +239,7 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -1) ; CHECK-NEXT: [[TMP1:%.*]] = srem <4 x i32> [[TMP0]], [[BROADCAST_SPLAT]] @@ -288,7 +288,6 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: @@ -297,7 +296,7 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e ; CHECK: [[PRED_STORE_IF7]]: ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP25]], i32 1 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP31]] -; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[TMP29]], 1 ; CHECK-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]] ; CHECK: [[PRED_STORE_CONTINUE8]]: @@ -306,7 +305,7 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e ; CHECK: [[PRED_STORE_IF9]]: ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i64> [[TMP25]], i32 2 ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP35]] -; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP29]], 2 ; CHECK-NEXT: store i32 [[TMP37]], ptr [[TMP36]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]] ; CHECK: [[PRED_STORE_CONTINUE10]]: @@ -315,11 +314,11 @@ define void @srem_sdiv_without_tail_folding(i32 %d.0, i32 %d.1, ptr %dst, i32 %e ; CHECK: [[PRED_STORE_IF11]]: ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i64> [[TMP25]], i32 3 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP29]], 3 ; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; CHECK: [[PRED_STORE_CONTINUE12]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP29]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP42]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index fceab6f823d5a..6a5c0df1b58a6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -552,10 +552,9 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[IV]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> @@ -569,19 +568,19 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]] -; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]] -; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72 -; CHECK-NEXT: [[L_P_2:%.*]] = load ptr, ptr [[GEP_72]], align 8 -; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i64 72 +; CHECK-NEXT: [[L_P_2:%.*]] = load ptr, ptr [[TMP13]], align 8 +; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 ; CHECK-NEXT: [[LV:%.*]] = load double, ptr [[L_P_2]], align 8 ; CHECK-NEXT: [[TMP17:%.*]] = load double, ptr [[TMP15]], align 8 ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[LV]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP17]], i32 1 ; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP9]], [[TMP19]] ; CHECK-NEXT: [[TMP21]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> [[TMP20]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -674,23 +673,23 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18 -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20 -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24 -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 10 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 12 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 14 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 18 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 20 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 22 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 24 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 26 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 28 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 30 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP3]] @@ -705,23 +704,22 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) { ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP13]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP15]] ; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP16]], align 1 -; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP17]], align 1 -; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP18]], align 1 -; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP19]], align 1 -; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP20]], align 1 -; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP21]], align 1 -; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[TMP22]], align 1 -; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP23]], align 1 -; CHECK-NEXT: [[TMP40:%.*]] = load i8, ptr [[TMP24]], align 1 -; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP25]], align 1 -; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP26]], align 1 -; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP27]], align 1 -; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP28]], align 1 -; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[TMP29]], align 1 -; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[TMP30]], align 1 -; CHECK-NEXT: [[TMP47:%.*]] = load i8, ptr [[TMP31]], align 1 +; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP31]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP17]], align 1 +; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[TMP21]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP40:%.*]] = load i8, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP26]], align 1 +; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP27]], align 1 +; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[TMP28]], align 1 +; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP47:%.*]] = load i8, ptr [[TMP30]], align 1 ; CHECK-NEXT: [[TMP48:%.*]] = insertelement <16 x i8> poison, i8 [[TMP32]], i32 0 ; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x i8> [[TMP48]], i8 [[TMP33]], i32 1 ; CHECK-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP49]], i8 [[TMP34]], i32 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll index 99c735f777b66..8c34e6815f843 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll @@ -152,9 +152,7 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr % ; VF2IC1-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC1: [[PRED_STORE_IF]]: -; VF2IC1-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; VF2IC1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP5]] -; VF2IC1-NEXT: store i8 1, ptr [[TMP6]], align 1 +; VF2IC1-NEXT: store i8 1, ptr [[TMP2]], align 1 ; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE]] ; VF2IC1: [[PRED_STORE_CONTINUE]]: ; VF2IC1-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 @@ -207,9 +205,7 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr % ; VF2IC2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC2: [[PRED_STORE_IF]]: -; VF2IC2-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 -; VF2IC2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[TMP12]] -; VF2IC2-NEXT: store i8 1, ptr [[TMP13]], align 1 +; VF2IC2-NEXT: store i8 1, ptr [[TMP7]], align 1 ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]] ; VF2IC2: [[PRED_STORE_CONTINUE]]: ; VF2IC2-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll index c26176028626b..2db4b96270c2c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll @@ -34,12 +34,9 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF4: [[VECTOR_BODY]]: ; VF4-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF4: [[PRED_STORE_IF]]: -; VF4-NEXT: [[TMP3:%.*]] = shl nsw i64 0, 1 -; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP3]] -; VF4-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -; VF4-NEXT: store i64 [[TMP5]], ptr [[TMP4]], align 8 -; VF4-NEXT: [[TMP6:%.*]] = or disjoint i64 [[TMP3]], 1 -; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]] +; VF4-NEXT: [[TMP0:%.*]] = load i64, ptr [[DATA]], align 8 +; VF4-NEXT: store i64 [[TMP0]], ptr [[DATA]], align 8 +; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 1 ; VF4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF4-NEXT: store i64 [[TMP8]], ptr [[TMP7]], align 8 ; VF4-NEXT: br label %[[PRED_STORE_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll index 6d0c55b1d246c..267e410de1f26 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll @@ -736,12 +736,11 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) { ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF2: [[VECTOR_BODY]]: ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]] +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i32>, ptr [[TMP2]], align 8 ; VF2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[WIDE_VEC]], <4 x i32> poison, <2 x i32> -; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0 +; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 0 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0 ; VF2-NEXT: [[WIDE_VEC1:%.*]] = load <6 x i32>, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP13:%.*]] = shufflevector <6 x i32> [[WIDE_VEC1]], <6 x i32> poison, <2 x i32> @@ -751,7 +750,7 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) { ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1 ; VF2-NEXT: store i32 [[TMP15]], ptr [[TMP8]], align 8 ; VF2-NEXT: store i32 [[TMP16]], ptr [[TMP9]], align 8 -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1 +; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 1 ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1 ; VF2-NEXT: [[TMP23:%.*]] = mul <2 x i32> [[TMP7]], [[TMP22]] ; VF2-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP23]], i32 0 @@ -773,14 +772,13 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) { ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] ; VF4: [[VECTOR_BODY]]: ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]] +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0 +; VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 0 ; VF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0 ; VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 0 ; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 0 @@ -796,7 +794,7 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) { ; VF4-NEXT: store i32 [[TMP30]], ptr [[TMP17]], align 8 ; VF4-NEXT: store i32 [[TMP31]], ptr [[TMP18]], align 8 ; VF4-NEXT: store i32 [[TMP32]], ptr [[TMP19]], align 8 -; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1 +; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 1 ; VF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1 ; VF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 1 ; VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll index 4761cb0d63de7..a23ab6671f50b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll @@ -13,40 +13,39 @@ define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP4]], align 2 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[WIDE_LOAD]], splat (i16 10) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[WIDE_LOAD]], splat (i16 10) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i16> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 ; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 +; CHECK-NEXT: [[TMP17:%.*]] = ashr exact i64 [[TMP13]], 32 ; CHECK-NEXT: [[TMP18:%.*]] = ashr exact i64 [[TMP14]], 32 ; CHECK-NEXT: [[TMP19:%.*]] = ashr exact i64 [[TMP15]], 32 ; CHECK-NEXT: [[TMP20:%.*]] = ashr exact i64 [[TMP16]], 32 -; CHECK-NEXT: [[TMP21:%.*]] = ashr exact i64 [[TMP17]], 32 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP18]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP20]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP21]] +; CHECK-NEXT: store i16 [[TMP5]], ptr [[TMP21]], align 2 ; CHECK-NEXT: store i16 [[TMP6]], ptr [[TMP22]], align 2 ; CHECK-NEXT: store i16 [[TMP7]], ptr [[TMP23]], align 2 ; CHECK-NEXT: store i16 [[TMP8]], ptr [[TMP24]], align 2 -; CHECK-NEXT: store i16 [[TMP9]], ptr [[TMP25]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_INC1286_LOOPEXIT:%.*]] ; CHECK: for.inc1286.loopexit: @@ -84,44 +83,43 @@ define void @test1(ptr noalias %M3, ptr noalias %A, ptr noalias %B, ptr noalias ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[C]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[C]], align 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT]] to <4 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[WIDE_LOAD]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i16> [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP7]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP7]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP4]], align 2 +; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[WIDE_LOAD]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i16> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i16> [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[TMP6]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP6]], i32 3 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 ; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = ashr exact i64 [[TMP15]], 32 ; CHECK-NEXT: [[TMP20:%.*]] = ashr exact i64 [[TMP16]], 32 ; CHECK-NEXT: [[TMP21:%.*]] = ashr exact i64 [[TMP17]], 32 ; CHECK-NEXT: [[TMP22:%.*]] = ashr exact i64 [[TMP18]], 32 -; CHECK-NEXT: [[TMP23:%.*]] = ashr exact i64 [[TMP19]], 32 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP20]] ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[TMP23]] +; CHECK-NEXT: store i16 [[TMP7]], ptr [[TMP23]], align 2 ; CHECK-NEXT: store i16 [[TMP8]], ptr [[TMP24]], align 2 ; CHECK-NEXT: store i16 [[TMP9]], ptr [[TMP25]], align 2 ; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP26]], align 2 -; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP27]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_INC1286_LOOPEXIT:%.*]] ; CHECK: for.inc1286.loopexit: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll index dcb890670e33b..c185f3fe8a2f2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll @@ -188,7 +188,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8 -; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: [[GEPDST:%.*]] = getelementptr inbounds nuw double, ptr [[DST]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store double [[CALL]], ptr [[GEPDST]], align 8 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -212,7 +212,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 ; INTERLEAVE: pred.store.if: ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 -; INTERLEAVE-NEXT: [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR4:[0-9]+]] +; INTERLEAVE-NEXT: [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR5:[0-9]+]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] ; INTERLEAVE-NEXT: store double [[TMP3]], ptr [[TMP4]], align 8 ; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -222,7 +222,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 ; INTERLEAVE-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 1 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP5]] ; INTERLEAVE-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -; INTERLEAVE-NEXT: [[TMP8:%.*]] = call double @foo(double [[TMP7]], i64 [[TMP5]]) #[[ATTR4]] +; INTERLEAVE-NEXT: [[TMP8:%.*]] = call double @foo(double [[TMP7]], i64 [[TMP5]]) #[[ATTR5]] ; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[TMP5]] ; INTERLEAVE-NEXT: store double [[TMP8]], ptr [[TMP9]], align 8 ; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE4]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll index cf035d0b2b2ee..043ddb286761c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -110,15 +110,14 @@ define void @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double> ; CHECK-NEXT: [[TMP5:%.*]] = call fast <2 x double> @__simd_sin_v2f64(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[TMP1]] ; CHECK-NEXT: store double [[TMP8]], ptr [[TMP6]], align 8 ; CHECK-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index f2f65685e9bad..49fe1056e3ab2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -849,11 +849,10 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { ; RV64: [[VECTOR_BODY]]: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV64-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; RV64-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 ; RV64-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 -; RV64-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP0]], -1 +; RV64-NEXT: [[TMP4:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1 ; RV64-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1 ; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1 @@ -900,11 +899,10 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { ; RV32: [[VECTOR_BODY]]: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV32-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; RV32-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 ; RV32-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 -; RV32-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP0]], -1 +; RV32-NEXT: [[TMP4:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1 ; RV32-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1 ; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1 @@ -951,7 +949,6 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { ; RV64-UF2: [[VECTOR_BODY]]: ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV64-UF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; RV64-UF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 ; RV64-UF2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 @@ -959,7 +956,7 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -5 ; RV64-UF2-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], -6 ; RV64-UF2-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -7 -; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP0]], -1 +; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP9:%.*]] = add nsw i64 [[TMP1]], -1 ; RV64-UF2-NEXT: [[TMP10:%.*]] = add nsw i64 [[TMP2]], -1 ; RV64-UF2-NEXT: [[TMP11:%.*]] = add nsw i64 [[TMP3]], -1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 55e7018c49eec..28b097e2c0536 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -602,9 +602,6 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]] ; FIXEDLEN: [[VECTOR_BODY]]: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 -; FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 5 -; FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 6 ; FIXEDLEN-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 7 ; FIXEDLEN-NEXT: store i64 [[TMP4]], ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll index 78c71fd3beb89..fab2da95f7f1d 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll @@ -20,9 +20,8 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) { ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP2]] -; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: store i64 [[OFFSET_IDX]], ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll index ee84ef243570a..c665c0754b241 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll @@ -20,13 +20,14 @@ define void @func_21() { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE4]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> [[TMP21]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] @@ -45,7 +46,7 @@ define void @func_21() { ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 ; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll index cfb180594b0ec..66d032ad1fd77 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll @@ -16,15 +16,13 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP3:%.*]] = mul nsw i64 0, 4 ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i64 1, 4 ; CHECK-NEXT: [[TMP2:%.*]] = mul nsw i64 2, 4 ; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i64 3, 4 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[SRC_1]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP7]], align 1 @@ -33,8 +31,7 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[TMP10]], i32 2 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP14]], i8 [[TMP11]], i32 3 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr @src, i64 0, i64 4 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr getelementptr inbounds nuw (i8, ptr @src, i64 16), align 4 ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0 ; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index 69505eb176f50..3b895d447c6c1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -66,7 +66,6 @@ define void @PR31671(float %x, ptr %d) #0 { ; FORCE: [[VECTOR_BODY]]: ; FORCE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCE-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5 -; FORCE-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FORCE-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5 ; FORCE-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 10 ; FORCE-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 15 @@ -74,7 +73,7 @@ define void @PR31671(float %x, ptr %d) #0 { ; FORCE-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 25 ; FORCE-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 30 ; FORCE-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 35 -; FORCE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[DATA:%.*]], ptr [[D]], i64 0, i32 3, i64 [[TMP0]] +; FORCE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[DATA:%.*]], ptr [[D]], i64 0, i32 3, i64 [[OFFSET_IDX]] ; FORCE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP2]] ; FORCE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP4]] ; FORCE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP6]] @@ -90,7 +89,7 @@ define void @PR31671(float %x, ptr %d) #0 { ; FORCE-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC2]] ; FORCE-NEXT: [[TMP14:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC4]] ; FORCE-NEXT: [[TMP15:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC6]] -; FORCE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP0]] +; FORCE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[OFFSET_IDX]] ; FORCE-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP1]] ; FORCE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP2]] ; FORCE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP3]] @@ -192,24 +191,23 @@ define void @PR40816() #1 { ; FORCE: [[VECTOR_PH]]: ; FORCE-NEXT: br label %[[VECTOR_BODY:.*]] ; FORCE: [[VECTOR_BODY]]: -; FORCE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; FORCE-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] ; FORCE-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; FORCE-NEXT: [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 2) ; FORCE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; FORCE-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; FORCE: [[PRED_STORE_IF]]: -; FORCE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; FORCE-NEXT: store i32 [[TMP0]], ptr @b, align 1 ; FORCE-NEXT: br label %[[PRED_STORE_CONTINUE]] ; FORCE: [[PRED_STORE_CONTINUE]]: ; FORCE-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 ; FORCE-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; FORCE: [[PRED_STORE_IF1]]: -; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 +; FORCE-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 1 ; FORCE-NEXT: store i32 [[TMP1]], ptr @b, align 1 ; FORCE-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; FORCE: [[PRED_STORE_CONTINUE2]]: -; FORCE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; FORCE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2 ; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2) ; FORCE-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 ; FORCE-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index f65a9d7d45ed8..9c4aff56a62a4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -63,28 +63,27 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 -; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 +; CHECK-NEXT: store i32 0, ptr [[TMP8:%.*]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 ; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: @@ -138,28 +137,27 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 -; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4 +; CHECK-NEXT: store i32 0, ptr [[TMP7:%.*]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP13]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: @@ -214,28 +212,27 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 -; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 4 +; CHECK-NEXT: store i32 0, ptr [[TMP9:%.*]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 ; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 ; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3 ; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 3 ; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll index 651e2ad5e74da..5eed334de2619 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll @@ -414,7 +414,7 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP8:%.*]] = shl nsw i64 [[TMP4]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = shl nsw i64 [[TMP6]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr nusw double, ptr [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP10]] @@ -433,10 +433,7 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0 ; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP24:%.*]] = shl nsw i64 [[TMP3]], 2 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP24]] -; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP25]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP11]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP19]], i32 1 @@ -592,17 +589,14 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX40:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT51:%.*]], [[PRED_STORE_CONTINUE50:%.*]] ] ; CHECK-NEXT: [[TMP87:%.*]] = shl nsw i64 [[INDEX40]], 2 -; CHECK-NEXT: [[TMP89:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP87]] ; CHECK-NEXT: [[WIDE_VEC41:%.*]] = load <16 x double>, ptr [[TMP89]], align 8 ; CHECK-NEXT: [[STRIDED_VEC42:%.*]] = shufflevector <16 x double> [[WIDE_VEC41]], <16 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP90:%.*]] = fcmp oeq <4 x double> [[STRIDED_VEC42]], zeroinitializer ; CHECK-NEXT: [[TMP91:%.*]] = extractelement <4 x i1> [[TMP90]], i32 0 ; CHECK-NEXT: br i1 [[TMP91]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]] ; CHECK: pred.store.if43: -; CHECK-NEXT: [[TMP86:%.*]] = add i64 [[INDEX40]], 0 -; CHECK-NEXT: [[TMP92:%.*]] = shl nsw i64 [[TMP86]], 2 -; CHECK-NEXT: [[TMP93:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP92]] -; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP93]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP89]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE44]] ; CHECK: pred.store.continue44: ; CHECK-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP90]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 801f910c5e13d..e7c262656feff 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -90,7 +90,6 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP121:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP122:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 32 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 32 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 64 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96 @@ -106,7 +105,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 416 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 448 ; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 480 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] @@ -154,7 +153,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x float> [[TMP63]], float [[TMP60]], i32 1 ; CHECK-NEXT: [[TMP65:%.*]] = insertelement <4 x float> [[TMP64]], float [[TMP61]], i32 2 ; CHECK-NEXT: [[TMP66:%.*]] = insertelement <4 x float> [[TMP65]], float [[TMP62]], i32 3 -; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] @@ -236,11 +235,10 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI11:%.*]] = phi <4 x float> [ [[TMP125]], [[VEC_EPILOG_PH]] ], [ [[TMP155:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX12:%.*]] = mul i64 [[INDEX10]], 32 -; CHECK-NEXT: [[TMP126:%.*]] = add i64 [[OFFSET_IDX12]], 0 ; CHECK-NEXT: [[TMP127:%.*]] = add i64 [[OFFSET_IDX12]], 32 ; CHECK-NEXT: [[TMP128:%.*]] = add i64 [[OFFSET_IDX12]], 64 ; CHECK-NEXT: [[TMP129:%.*]] = add i64 [[OFFSET_IDX12]], 96 -; CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP126]] +; CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET_IDX12]] ; CHECK-NEXT: [[TMP131:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP127]] ; CHECK-NEXT: [[TMP132:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP128]] ; CHECK-NEXT: [[TMP133:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP129]] @@ -252,7 +250,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[TMP139:%.*]] = insertelement <4 x float> [[TMP138]], float [[TMP135]], i32 1 ; CHECK-NEXT: [[TMP140:%.*]] = insertelement <4 x float> [[TMP139]], float [[TMP136]], i32 2 ; CHECK-NEXT: [[TMP141:%.*]] = insertelement <4 x float> [[TMP140]], float [[TMP137]], i32 3 -; CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP126]] +; CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX12]] ; CHECK-NEXT: [[TMP143:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP127]] ; CHECK-NEXT: [[TMP144:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP128]] ; CHECK-NEXT: [[TMP145:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP129]] @@ -472,11 +470,10 @@ define i1 @any_of_cost(ptr %start, ptr %end) #0 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 40 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 80 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 120 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP7]] ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] ; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index 4423b89e981b9..7ec45bedb7eb5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -287,12 +287,10 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = sub i64 0, 1 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 1, 1 ; CHECK-NEXT: [[TMP8:%.*]] = sub i64 2, 1 ; CHECK-NEXT: [[TMP9:%.*]] = sub i64 3, 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP9]] @@ -300,11 +298,10 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x ptr> [[TMP14]], ptr [[TMP11]], i32 1 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3 -; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[PTRS]], align 8 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 4 [[TMP10]], <4 x i1> , <4 x float> poison), !invariant.load [[META0]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 0 -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP21]], align 4 +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 84579d97b38e2..86444d3354fe8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -354,7 +354,6 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 3 @@ -362,7 +361,7 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 7 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 0bac10a41640e..de2b1d1eedb81 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -151,9 +151,8 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ] ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]] ; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 @@ -166,7 +165,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: -; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -266,9 +265,8 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; FVW2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 -; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]] ; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 @@ -281,7 +279,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: -; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], i64 [[TMP0]], i32 1 +; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], i64 [[OFFSET_IDX]], i32 1 ; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -368,9 +366,8 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ] ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]] ; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 @@ -383,7 +380,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: -; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; FVW2-NEXT: store float [[TMP13]], ptr addrspace(1) [[TMP12]], align 4 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -469,9 +466,8 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ] ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]] ; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 @@ -484,7 +480,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: -; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -570,9 +566,8 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ] ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]] ; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 @@ -585,7 +580,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: -; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; FVW2-NEXT: store float [[TMP13]], ptr addrspace(1) [[TMP12]], align 4 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -779,9 +774,8 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; FVW2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[OFFSET_IDX9:%.*]] = mul i64 [[INDEX]], 64 -; FVW2-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX9]], 0 ; FVW2-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX9]], 64 -; FVW2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP17]] +; FVW2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[OFFSET_IDX9]] ; FVW2-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP18]] ; FVW2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM]] ; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP21]], align 4, !alias.scope [[META8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 04bff3c393f62..a903cbd55304b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -255,7 +255,6 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -271,7 +270,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] @@ -585,8 +584,7 @@ define void @wide_iv_trunc(ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 0 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[DST]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll index 14fb2a76eb75b..fbdc342f0b7f3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll @@ -16,7 +16,6 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 8 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 16 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 24 @@ -32,7 +31,7 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 104 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 112 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 120 -; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP0]] +; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[M]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP1]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP2]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll index 829fdff5123e2..297d313404679 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll @@ -20,43 +20,42 @@ define void @pr63602_1(ptr %arr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 4, [[TMP1]] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX1]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 6 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX1]], 9 -; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP6]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 6 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 9 +; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[OFFSET_IDX1]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]] +; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP11]], align 4 ; CHECK-NEXT: store i32 [[TMP8]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store i32 [[TMP9]], ptr [[TMP13]], align 4 ; CHECK-NEXT: store i32 [[TMP10]], ptr [[TMP14]], align 4 -; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP16]] -; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <12 x i32>, ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP15]] +; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <12 x i32>, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC2]], <12 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i32> [[WIDE_VEC2]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = add <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC3]] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP18]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP18]], i32 1 -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP18]], i32 2 -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP18]], i32 3 +; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC3]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[TMP17]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP17]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP17]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP17]], i32 3 +; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP11]], align 4 ; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP13]], align 4 ; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP14]], align 4 -; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -127,68 +126,66 @@ define void @pr63602_2(ptr %arr) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 4, [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX1]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX1]], 6 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX1]], 9 -; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP1]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP10]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[INDEX]], 3 +; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 4, [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX1]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX1]], 9 +; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP8]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 2 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[OFFSET_IDX1]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP9]] +; CHECK-NEXT: store i32 [[TMP10]], ptr [[TMP14]], align 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP15]], align 4 ; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP16]], align 4 ; CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP17]], align 4 -; CHECK-NEXT: store i32 [[TMP14]], ptr [[TMP18]], align 4 -; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP19]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = add nuw nsw i64 [[TMP1]], 2 -; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[TMP2]], 2 -; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[TMP3]], 2 -; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP19:%.*]] = add nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP20:%.*]] = add nuw nsw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP18]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP20]] ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP23]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP18]], align 4 -; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP19]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> poison, i32 [[TMP28]], i32 0 -; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 1 -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 2 -; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP31]], i32 3 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP27]], i32 1 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 2 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 3 +; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP22]], align 4 +; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP23]], align 4 ; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP24]], align 4 ; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP25]], align 4 -; CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP26]], align 4 -; CHECK-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP27]], align 4 -; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> poison, i32 [[TMP36]], i32 0 -; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP37]], i32 1 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP38]], i32 2 -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP39]], i32 3 -; CHECK-NEXT: [[TMP44:%.*]] = add <4 x i32> [[TMP35]], [[TMP43]] -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i32> [[TMP44]], i32 0 -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x i32> [[TMP44]], i32 1 -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i32> [[TMP44]], i32 2 -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <4 x i32> [[TMP44]], i32 3 +; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i32> poison, i32 [[TMP34]], i32 0 +; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP35]], i32 1 +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP36]], i32 2 +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP37]], i32 3 +; CHECK-NEXT: [[TMP42:%.*]] = add <4 x i32> [[TMP33]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x i32> [[TMP42]], i32 0 +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i32> [[TMP42]], i32 1 +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i32> [[TMP42]], i32 2 +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x i32> [[TMP42]], i32 3 +; CHECK-NEXT: store i32 [[TMP43]], ptr [[TMP14]], align 4 +; CHECK-NEXT: store i32 [[TMP44]], ptr [[TMP15]], align 4 ; CHECK-NEXT: store i32 [[TMP45]], ptr [[TMP16]], align 4 ; CHECK-NEXT: store i32 [[TMP46]], ptr [[TMP17]], align 4 -; CHECK-NEXT: store i32 [[TMP47]], ptr [[TMP18]], align 4 -; CHECK-NEXT: store i32 [[TMP48]], ptr [[TMP19]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 78363e13595cb..92ab4c23d43e6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -106,7 +106,6 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -122,7 +121,7 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -170,7 +169,7 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -241,7 +240,6 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP97:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP98:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP99:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -257,7 +255,7 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -396,23 +394,38 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP145:%.*]], [[PRED_LOAD_CONTINUE33]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP146:%.*]], [[PRED_LOAD_CONTINUE33]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP147:%.*]], [[PRED_LOAD_CONTINUE33]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <4 x i64> [[TMP64]], i64 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <4 x i64> [[TMP69]], i64 [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP79:%.*]] = insertelement <4 x i64> [[TMP74]], i64 [[TMP3]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[TMP84:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP89:%.*]] = insertelement <4 x i64> [[TMP84]], i64 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP94:%.*]] = insertelement <4 x i64> [[TMP89]], i64 [[TMP6]], i32 2 +; CHECK-NEXT: [[TMP99:%.*]] = insertelement <4 x i64> [[TMP94]], i64 [[TMP7]], i32 3 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 +; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i64> poison, i64 [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP109:%.*]] = insertelement <4 x i64> [[TMP104]], i64 [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP114:%.*]] = insertelement <4 x i64> [[TMP109]], i64 [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP119:%.*]] = insertelement <4 x i64> [[TMP114]], i64 [[TMP11]], i32 3 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP124:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP129:%.*]] = insertelement <4 x i64> [[TMP124]], i64 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP134:%.*]] = insertelement <4 x i64> [[TMP129]], i64 [[TMP14]], i32 2 +; CHECK-NEXT: [[TMP139:%.*]] = insertelement <4 x i64> [[TMP134]], i64 [[TMP15]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -462,7 +475,7 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 ; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i32> poison, i32 [[TMP66]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] @@ -653,7 +666,6 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP76:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 @@ -669,7 +681,7 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP4]] @@ -717,7 +729,7 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3 -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8 ; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12 @@ -812,7 +824,6 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1024, [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3 @@ -828,7 +839,7 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -876,7 +887,7 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -995,7 +1006,6 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -1011,7 +1021,7 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1059,7 +1069,7 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] @@ -1166,7 +1176,6 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1182,7 +1191,7 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1230,7 +1239,7 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -1297,7 +1306,6 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1313,7 +1321,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1361,7 +1369,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -1428,7 +1436,6 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1444,7 +1451,7 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1492,7 +1499,7 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -1568,7 +1575,6 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP76:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 @@ -1584,7 +1590,7 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP4]] @@ -1632,7 +1638,7 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3 -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8 ; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12 @@ -1728,7 +1734,6 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1744,7 +1749,7 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1792,7 +1797,7 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -1860,7 +1865,6 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1876,7 +1880,7 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1924,7 +1928,7 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -2002,7 +2006,6 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -2018,7 +2021,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -2066,7 +2069,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -2142,7 +2145,6 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -2158,7 +2160,7 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 39 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 42 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 45 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -2206,7 +2208,7 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] @@ -2331,7 +2333,6 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP56:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 12 @@ -2339,7 +2340,7 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 20 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 28 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -2363,7 +2364,7 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i1> [[TMP28]], i1 [[TMP25]], i32 1 ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i1> [[TMP29]], i1 [[TMP26]], i32 2 ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i1> [[TMP30]], i1 [[TMP27]], i32 3 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] @@ -2460,7 +2461,6 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 10 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 15 @@ -2476,7 +2476,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 65 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 70 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 75 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -2524,7 +2524,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] @@ -2650,7 +2650,6 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -2666,7 +2665,7 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -2714,7 +2713,7 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] @@ -2842,7 +2841,6 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP130:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP131:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -2858,7 +2856,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP1]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP3]], 2 @@ -2874,7 +2872,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[TMP13]], 2 ; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[TMP14]], 2 ; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[TMP15]], 2 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll index cbf62865cdce5..d36260a7a5492 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll @@ -28,23 +28,23 @@ define i32 @unused_blend_after_unrolling(ptr %p, i32 %a, i1 %c.1, i16 %x, i16 %y ; CHECK: [[PRED_SDIV_IF]]: ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]] ; CHECK: [[PRED_SDIV_CONTINUE]]: -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SDIV_IF4:.*]], label %[[PRED_SDIV_CONTINUE5:.*]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF4:.*]], label %[[PRED_SDIV_CONTINUE5:.*]] ; CHECK: [[PRED_SDIV_IF4]]: ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE5]] ; CHECK: [[PRED_SDIV_CONTINUE5]]: -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SDIV_IF6:.*]], label %[[PRED_SDIV_CONTINUE7:.*]] ; CHECK: [[PRED_SDIV_IF6]]: ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE7]] ; CHECK: [[PRED_SDIV_CONTINUE7]]: -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SDIV_IF8:.*]], label %[[PRED_SDIV_CONTINUE9:.*]] ; CHECK: [[PRED_SDIV_IF8]]: ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE9]] ; CHECK: [[PRED_SDIV_CONTINUE9]]: -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF10:.*]], label %[[PRED_SDIV_CONTINUE11:.*]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_SDIV_IF10:.*]], label %[[PRED_SDIV_CONTINUE11:.*]] ; CHECK: [[PRED_SDIV_IF10]]: ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE11]] ; CHECK: [[PRED_SDIV_CONTINUE11]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll index 1350e40c77e66..1396029b20c9f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll @@ -21,9 +21,6 @@ define void @foo(ptr %ptr, ptr %ptr.2) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[OFFSET_IDX]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3 ; CHECK-NEXT: store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll index 34c54de2140cc..3cdffa8284ba9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -11,9 +11,8 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x ptr> [[TMP16]], ptr [[TMP2]], i32 1 @@ -35,7 +34,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll index fb5a5f9c068b9..7e08ed43a2432 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll @@ -18,8 +18,7 @@ define void @test_pr55096(i64 %c, ptr %p) { ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[TMP4]], 2008 +; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 2008 ; CHECK-NEXT: [[TMP6:%.*]] = udiv i16 4943, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[P]], i16 [[TMP6]] ; CHECK-NEXT: store i16 0, ptr [[TMP7]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index 368842634c374..b1b432cda3016 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -57,11 +57,10 @@ define void @test(ptr %p) { ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0 ; VEC-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3 -; VEC-NEXT: [[TMP19:%.*]] = shl i64 [[TMP15]], 1 +; VEC-NEXT: [[TMP19:%.*]] = shl i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP20:%.*]] = shl i64 [[TMP16]], 1 ; VEC-NEXT: [[TMP21:%.*]] = shl i64 [[TMP17]], 1 ; VEC-NEXT: [[TMP22:%.*]] = shl i64 [[TMP18]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll index 3813560d9300a..e7c786b6b6e9e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll @@ -28,7 +28,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I64-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; I64-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; I64-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) -; I64-NEXT: [[IV:%.*]] = add i32 [[INDEX]], 0 ; I64-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; I64-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 2 ; I64-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 3 @@ -64,7 +63,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I64-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP23]], i32 1 ; I64-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP23]], i32 2 ; I64-NEXT: [[TMP71:%.*]] = extractelement <4 x double> [[TMP23]], i32 3 -; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[IV]] +; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX]] ; I64-NEXT: [[TMP25:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]] ; I64-NEXT: [[TMP26:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]] ; I64-NEXT: [[TMP27:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP7]] @@ -134,7 +133,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I64: [[VEC_EPILOG_VECTOR_BODY]]: ; I64-NEXT: [[INDEX4:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; I64-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; I64-NEXT: [[TMP75:%.*]] = add i32 [[INDEX4]], 0 ; I64-NEXT: [[TMP76:%.*]] = add i32 [[INDEX4]], 1 ; I64-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 2 ; I64-NEXT: [[TMP78:%.*]] = add i32 [[INDEX4]], 3 @@ -143,7 +141,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I64-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP79]], i32 1 ; I64-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP79]], i32 2 ; I64-NEXT: [[TMP91:%.*]] = extractelement <4 x double> [[TMP79]], i32 3 -; I64-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]] +; I64-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX4]] ; I64-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]] ; I64-NEXT: [[TMP86:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]] ; I64-NEXT: [[TMP93:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP78]] @@ -184,7 +182,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I32-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; I32-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; I32-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) -; I32-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; I32-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 ; I32-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2 ; I32-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3 @@ -220,7 +217,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I32-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP55]], i32 1 ; I32-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP55]], i32 2 ; I32-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP55]], i32 3 -; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP3]] +; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX]] ; I32-NEXT: [[TMP16:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP4]] ; I32-NEXT: [[TMP17:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]] ; I32-NEXT: [[TMP18:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]] @@ -290,7 +287,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I32: [[VEC_EPILOG_VECTOR_BODY]]: ; I32-NEXT: [[INDEX4:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; I32-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; I32-NEXT: [[TMP74:%.*]] = add i32 [[INDEX4]], 0 ; I32-NEXT: [[TMP75:%.*]] = add i32 [[INDEX4]], 1 ; I32-NEXT: [[TMP76:%.*]] = add i32 [[INDEX4]], 2 ; I32-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 3 @@ -299,7 +295,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I32-NEXT: [[TMP88:%.*]] = extractelement <4 x double> [[TMP78]], i32 1 ; I32-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP78]], i32 2 ; I32-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP78]], i32 3 -; I32-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP74]] +; I32-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX4]] ; I32-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]] ; I32-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]] ; I32-NEXT: [[TMP92:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]] @@ -352,11 +348,10 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n) ; I64-NEXT: br label %[[VECTOR_BODY:.*]] ; I64: [[VECTOR_BODY]]: ; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; I64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]] +; I64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]] ; I64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]] ; I64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]] ; I64-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]] @@ -364,7 +359,7 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n) ; I64-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8 ; I64-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8 ; I64-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8 -; I64-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1 +; I64-NEXT: [[TMP12:%.*]] = shl i64 [[INDEX]], 1 ; I64-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1 ; I64-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1 ; I64-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1 @@ -399,11 +394,10 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n) ; I32-NEXT: br label %[[VECTOR_BODY:.*]] ; I32: [[VECTOR_BODY]]: ; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; I32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]] +; I32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]] ; I32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]] ; I32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]] ; I32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]] @@ -411,7 +405,7 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n) ; I32-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8 ; I32-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8 ; I32-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8 -; I32-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1 +; I32-NEXT: [[TMP12:%.*]] = shl i64 [[INDEX]], 1 ; I32-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1 ; I32-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1 ; I32-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1 @@ -716,7 +710,6 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias % ; I32-NEXT: br label %[[VECTOR_BODY:.*]] ; I32: [[VECTOR_BODY]]: ; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; I32-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 ; I32-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 ; I32-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 @@ -724,7 +717,7 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias % ; I32-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 5 ; I32-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6 ; I32-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 7 -; I32-NEXT: [[TMP11:%.*]] = add i64 [[TMP3]], 1 +; I32-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; I32-NEXT: [[TMP12:%.*]] = add i64 [[TMP4]], 1 ; I32-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], 1 ; I32-NEXT: [[TMP14:%.*]] = add i64 [[TMP6]], 1 @@ -798,7 +791,7 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias % ; I32-NEXT: [[TMP88:%.*]] = load float, ptr [[TMP87]], align 4 ; I32-NEXT: [[TMP90:%.*]] = load float, ptr [[TMP89]], align 4 ; I32-NEXT: [[TMP92:%.*]] = load float, ptr [[TMP91]], align 4 -; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; I32-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] ; I32-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]] ; I32-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]] @@ -864,7 +857,6 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s ; I64-NEXT: br label %[[VECTOR_BODY:.*]] ; I64: [[VECTOR_BODY]]: ; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -872,7 +864,7 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s ; I64-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 ; I64-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 ; I64-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; I64-NEXT: [[TMP8:%.*]] = mul i64 [[TMP0]], [[OFFSET]] +; I64-NEXT: [[TMP8:%.*]] = mul i64 [[INDEX]], [[OFFSET]] ; I64-NEXT: [[TMP9:%.*]] = mul i64 [[TMP1]], [[OFFSET]] ; I64-NEXT: [[TMP10:%.*]] = mul i64 [[TMP2]], [[OFFSET]] ; I64-NEXT: [[TMP11:%.*]] = mul i64 [[TMP3]], [[OFFSET]] @@ -981,11 +973,10 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s ; I32-NEXT: br label %[[VECTOR_BODY:.*]] ; I32: [[VECTOR_BODY]]: ; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; I32-NEXT: [[TMP4:%.*]] = mul i64 [[TMP0]], [[OFFSET]] +; I32-NEXT: [[TMP4:%.*]] = mul i64 [[INDEX]], [[OFFSET]] ; I32-NEXT: [[TMP5:%.*]] = mul i64 [[TMP1]], [[OFFSET]] ; I32-NEXT: [[TMP6:%.*]] = mul i64 [[TMP2]], [[OFFSET]] ; I32-NEXT: [[TMP7:%.*]] = mul i64 [[TMP3]], [[OFFSET]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index 93cf59c019d5f..7ab83f04c188a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -182,7 +182,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]] ; CHECK: pred.store.continue23: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[TMP18]], i64 3 -; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26]] +; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE26]] ; CHECK: pred.store.if24: ; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr @b, i64 [[TMP43]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll index da48f984cb329..c1264ca2f8413 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -26,7 +26,6 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -58,15 +57,15 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8 -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16 -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]] -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]] -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]] -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]] @@ -98,7 +97,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP29]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP30]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP31]], i64 [[IDXPROM5]] -; CHECK-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[INT_TBAA1]] @@ -193,17 +192,16 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0 ; CHECK-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1 ; CHECK-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2 ; CHECK-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3 -; CHECK-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]] +; CHECK-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX9]] ; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]] -; CHECK-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]] +; CHECK-NEXT: [[TMP153:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX9]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]] -; CHECK-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[INT_TBAA1]] +; CHECK-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP153]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[INT_TBAA1]] @@ -257,7 +255,6 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ] ; MAX-BW-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ] ; MAX-BW-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ] -; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; MAX-BW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -289,15 +286,15 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; MAX-BW-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; MAX-BW-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]] -; MAX-BW-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8 -; MAX-BW-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16 -; MAX-BW-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24 +; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX]] +; MAX-BW-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8 +; MAX-BW-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16 +; MAX-BW-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24 ; MAX-BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]] -; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]] -; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]] -; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]] -; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] +; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP35:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP43:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]] @@ -329,7 +326,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP29]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP70:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP30]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP71:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP31]], i64 [[IDXPROM5]] -; MAX-BW-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP40]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP41]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP42]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[INT_TBAA1]] @@ -424,17 +421,16 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW: [[VEC_EPILOG_VECTOR_BODY]]: ; MAX-BW-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; MAX-BW-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; MAX-BW-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0 ; MAX-BW-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1 ; MAX-BW-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2 ; MAX-BW-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3 -; MAX-BW-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]] +; MAX-BW-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX9]] ; MAX-BW-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]] -; MAX-BW-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]] +; MAX-BW-NEXT: [[TMP153:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX9]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]] -; MAX-BW-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP154]], align 4, !tbaa [[INT_TBAA1]] +; MAX-BW-NEXT: [[TMP158:%.*]] = load i32, ptr [[TMP153]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP159:%.*]] = load i32, ptr [[TMP155]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP160:%.*]] = load i32, ptr [[TMP156]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP157]], align 4, !tbaa [[INT_TBAA1]] @@ -507,7 +503,6 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -515,7 +510,7 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> @@ -529,7 +524,7 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i8> [[TMP19]], i32 5 ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i8> [[TMP19]], i32 6 ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i8> [[TMP19]], i32 7 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP1]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP2]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP3]] @@ -562,7 +557,6 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW: [[VECTOR_BODY]]: ; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; MAX-BW-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; MAX-BW-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -578,7 +572,7 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; MAX-BW-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; MAX-BW-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP0]] +; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[OFFSET_IDX]] ; MAX-BW-NEXT: [[WIDE_VEC:%.*]] = load <32 x i32>, ptr [[TMP32]], align 4 ; MAX-BW-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> ; MAX-BW-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> @@ -600,7 +594,7 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW-NEXT: [[TMP65:%.*]] = extractelement <16 x i8> [[TMP35]], i32 13 ; MAX-BW-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[TMP35]], i32 14 ; MAX-BW-NEXT: [[TMP67:%.*]] = extractelement <16 x i8> [[TMP35]], i32 15 -; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP0]] +; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[OFFSET_IDX]] ; MAX-BW-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP1]] ; MAX-BW-NEXT: [[TMP71:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP2]] ; MAX-BW-NEXT: [[TMP72:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index dbd7019188d07..74201fef14b58 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -198,9 +198,6 @@ define void @uniform_store_varying_value(ptr align(4) %addr) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 12 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP0]], 13 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP0]], 14 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], 15 ; CHECK-NEXT: store i32 [[TMP7]], ptr [[ADDR:%.*]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll index 38617d25bfd7a..d4335961e19b2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll @@ -16,11 +16,10 @@ define void @test(ptr %A) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = shl nsw i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shl nsw i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = shl nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP3]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll index b5d42a8f71430..1a960facbd7e1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -1399,8 +1399,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] ; DISABLED_MASKED_STRIDED: pred.store.continue44: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if45: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]] @@ -1408,8 +1407,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] ; DISABLED_MASKED_STRIDED: pred.store.continue46: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if47: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]] @@ -1417,58 +1415,52 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] ; DISABLED_MASKED_STRIDED: pred.store.continue48: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if49: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], ptr [[TMP143]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] ; DISABLED_MASKED_STRIDED: pred.store.continue50: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if51: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP147]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] ; DISABLED_MASKED_STRIDED: pred.store.continue52: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if53: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP151]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] ; DISABLED_MASKED_STRIDED: pred.store.continue54: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if55: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP155]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] ; DISABLED_MASKED_STRIDED: pred.store.continue56: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if57: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], ptr [[TMP159]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] ; DISABLED_MASKED_STRIDED: pred.store.continue58: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.if59: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP163]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -1804,8 +1796,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] ; DISABLED_MASKED_STRIDED: pred.store.continue44: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if45: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]] @@ -1813,8 +1804,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] ; DISABLED_MASKED_STRIDED: pred.store.continue46: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if47: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]] @@ -1822,58 +1812,52 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] ; DISABLED_MASKED_STRIDED: pred.store.continue48: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if49: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], ptr [[TMP143]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] ; DISABLED_MASKED_STRIDED: pred.store.continue50: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if51: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP147]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] ; DISABLED_MASKED_STRIDED: pred.store.continue52: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if53: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP151]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] ; DISABLED_MASKED_STRIDED: pred.store.continue54: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if55: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP155]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] ; DISABLED_MASKED_STRIDED: pred.store.continue56: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if57: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], ptr [[TMP159]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] ; DISABLED_MASKED_STRIDED: pred.store.continue58: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.if59: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP163]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -2129,8 +2113,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] ; ENABLED_MASKED_STRIDED: pred.store.continue44: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if45: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]] @@ -2138,8 +2121,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] ; ENABLED_MASKED_STRIDED: pred.store.continue46: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if47: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]] @@ -2147,58 +2129,52 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p ; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] ; ENABLED_MASKED_STRIDED: pred.store.continue48: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if49: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], ptr [[TMP143]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] ; ENABLED_MASKED_STRIDED: pred.store.continue50: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if51: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP147]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] ; ENABLED_MASKED_STRIDED: pred.store.continue52: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if53: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP151]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] ; ENABLED_MASKED_STRIDED: pred.store.continue54: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if55: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP155]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] ; ENABLED_MASKED_STRIDED: pred.store.continue56: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] ; ENABLED_MASKED_STRIDED: pred.store.if57: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], ptr [[TMP159]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] ; ENABLED_MASKED_STRIDED: pred.store.continue58: -; ENABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] +; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE60]] ; ENABLED_MASKED_STRIDED: pred.store.if59: ; ENABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]] -; ENABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 -; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1 +; ENABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 +; ENABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP163]], align 1 ; ENABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; ENABLED_MASKED_STRIDED: pred.store.continue60: ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -2524,8 +2500,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] ; DISABLED_MASKED_STRIDED: pred.store.continue46: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = sub <8 x i8> zeroinitializer, [[TMP101]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP135]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP102]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if47: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP136:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP136]] @@ -2533,8 +2508,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP138]], ptr [[TMP137]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] ; DISABLED_MASKED_STRIDED: pred.store.continue48: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = extractelement <8 x i1> [[TMP3]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP139]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP106]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if49: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP140:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP140]] @@ -2542,58 +2516,52 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP142]], ptr [[TMP141]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] ; DISABLED_MASKED_STRIDED: pred.store.continue50: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i1> [[TMP3]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP143]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP110]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if51: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP144]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i8> [[TMP134]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP146]], ptr [[TMP145]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = extractelement <8 x i8> [[TMP134]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP143]], ptr [[TMP145]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] ; DISABLED_MASKED_STRIDED: pred.store.continue52: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = extractelement <8 x i1> [[TMP3]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP147]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP114]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if53: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP148]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i8> [[TMP134]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP150]], ptr [[TMP149]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i8> [[TMP134]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP146]], ptr [[TMP149]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] ; DISABLED_MASKED_STRIDED: pred.store.continue54: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = extractelement <8 x i1> [[TMP3]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP151]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP118]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if55: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP152]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i8> [[TMP134]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP154]], ptr [[TMP153]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i8> [[TMP134]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP150]], ptr [[TMP153]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] ; DISABLED_MASKED_STRIDED: pred.store.continue56: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i1> [[TMP3]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP155]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP122]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if57: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP156]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i8> [[TMP134]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP158]], ptr [[TMP157]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i8> [[TMP134]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP154]], ptr [[TMP157]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] ; DISABLED_MASKED_STRIDED: pred.store.continue58: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = extractelement <8 x i1> [[TMP3]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP159]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP126]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE60:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if59: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP160]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i8> [[TMP134]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP162]], ptr [[TMP161]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = extractelement <8 x i8> [[TMP134]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP155]], ptr [[TMP161]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = extractelement <8 x i1> [[TMP3]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP163]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP130]], label [[PRED_STORE_IF60:%.*]], label [[PRED_STORE_CONTINUE62]] ; DISABLED_MASKED_STRIDED: pred.store.if61: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP164]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP166:%.*]] = extractelement <8 x i8> [[TMP134]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP166]], ptr [[TMP165]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i8> [[TMP134]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP158]], ptr [[TMP165]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE62]] ; DISABLED_MASKED_STRIDED: pred.store.continue62: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -2958,8 +2926,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE44]] ; DISABLED_MASKED_STRIDED: pred.store.continue44: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP132:%.*]] = sub <8 x i8> zeroinitializer, [[TMP99]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP133:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP100]], label [[PRED_STORE_IF44:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if45: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]] @@ -2967,8 +2934,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP136]], ptr [[TMP135]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE46]] ; DISABLED_MASKED_STRIDED: pred.store.continue46: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP137:%.*]] = extractelement <8 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP104]], label [[PRED_STORE_IF46:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if47: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]] @@ -2976,58 +2942,52 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP140]], ptr [[TMP139]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE48]] ; DISABLED_MASKED_STRIDED: pred.store.continue48: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP108]], label [[PRED_STORE_IF48:%.*]], label [[PRED_STORE_CONTINUE50:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if49: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP143]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP141:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP141]], ptr [[TMP143]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE50]] ; DISABLED_MASKED_STRIDED: pred.store.continue50: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP145:%.*]] = extractelement <8 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP112]], label [[PRED_STORE_IF50:%.*]], label [[PRED_STORE_CONTINUE52:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if51: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP147]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP144]], ptr [[TMP147]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE52]] ; DISABLED_MASKED_STRIDED: pred.store.continue52: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP149:%.*]] = extractelement <8 x i1> [[TMP0]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP116]], label [[PRED_STORE_IF52:%.*]], label [[PRED_STORE_CONTINUE54:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if53: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP151]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP148]], ptr [[TMP151]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE54]] ; DISABLED_MASKED_STRIDED: pred.store.continue54: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i1> [[TMP0]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP120]], label [[PRED_STORE_IF54:%.*]], label [[PRED_STORE_CONTINUE56:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if55: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP155]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP152]], ptr [[TMP155]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE56]] ; DISABLED_MASKED_STRIDED: pred.store.continue56: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP157:%.*]] = extractelement <8 x i1> [[TMP0]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP124]], label [[PRED_STORE_IF56:%.*]], label [[PRED_STORE_CONTINUE58:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if57: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP160]], ptr [[TMP159]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP153:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP153]], ptr [[TMP159]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE58]] ; DISABLED_MASKED_STRIDED: pred.store.continue58: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP161:%.*]] = extractelement <8 x i1> [[TMP0]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP128]], label [[PRED_STORE_IF58:%.*]], label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.if59: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]] -; DISABLED_MASKED_STRIDED-NEXT: [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 -; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1 +; DISABLED_MASKED_STRIDED-NEXT: [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7 +; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP156]], ptr [[TMP163]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: ; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll index cfc588a7f5d89..69a35d43183f3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll @@ -181,8 +181,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; DISABLED_MASKED_STRIDED-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr align 2 [[TMP19]], <4 x i1> [[TMP0]], <4 x i16> poison) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP2]], splat (i64 1) -; DISABLED_MASKED_STRIDED-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if8: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i64 0 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP22]] @@ -190,8 +189,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE9]] ; DISABLED_MASKED_STRIDED: pred.store.continue9: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if10: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP20]], i64 1 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP27:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP26]] @@ -199,8 +197,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP28]], ptr [[TMP27]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE11]] ; DISABLED_MASKED_STRIDED: pred.store.continue11: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] ; DISABLED_MASKED_STRIDED: pred.store.if12: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP30:%.*]] = extractelement <4 x i64> [[TMP20]], i64 2 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP31:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP30]] @@ -208,8 +205,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP32]], ptr [[TMP31]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE13]] ; DISABLED_MASKED_STRIDED: pred.store.continue13: -; DISABLED_MASKED_STRIDED-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3 -; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]] +; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]] ; DISABLED_MASKED_STRIDED: pred.store.if14: ; DISABLED_MASKED_STRIDED-NEXT: [[TMP34:%.*]] = extractelement <4 x i64> [[TMP20]], i64 3 ; DISABLED_MASKED_STRIDED-NEXT: [[TMP35:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP34]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll index d29719de79ffd..d7e853931417b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll @@ -30,12 +30,11 @@ define void @example() { ; FORCED: [[VECTOR_BODY]]: ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; FORCED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; FORCED-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[VEC_IND]] to <2 x x86_fp80> ; FORCED-NEXT: [[TMP5:%.*]] = extractelement <2 x x86_fp80> [[TMP2]], i32 0 ; FORCED-NEXT: [[TMP6:%.*]] = extractelement <2 x x86_fp80> [[TMP2]], i32 1 -; FORCED-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[TMP0]] +; FORCED-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[INDEX]] ; FORCED-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[TMP1]] ; FORCED-NEXT: store x86_fp80 [[TMP5]], ptr [[TMP3]], align 16 ; FORCED-NEXT: store x86_fp80 [[TMP6]], ptr [[TMP4]], align 16 @@ -44,8 +43,9 @@ define void @example() { ; FORCED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FORCED-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br [[EXIT:label %.*]] -; FORCED: [[SCALAR_PH:.*:]] +; FORCED-NEXT: br label %[[EXIT:.*]] +; FORCED: [[EXIT]]: +; FORCED-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/as_cast.ll b/llvm/test/Transforms/LoopVectorize/as_cast.ll index 67aacefebd555..2437bcfde08f9 100644 --- a/llvm/test/Transforms/LoopVectorize/as_cast.ll +++ b/llvm/test/Transforms/LoopVectorize/as_cast.ll @@ -22,7 +22,7 @@ loop: ; CHECK: [[ID2:%.*]] = add i64 %{{.*}}, 1 ; CHECK: [[AS2:%.*]] = addrspacecast ptr addrspace(1) %in to ptr ; CHECK: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[AS2]], i64 [[ID2]] -; CHECK: store i64 [[ID2]], ptr %9, align 4 +; CHECK: store i64 [[ID2]], ptr [[GEP2]], align 4 %cmp = icmp eq i64 %next, 7 br i1 %cmp, label %exit, label %loop diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll index 0b58bb65e2bc5..a70d218cc8ce3 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll @@ -119,9 +119,8 @@ define void @cast_induction_tail_folding(ptr %A) { ; IC2-LABEL: @cast_induction_tail_folding( ; IC2: [[INDEX:%.+]] = phi i32 [ 0, %vector.ph ] -; IC2-NEXT: [[INDEX0:%.+]] = add i32 [[INDEX]], 0 ; IC2-NEXT: [[INDEX1:%.+]] = add i32 [[INDEX]], 1 -; IC2-NEXT: = icmp ule i32 [[INDEX0]], 2 +; IC2-NEXT: = icmp ule i32 [[INDEX]], 2 ; IC2-NEXT: = icmp ule i32 [[INDEX1]], 2 ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 6c63b823b7666..f76b38bfda1c7 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -243,15 +243,14 @@ define i32 @interleaved_access_forward(ptr %p, i64 %n) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX1]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX1]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 1 @@ -390,15 +389,14 @@ define i32 @interleaved_access_reverse(ptr %p, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] -; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], -2 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -3 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP18]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[OFFSET_IDX]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP5]], i32 1 @@ -544,18 +542,13 @@ define void @predicated_store(ptr %p, i32 %x, i64 %n) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x ptr> [[TMP13]], ptr [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x ptr> [[TMP19]], ptr [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x ptr> [[TMP25]], ptr [[TMP6]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 8 @@ -637,10 +630,8 @@ define void @predicated_store(ptr %p, i32 %x, i64 %n) { ; INTER-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; INTER-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; INTER: [[PRED_STORE_IF]]: -; INTER-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; INTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP7]], i32 0 ; INTER-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0 -; INTER-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +; INTER-NEXT: store i32 [[TMP6]], ptr [[TMP2]], align 8 ; INTER-NEXT: br label %[[PRED_STORE_CONTINUE]] ; INTER: [[PRED_STORE_CONTINUE]]: ; INTER-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 @@ -734,11 +725,10 @@ define void @irregular_type(ptr %a, i64 %n) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX1]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP3]] @@ -778,11 +768,10 @@ define void @irregular_type(ptr %a, i64 %n) { ; INTER-NEXT: br label %[[VECTOR_BODY:.*]] ; INTER: [[VECTOR_BODY]]: ; INTER-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; INTER-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0 ; INTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 1 ; INTER-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 2 ; INTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 3 -; INTER-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX]] +; INTER-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX1]] ; INTER-NEXT: [[TMP2:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP1]] ; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP5]] ; INTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP3]] @@ -947,11 +936,10 @@ define void @pointer_iv_non_uniform_0(ptr %a, i64 %n) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX1]], 48 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]] ; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]] @@ -1062,11 +1050,10 @@ define void @pointer_iv_non_uniform_0(ptr %a, i64 %n) { ; INTER: [[VECTOR_BODY]]: ; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTER-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16 -; INTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0 ; INTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX1]], 16 ; INTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 32 ; INTER-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX1]], 48 -; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] +; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]] ; INTER-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]] ; INTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]] ; INTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]] @@ -1181,11 +1168,10 @@ define void @pointer_iv_non_uniform_1(ptr %a, i64 %n) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 48 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]] ; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]] @@ -1230,11 +1216,10 @@ define void @pointer_iv_non_uniform_1(ptr %a, i64 %n) { ; INTER: [[VECTOR_BODY]]: ; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTER-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16 -; INTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0 ; INTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16 ; INTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32 ; INTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 48 -; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] +; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]] ; INTER-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] ; INTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]] ; INTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]] @@ -1469,11 +1454,10 @@ define void @pointer_operand_geps_with_different_indexed_types(ptr %A, ptr %B, i ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -1498,7 +1482,7 @@ define void @pointer_operand_geps_with_different_indexed_types(ptr %A, ptr %B, i ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i8> [[TMP27]], i8 [[TMP24]], i32 2 ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP32]], i8 [[TMP25]], i32 3 ; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i8> [[TMP20]], [[TMP28]] -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX1]] ; CHECK-NEXT: store <4 x i8> [[TMP29]], ptr [[TMP30]], align 1, !alias.scope [[META30:![0-9]+]], !noalias [[META27]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 4 ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll index b6d7a9f81ec9d..06e4d775d2232 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll @@ -131,9 +131,8 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa ; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i8> [[TMP31]] to <4 x i64> ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 0 ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x i64> [[TMP32]], i32 0 -; CHECK-NEXT: store i64 [[TMP35]], ptr [[TMP34]], align 4 +; CHECK-NEXT: store i64 [[TMP35]], ptr [[C]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]] @@ -185,11 +184,10 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa ; CHECK-NEXT: store i64 [[TMP49]], ptr [[TMP48]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] ; CHECK: [[PRED_STORE_CONTINUE28]]: -; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 0 ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 1 ; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 2 ; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 3 -; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP50]], i32 0 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i32 0 ; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x ptr> [[TMP54]], ptr [[TMP51]], i32 1 ; CHECK-NEXT: [[TMP56:%.*]] = insertelement <4 x ptr> [[TMP55]], ptr [[TMP52]], i32 2 ; CHECK-NEXT: [[TMP57:%.*]] = insertelement <4 x ptr> [[TMP56]], ptr [[TMP53]], i32 3 @@ -203,7 +201,7 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa ; CHECK-NEXT: [[TMP65:%.*]] = insertelement <4 x ptr> [[TMP64]], ptr [[TMP61]], i32 3 ; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]] ; CHECK: [[PRED_LOAD_IF29]]: -; CHECK-NEXT: [[TMP66:%.*]] = load i8, ptr [[TMP50]], align 1 +; CHECK-NEXT: [[TMP66:%.*]] = load i8, ptr [[B]], align 1 ; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i8> poison, i8 [[TMP66]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE30]] ; CHECK: [[PRED_LOAD_CONTINUE30]]: @@ -272,7 +270,7 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46:.*]] ; CHECK: [[PRED_STORE_IF45]]: ; CHECK-NEXT: [[TMP102:%.*]] = extractelement <4 x i8> [[TMP100]], i32 0 -; CHECK-NEXT: store i8 [[TMP102]], ptr [[TMP50]], align 1 +; CHECK-NEXT: store i8 [[TMP102]], ptr [[B]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE46]] ; CHECK: [[PRED_STORE_CONTINUE46]]: ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF47:.*]], label %[[PRED_STORE_CONTINUE48:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/debugloc.ll b/llvm/test/Transforms/LoopVectorize/debugloc.ll index 03e0853d29075..15295ad5f0bd0 100644 --- a/llvm/test/Transforms/LoopVectorize/debugloc.ll +++ b/llvm/test/Transforms/LoopVectorize/debugloc.ll @@ -66,8 +66,7 @@ define i32 @test_debug_loc_on_branch_in_loop(ptr noalias %src, ptr noalias %dst) ; CHECK-NEXT: br i1 [[EXT]], label %pred.store.if, label %pred.store.continue, !dbg [[LOC3]] ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: [[IDX:%.+]] = add i64 %index, 0 -; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 [[IDX]] +; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 %index ; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 ; CHECK-NEXT: br label %pred.store.continue, !dbg [[LOC3]] ; CHECK-EMPTY: @@ -105,8 +104,7 @@ define i32 @test_different_debug_loc_on_replicate_recipe(ptr noalias %src, ptr n ; CHECK-NEXT: br i1 [[EXT]], label %pred.store.if, label %pred.store.continue, !dbg [[LOC4]] ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: [[IDX:%.+]] = add i64 %index, 0 -; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 [[IDX]], !dbg [[LOC5:!.+]] +; CHECK-NEXT: [[GEP:%.+]] = getelementptr inbounds i32, ptr %dst, i64 %index, !dbg [[LOC5:!.+]] ; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 ; CHECK-NEXT: br label %pred.store.continue, !dbg [[LOC4]] ; CHECK-EMPTY: @@ -173,11 +171,10 @@ define void @test_scalar_steps(ptr nocapture %a, ptr noalias %b, i64 %size) !dbg ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0, !dbg [[LOC8:!.+]] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2, !dbg [[LOC8]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2, !dbg [[LOC8:!.+]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP9]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll index 156c2bdca7b0e..8f7ae20288575 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll @@ -164,8 +164,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: store i32 13, ptr [[TMP3]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index 1a99c47aa351d..0607ec38e7e46 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -325,11 +325,10 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 48 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] ; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP7]] @@ -378,11 +377,10 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT20:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP43:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX13:%.*]] = mul i64 [[INDEX11]], 16 -; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX13]], 0 ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX13]], 16 ; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[OFFSET_IDX13]], 32 ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX13]], 48 -; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP26]] +; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX13]] ; CHECK-NEXT: [[NEXT_GEP15:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP27]] ; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP28]] ; CHECK-NEXT: [[NEXT_GEP17:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP29]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll index cf2e7ccd1b2f0..fe9cafea2db84 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll @@ -15,7 +15,6 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 3 @@ -23,7 +22,7 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 5 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 6 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 7 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll index 28b46726f80dc..08abf5cfefdbf 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll @@ -20,13 +20,14 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] ; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ] -; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1 ; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC1: [[PRED_LOAD_IF]]: -; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 ; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 ; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -45,7 +46,7 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC1: [[PRED_STORE_IF]]: -; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 ; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0 ; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] @@ -96,16 +97,19 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1 ; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 +; VF2IC2-NEXT: [[TMP69:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 +; VF2IC2-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> [[TMP69]], i64 [[TMP7]], i32 1 ; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC2: [[PRED_LOAD_IF]]: -; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0 ; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -143,7 +147,7 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC2: [[PRED_STORE_IF]]: -; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0 ; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0 ; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] @@ -314,13 +318,14 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] ; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ] -; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1 ; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC1: [[PRED_LOAD_IF]]: -; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 ; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 ; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -339,7 +344,7 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC1: [[PRED_STORE_IF]]: -; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 ; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0 ; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] @@ -386,16 +391,19 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1 ; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 +; VF2IC2-NEXT: [[TMP65:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 +; VF2IC2-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> [[TMP65]], i64 [[TMP7]], i32 1 ; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC2: [[PRED_LOAD_IF]]: -; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0 ; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -433,7 +441,7 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC2: [[PRED_STORE_IF]]: -; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0 ; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0 ; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] @@ -597,13 +605,14 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] ; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ] -; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1 ; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC1: [[PRED_LOAD_IF]]: -; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 ; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 ; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -622,7 +631,7 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC1: [[PRED_STORE_IF]]: -; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 ; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0 ; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] @@ -675,16 +684,19 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1 ; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 +; VF2IC2-NEXT: [[TMP74:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 +; VF2IC2-NEXT: [[TMP75:%.*]] = insertelement <2 x i64> [[TMP74]], i64 [[TMP7]], i32 1 ; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC2: [[PRED_LOAD_IF]]: -; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0 ; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -722,7 +734,7 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC2: [[PRED_STORE_IF]]: -; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0 ; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0 ; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 063f47ce2b32d..36d4bb8110eb5 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -1041,8 +1041,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 10 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 12 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 14 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2 @@ -1132,8 +1130,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 -; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2 ; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4 ; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6 ; SINK-AFTER-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2 @@ -1423,7 +1419,6 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200 -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600 @@ -1431,7 +1426,7 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1000 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1200 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 1400 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]] ; UNROLL-NO-IC-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] ; UNROLL-NO-IC-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] ; UNROLL-NO-IC-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] @@ -1595,11 +1590,10 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) { ; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200 -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200 ; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600 -; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]] ; SINK-AFTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] ; SINK-AFTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] ; SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] @@ -1898,7 +1892,6 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1906,8 +1899,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDEX]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1 @@ -1942,7 +1935,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = sext <4 x i16> [[TMP34]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP37]] ; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP40]], [[TMP38]] -; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] +; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP41]], ptr [[TMP43]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP45]], align 4 @@ -2046,12 +2039,11 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] -; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1 +; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] +; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDEX]], i64 1 ; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1 ; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1 ; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1 @@ -2068,7 +2060,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER-NEXT: [[TMP19:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP20:%.*]] = sext <4 x i16> [[TMP17]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP21:%.*]] = mul nsw <4 x i32> [[TMP20]], [[TMP19]] -; SINK-AFTER-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] +; SINK-AFTER-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; SINK-AFTER-NEXT: store <4 x i32> [[TMP21]], ptr [[TMP22]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -2723,7 +2715,8 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV2]], [[TRIP_COUNT_MINUS_1]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; UNROLL-NO-VF: pred.udiv.if: -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = udiv i32 219220132, [[OFFSET_IDX]] +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0 +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = udiv i32 219220132, [[TMP4]] ; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE]] ; UNROLL-NO-VF: pred.udiv.continue: ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ] @@ -2861,20 +2854,27 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE29]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3 +; UNROLL-NO-IC-NEXT: [[TMP76:%.*]] = insertelement <4 x i32> poison, i32 [[OFFSET_IDX]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP77:%.*]] = insertelement <4 x i32> [[TMP76]], i32 [[TMP3]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP78:%.*]] = insertelement <4 x i32> [[TMP77]], i32 [[TMP4]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP79:%.*]] = insertelement <4 x i32> [[TMP78]], i32 [[TMP5]], i32 3 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], -4 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -5 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -6 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -7 +; UNROLL-NO-IC-NEXT: [[TMP80:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP81:%.*]] = insertelement <4 x i32> [[TMP80]], i32 [[TMP7]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP82:%.*]] = insertelement <4 x i32> [[TMP81]], i32 [[TMP8]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP83:%.*]] = insertelement <4 x i32> [[TMP82]], i32 [[TMP9]], i32 3 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ule <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; UNROLL-NO-IC: pred.udiv.if: -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = udiv i32 219220132, [[TMP2]] +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = udiv i32 219220132, [[OFFSET_IDX]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i32 0 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE]] ; UNROLL-NO-IC: pred.udiv.continue: @@ -2942,9 +2942,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP48]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NO-IC: pred.store.if: -; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = add i32 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP49]] -; UNROLL-NO-IC-NEXT: store i32 [[TMP2]], ptr [[TMP50]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]] +; UNROLL-NO-IC-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP50]], align 4 ; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NO-IC: pred.store.continue: ; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1 @@ -3097,15 +3096,18 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_STORE_CONTINUE12]] ] ; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_STORE_CONTINUE12]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] -; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1 ; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2 ; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3 +; SINK-AFTER-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> poison, i32 [[OFFSET_IDX]], i32 0 +; SINK-AFTER-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP3]], i32 1 +; SINK-AFTER-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP4]], i32 2 +; SINK-AFTER-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP5]], i32 3 ; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; SINK-AFTER-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; SINK-AFTER-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; SINK-AFTER: pred.udiv.if: -; SINK-AFTER-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[TMP2]] +; SINK-AFTER-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[OFFSET_IDX]] ; SINK-AFTER-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0 ; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE]] ; SINK-AFTER: pred.udiv.continue: @@ -3139,9 +3141,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; SINK-AFTER-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; SINK-AFTER: pred.store.if: -; SINK-AFTER-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 0 -; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP26]] -; SINK-AFTER-NEXT: store i32 [[TMP2]], ptr [[TMP27]], align 4 +; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]] +; SINK-AFTER-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP27]], align 4 ; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE]] ; SINK-AFTER: pred.store.continue: ; SINK-AFTER-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index f56699a45320d..ca83000494e8b 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -1322,8 +1322,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; VEC4_INTERL1-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VEC4_INTERL1: pred.store.if: -; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; VEC4_INTERL1-NEXT: store float [[DOTCAST2]], ptr [[TMP3]], align 4 +; VEC4_INTERL1-NEXT: store float [[DOTCAST2]], ptr [[TMP0]], align 4 ; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE]] ; VEC4_INTERL1: pred.store.continue: ; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 @@ -1402,8 +1401,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0 ; VEC4_INTERL2-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VEC4_INTERL2: pred.store.if: -; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; VEC4_INTERL2-NEXT: store float [[DOTCAST2]], ptr [[TMP35]], align 4 +; VEC4_INTERL2-NEXT: store float [[DOTCAST2]], ptr [[TMP1]], align 4 ; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE]] ; VEC4_INTERL2: pred.store.continue: ; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 @@ -1573,8 +1571,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VEC2_INTERL1_PRED_STORE: pred.store.if: -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store float [[DOTCAST2]], ptr [[TMP3]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: store float [[DOTCAST2]], ptr [[TMP0]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[PRED_STORE_CONTINUE]] ; VEC2_INTERL1_PRED_STORE: pred.store.continue: ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1 diff --git a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll index 7bbc186dcbbae..0b6f3aa080a71 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll @@ -162,8 +162,7 @@ define void @dont_hoist_predicated_load(ptr %dst, ptr %invariant_ptr, ptr %cond_ ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META14:![0-9]+]] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP9]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META18:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll index 87942911e915f..aff640d154d24 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll @@ -22,15 +22,14 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META3]] ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1 @@ -38,9 +37,9 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 -; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] +; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP14]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 @@ -51,21 +50,21 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] ; CHECK: [[PRED_STORE_CONTINUE7]]: -; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10) +; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10) ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] ; CHECK: [[PRED_STORE_IF8]]: -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0 -; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4, !alias.scope [[META5]], !noalias [[META7]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i32> [[TMP18]], i32 0 +; CHECK-NEXT: store i32 [[TMP26]], ptr [[TMP21]], align 4, !alias.scope [[META5]], !noalias [[META7]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] ; CHECK: [[PRED_STORE_CONTINUE9]]: ; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 ; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] ; CHECK: [[PRED_STORE_IF10]]: ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1 -; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4, !alias.scope [[META5]], !noalias [[META7]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP18]], i32 1 +; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP41]], align 4, !alias.scope [[META5]], !noalias [[META7]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] ; CHECK: [[PRED_STORE_CONTINUE11]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -135,16 +134,17 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE21:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: store i32 99, ptr [[TMP10]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META15]], !noalias [[META17]] ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 @@ -165,7 +165,7 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 ; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META12]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -181,36 +181,36 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15:.*]] ; CHECK: [[PRED_LOAD_IF14]]: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META15]], !noalias [[META17]] ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]] ; CHECK: [[PRED_LOAD_CONTINUE15]]: -; CHECK-NEXT: [[TMP30:%.*]] = phi <2 x i32> [ poison, %[[PRED_STORE_CONTINUE13]] ], [ [[TMP29]], %[[PRED_LOAD_IF14]] ] +; CHECK-NEXT: [[TMP34:%.*]] = phi <2 x i32> [ poison, %[[PRED_STORE_CONTINUE13]] ], [ [[TMP29]], %[[PRED_LOAD_IF14]] ] ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 ; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF16:.*]], label %[[PRED_LOAD_CONTINUE17:.*]] ; CHECK: [[PRED_LOAD_IF16]]: ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META15]], !noalias [[META17]] -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[TMP33]], i32 1 +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META15]], !noalias [[META17]] +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <2 x i32> [[TMP34]], i32 [[TMP30]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE17]] ; CHECK: [[PRED_LOAD_CONTINUE17]]: -; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE15]] ], [ [[TMP34]], %[[PRED_LOAD_IF16]] ] -; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], splat (i32 10) +; CHECK-NEXT: [[TMP42:%.*]] = phi <2 x i32> [ [[TMP34]], %[[PRED_LOAD_CONTINUE15]] ], [ [[TMP35]], %[[PRED_LOAD_IF16]] ] +; CHECK-NEXT: [[TMP33:%.*]] = add <2 x i32> [[TMP42]], splat (i32 10) ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] ; CHECK: [[PRED_STORE_IF18]]: -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0 -; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4, !alias.scope [[META19]], !noalias [[META12]] +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i32> [[TMP33]], i32 0 +; CHECK-NEXT: store i32 [[TMP36]], ptr [[TMP38]], align 4, !alias.scope [[META19]], !noalias [[META12]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE19]] ; CHECK: [[PRED_STORE_CONTINUE19]]: ; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 ; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21]] ; CHECK: [[PRED_STORE_IF20]]: ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1 -; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4, !alias.scope [[META19]], !noalias [[META12]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP33]], i32 1 +; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP41]], align 4, !alias.scope [[META19]], !noalias [[META12]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE21]] ; CHECK: [[PRED_STORE_CONTINUE21]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -290,17 +290,16 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META22:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]] -; CHECK-NEXT: store i32 10, ptr [[TMP10]], align 4, !alias.scope [[META25:![0-9]+]], !noalias [[META27:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[INDEX]] +; CHECK-NEXT: store i32 10, ptr [[TMP12]], align 4, !alias.scope [[META25:![0-9]+]], !noalias [[META27:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 @@ -310,44 +309,42 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: store i32 10, ptr [[TMP16]], align 4, !alias.scope [[META25]], !noalias [[META27]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] ; CHECK: [[PRED_STORE_CONTINUE20]]: -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META30:![0-9]+]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META30]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META30:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP13]], align 4, !alias.scope [[META30]] ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 -; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5) -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] +; CHECK-NEXT: [[TMP17:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5) +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] ; CHECK: [[PRED_STORE_IF21]]: -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 -; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0 +; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] ; CHECK: [[PRED_STORE_CONTINUE22]]: -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] ; CHECK: [[PRED_STORE_IF23]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 -; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1 +; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]] ; CHECK: [[PRED_STORE_CONTINUE24]]: -; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) +; CHECK-NEXT: [[TMP24:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] ; CHECK: [[PRED_STORE_IF25]]: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[TMP38]], i32 0 -; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4, !alias.scope [[META31]], !noalias [[META32]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP24]], i32 0 +; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP28]], align 4, !alias.scope [[META31]], !noalias [[META32]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]] ; CHECK: [[PRED_STORE_CONTINUE26]]: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 ; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]] ; CHECK: [[PRED_STORE_IF27]]: ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP38]], i32 1 -; CHECK-NEXT: store i32 [[TMP44]], ptr [[TMP43]], align 4, !alias.scope [[META31]], !noalias [[META32]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP24]], i32 1 +; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP43]], align 4, !alias.scope [[META31]], !noalias [[META32]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] ; CHECK: [[PRED_STORE_CONTINUE28]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -419,16 +416,17 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE17:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META35:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META35:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !alias.scope [[META38:![0-9]+]], !noalias [[META40:![0-9]+]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META38]], !noalias [[META40]] ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 @@ -449,7 +447,7 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 ; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META42:![0-9]+]], !noalias [[META35]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -465,19 +463,19 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] ; CHECK: [[PRED_STORE_IF14]]: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META38]], !noalias [[META40]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !alias.scope [[META42]], !noalias [[META35]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META38]], !noalias [[META40]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP32]], align 4, !alias.scope [[META42]], !noalias [[META35]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE15]] ; CHECK: [[PRED_STORE_CONTINUE15]]: ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 ; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17]] ; CHECK: [[PRED_STORE_IF16]]: ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP31]], align 4, !alias.scope [[META38]], !noalias [[META40]] +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP31]], align 4, !alias.scope [[META38]], !noalias [[META40]] ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP33]], align 4, !alias.scope [[META42]], !noalias [[META35]] +; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP33]], align 4, !alias.scope [[META42]], !noalias [[META35]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE17]] ; CHECK: [[PRED_STORE_CONTINUE17]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -560,17 +558,16 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META45:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META45:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]] -; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !alias.scope [[META48:![0-9]+]], !noalias [[META50:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[INDEX]] +; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 4, !alias.scope [[META48:![0-9]+]], !noalias [[META50:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 @@ -580,34 +577,32 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4, !alias.scope [[META48]], !noalias [[META50]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] ; CHECK: [[PRED_STORE_CONTINUE20]]: -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META53:![0-9]+]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META53]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META53:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP13]], align 4, !alias.scope [[META53]] ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 -; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] +; CHECK-NEXT: [[TMP17:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] ; CHECK: [[PRED_STORE_IF21]]: -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 -; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0 +; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] ; CHECK: [[PRED_STORE_CONTINUE22]]: -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] ; CHECK: [[PRED_STORE_IF23]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 -; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1 +; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]] ; CHECK: [[PRED_STORE_CONTINUE24]]: ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] ; CHECK: [[PRED_STORE_IF25]]: -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP31]], align 4, !alias.scope [[META54]], !noalias [[META55]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP29]], align 4, !alias.scope [[META54]], !noalias [[META55]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]] ; CHECK: [[PRED_STORE_CONTINUE26]]: ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 @@ -686,35 +681,34 @@ define void @test_stores_not_sunk_due_to_aliasing_load(ptr %dst, ptr %alias, ptr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META58:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META58:![0-9]+]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META61:![0-9]+]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META65:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP11]], align 4, !alias.scope [[META61:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP9]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META65:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 ; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] ; CHECK: [[PRED_STORE_IF6]]: ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META61]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META61]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP16]], align 4, !alias.scope [[META63]], !noalias [[META65]] +; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP16]], align 4, !alias.scope [[META63]], !noalias [[META65]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] ; CHECK: [[PRED_STORE_CONTINUE7]]: ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] ; CHECK: [[PRED_STORE_IF8]]: -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 10, ptr [[TMP18]], align 4, !alias.scope [[META63]], !noalias [[META65]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store i32 10, ptr [[TMP15]], align 4, !alias.scope [[META63]], !noalias [[META65]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] ; CHECK: [[PRED_STORE_CONTINUE9]]: ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 @@ -788,20 +782,19 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE15:.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META68:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP11]], align 4, !alias.scope [[META68:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] -; CHECK-NEXT: store i32 20, ptr [[TMP6]], align 4, !alias.scope [[META71:![0-9]+]], !noalias [[META73:![0-9]+]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META71]], !noalias [[META73]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[TMP0]] -; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP8]], align 4, !alias.scope [[META75:![0-9]+]], !noalias [[META68]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store i32 20, ptr [[TMP7]], align 4, !alias.scope [[META71:![0-9]+]], !noalias [[META73:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META71]], !noalias [[META73]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[INDEX]] +; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP14]], align 4, !alias.scope [[META75:![0-9]+]], !noalias [[META68]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 @@ -809,16 +802,16 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr ; CHECK: [[PRED_STORE_IF10]]: ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] ; CHECK-NEXT: store i32 20, ptr [[TMP10]], align 4, !alias.scope [[META71]], !noalias [[META73]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META71]], !noalias [[META73]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META71]], !noalias [[META73]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[TMP1]] -; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP12]], align 4, !alias.scope [[META75]], !noalias [[META68]] +; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP12]], align 4, !alias.scope [[META75]], !noalias [[META68]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] ; CHECK: [[PRED_STORE_CONTINUE11]]: ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] ; CHECK: [[PRED_STORE_IF12]]: -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] -; CHECK-NEXT: store i32 10, ptr [[TMP14]], align 4, !alias.scope [[META71]], !noalias [[META73]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store i32 10, ptr [[TMP19]], align 4, !alias.scope [[META71]], !noalias [[META73]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]] ; CHECK: [[PRED_STORE_CONTINUE13]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 @@ -884,9 +877,8 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE9:.*]] ] ; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16 -; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[GEP_SRC]], i32 0 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[TMP22]], i32 1 @@ -903,7 +895,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1 ; CHECK-NEXT: [[TMP33:%.*]] = xor <2 x i1> [[TMP31]], splat (i1 true) ; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00) -; CHECK-NEXT: [[GEP_DST1_ELSE:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: [[GEP_DST1_ELSE:%.*]] = getelementptr double, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x ptr> poison, ptr [[GEP_DST1_ELSE]], i32 0 ; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x ptr> [[TMP38]], ptr [[TMP37]], i32 1 @@ -949,19 +941,17 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK-NEXT: [[TMP60:%.*]] = extractelement <2 x i1> [[TMP31]], i32 0 ; CHECK-NEXT: br i1 [[TMP60]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] ; CHECK: [[PRED_STORE_IF6]]: -; CHECK-NEXT: [[TMP62:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store double [[TMP13]], ptr [[TMP62]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[TMP62]], i64 8 -; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP64]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: store double [[TMP13]], ptr [[GEP_DST1_ELSE]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[GEP_DST1_ELSE]], i64 8 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP42]], align 8, !alias.scope [[META81]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] ; CHECK: [[PRED_STORE_CONTINUE7]]: ; CHECK-NEXT: [[TMP66:%.*]] = extractelement <2 x i1> [[TMP31]], i32 1 ; CHECK-NEXT: br i1 [[TMP66]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9]] ; CHECK: [[PRED_STORE_IF8]]: -; CHECK-NEXT: [[TMP68:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] -; CHECK-NEXT: store double [[TMP14]], ptr [[TMP68]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr [[TMP68]], i64 8 -; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP70]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: store double [[TMP14]], ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr [[TMP37]], i64 8 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP55]], align 8, !alias.scope [[META81]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] ; CHECK: [[PRED_STORE_CONTINUE9]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll index f6dd8564c001b..724a3cb10eb3c 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll @@ -22,21 +22,20 @@ define void @test(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META3:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP1]], align 4, !alias.scope [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3]] ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 ; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1 ; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i32> [[TMP36]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP36]], <2 x i32> [[TMP25]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP37]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP10]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP38]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -100,16 +99,17 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE13:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 ; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP20]], align 4 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP15]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -128,7 +128,7 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) { ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 ; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]] ; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP49]], align 4 ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] @@ -144,7 +144,7 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) { ; CHECK: [[PRED_LOAD_CONTINUE13]]: ; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP32]], %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP27]], %[[PRED_LOAD_IF12]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP33]], <2 x i32> [[TMP23]] -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP34]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 @@ -213,16 +213,15 @@ define void @non_complementary_masks(ptr %dst, ptr %src, ptr %cond1, ptr %cond2) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE17:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND1]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[COND2]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META14:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x i32>, ptr [[TMP25]], align 4, !alias.scope [[META17:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[COND1]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND2]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META14:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META17:![0-9]+]] ; CHECK-NEXT: [[TMP37:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP38:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD11]], splat (i32 20) ; CHECK-NEXT: [[TMP18:%.*]] = xor <2 x i1> [[TMP37]], splat (i1 true) @@ -262,7 +261,7 @@ define void @non_complementary_masks(ptr %dst, ptr %src, ptr %cond1, ptr %cond2) ; CHECK-NEXT: [[TMP32:%.*]] = phi <2 x i32> [ [[TMP36]], %[[PRED_LOAD_CONTINUE15]] ], [ [[TMP31]], %[[PRED_LOAD_IF16]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP19]], <2 x i32> [[TMP28]], <2 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI18:%.*]] = select <2 x i1> [[TMP37]], <2 x i32> [[TMP32]], <2 x i32> [[PREDPHI]] -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI18]], ptr [[TMP41]], align 4, !alias.scope [[META21:![0-9]+]], !noalias [[META23:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 @@ -332,13 +331,12 @@ define void @different_access_sizes(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> [[TMP8]], ptr [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4, !alias.scope [[META26:![0-9]+]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true) @@ -377,7 +375,7 @@ define void @different_access_sizes(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i8> [ [[TMP25]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ] ; CHECK-NEXT: [[TMP30:%.*]] = zext <2 x i8> [[TMP29]] to <2 x i32> ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP30]], <2 x i32> [[TMP21]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META33:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 @@ -441,21 +439,20 @@ define void @different_alignments_same_address(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META36:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META36:![0-9]+]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 2, !alias.scope [[META39:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP1]], align 2, !alias.scope [[META39:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 2, !alias.scope [[META39]] ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1 ; CHECK-NEXT: [[TMP26:%.*]] = add <2 x i32> [[TMP25]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP25]], <2 x i32> [[TMP26]] -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP35]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP10]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] @@ -581,21 +578,20 @@ define void @duplicate_gep(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META46:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META46:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META49:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META49:![0-9]+]] ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META49]] ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0 ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP8]], i32 1 ; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP29]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP29]], <2 x i32> [[TMP19]] -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4, !alias.scope [[META51:![0-9]+]], !noalias [[META53:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP10]], align 4, !alias.scope [[META51:![0-9]+]], !noalias [[META53:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] @@ -660,21 +656,20 @@ define void @non_unit_stride_i64(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP6]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META56:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META56:![0-9]+]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP7]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META59:![0-9]+]] +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META59:![0-9]+]] ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META59]] ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0 ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> [[TMP25]], i32 [[TMP10]], i32 1 ; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP31]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[TMP31]], <2 x i32> [[TMP21]] -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP6]] -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP32]], align 4, !alias.scope [[META61:![0-9]+]], !noalias [[META63:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP11]], align 4, !alias.scope [[META61:![0-9]+]], !noalias [[META63:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP33]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP64:![0-9]+]] @@ -784,16 +779,17 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE10:.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP43]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP64]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -815,7 +811,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP32]], i32 0 ; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] ; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] @@ -835,7 +831,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP30]], i32 0 ; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]] ; CHECK: [[PRED_LOAD_IF7]]: -; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP61]], align 4 ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x i32> poison, i32 [[TMP34]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]] @@ -852,7 +848,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s ; CHECK-NEXT: [[TMP45:%.*]] = phi <2 x i32> [ [[TMP35]], %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP44]], %[[PRED_LOAD_IF9]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP32]], <2 x i32> [[TMP22]], <2 x i32> [[TMP15]] ; CHECK-NEXT: [[TMP42:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[TMP45]], <2 x i32> [[PREDPHI]] -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[TMP39]], i64 32 ; CHECK-NEXT: store <2 x i32> [[TMP42]], ptr [[TMP40]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -924,12 +920,11 @@ define void @hoist_predicated_load_with_chained_geps1(ptr %dst, ptr %src, i1 %co ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i64 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META68:![0-9]+]] -; CHECK-NEXT: store i16 [[TMP4]], ptr [[DST]], align 2, !alias.scope [[META71:![0-9]+]], !noalias [[META68]] +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META68:![0-9]+]] +; CHECK-NEXT: store i16 [[TMP3]], ptr [[DST]], align 2, !alias.scope [[META71:![0-9]+]], !noalias [[META68]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP73:![0-9]+]] @@ -984,12 +979,11 @@ define void @hoist_predicated_load_with_chained_geps2(ptr %dst, ptr %src, i1 %co ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8 -; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META75:![0-9]+]] -; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META78:![0-9]+]], !noalias [[META75]] +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META75:![0-9]+]] +; CHECK-NEXT: store i16 [[TMP3]], ptr [[DST]], align 2, !alias.scope [[META78:![0-9]+]], !noalias [[META75]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP80:![0-9]+]] @@ -1042,13 +1036,12 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> [[TMP8]], ptr [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP12:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 20) @@ -1108,7 +1101,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP42:%.*]] = phi <2 x i32> [ [[TMP38]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP41]], %[[PRED_LOAD_IF10]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP34]], <2 x i32> [[TMP24]] ; CHECK-NEXT: [[PREDPHI16:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP42]], <2 x i32> [[PREDPHI]] -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI16]], ptr [[TMP43]], align 4, !alias.scope [[META85:![0-9]+]], !noalias [[META82]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index f9dd626e523e8..d41bf087880a8 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -82,11 +82,9 @@ define i32 @test(ptr nocapture %f) #0 { ; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; VEC-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VEC: pred.store.if: -; VEC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP5]] ; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 ; VEC-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP7]], 20 -; VEC-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 4 +; VEC-NEXT: store i32 [[TMP8]], ptr [[TMP1]], align 4 ; VEC-NEXT: br label [[PRED_STORE_CONTINUE]] ; VEC: pred.store.continue: ; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 @@ -305,10 +303,8 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4 ; VEC-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.if: -; VEC-NEXT: [[INDVARS_IV3:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VEC-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[INDVARS_IV3]] ; VEC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 -; VEC-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX16]], align 4 +; VEC-NEXT: store i32 [[TMP11]], ptr [[TMP7]], align 4 ; VEC-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 1 ; VEC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP12]] ; VEC-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 @@ -444,10 +440,8 @@ define void @minimal_bit_widths(ptr %p, i1 %c) { ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP1]], align 1 ; VEC-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.if: -; VEC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP8]] ; VEC-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0 -; VEC-NEXT: store i8 [[TMP4]], ptr [[TMP3]], align 1 +; VEC-NEXT: store i8 [[TMP4]], ptr [[TMP1]], align 1 ; VEC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP5]] ; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1 @@ -553,10 +547,8 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) { ; VEC-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP2]], align 1 ; VEC-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.if: -; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP0]] ; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0 -; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1 +; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP2]], align 1 ; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP7]] ; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll index ccf05d73945ff..d0c3eaf0f716a 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll @@ -11,14 +11,12 @@ define void @multiple_iv_uses_in_same_instruction(ptr %ptr) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR:%.*]], i64 0, i64 [[TMP0]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR:%.*]], i64 0, i64 [[INDEX]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR]], i64 0, i64 [[TMP1]], i64 [[TMP1]] -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 ; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 diff --git a/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll b/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll index 8f773e2f0edd3..a2e60c46ebf59 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll @@ -22,9 +22,8 @@ define void @int_iv_based_on_pointer_iv(ptr %A) { ; VF2: vector.body: ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 -; VF2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP3]] +; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: store i8 0, ptr [[TMP9]], align 1 ; VF2-NEXT: store i8 0, ptr [[TMP10]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index b6fb378a042fd..614af1547a1d9 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -863,15 +863,14 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP12]], align 4 @@ -1065,11 +1064,10 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]] +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] @@ -1081,7 +1079,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP10]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]] +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]] ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] @@ -1237,9 +1235,8 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 8 @@ -1391,11 +1388,10 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) { ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[TMP0]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP1]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP2]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP3]], i32 1 @@ -1564,19 +1560,18 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP26]], align 1, !alias.scope [[META17:![0-9]+]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1, !alias.scope [[META17]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP26]], align 1, !alias.scope [[META17:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 1, !alias.scope [[META17]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP10]], i32 1 -; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] -; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP19]], align 1, !alias.scope [[META20]], !noalias [[META17]] +; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] +; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP19]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1774,7 +1769,6 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 3 @@ -1788,18 +1782,18 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]] ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]] ; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP37]] -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP18]], align 1, !alias.scope [[META17:![0-9]+]] -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP20]], align 1, !alias.scope [[META17]] -; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP22]], align 1, !alias.scope [[META17]] -; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP38]], align 1, !alias.scope [[META17]] -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[TMP9]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP18]], align 1, !alias.scope [[META17:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 1, !alias.scope [[META17]] +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP22]], align 1, !alias.scope [[META17]] +; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP38]], align 1, !alias.scope [[META17]] +; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[INDEX]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP10]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP11]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP12]], i32 1 -; UNROLL-NO-IC-NEXT: store i32 [[TMP23]], ptr [[TMP27]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] -; UNROLL-NO-IC-NEXT: store i32 [[TMP24]], ptr [[TMP28]], align 1, !alias.scope [[META20]], !noalias [[META17]] -; UNROLL-NO-IC-NEXT: store i32 [[TMP25]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]] -; UNROLL-NO-IC-NEXT: store i32 [[TMP26]], ptr [[TMP30]], align 1, !alias.scope [[META20]], !noalias [[META17]] +; UNROLL-NO-IC-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] +; UNROLL-NO-IC-NEXT: store i32 [[TMP23]], ptr [[TMP28]], align 1, !alias.scope [[META20]], !noalias [[META17]] +; UNROLL-NO-IC-NEXT: store i32 [[TMP24]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]] +; UNROLL-NO-IC-NEXT: store i32 [[TMP25]], ptr [[TMP30]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -2428,13 +2422,12 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]] ; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP4]], i32 1 ; CHECK-NEXT: store i16 [[TMP9]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP8]], align 2 @@ -2590,7 +2583,6 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 @@ -2602,7 +2594,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc <2 x i32> [[TMP8]] to <2 x i16> ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = extractelement <2 x i16> [[TMP10]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i16> [[TMP10]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[TMP3]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP4]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP5]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP6]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 64caecc847096..5b11efdc0f865 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -138,9 +138,8 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %pred.store.continue4 ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i1 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i1 false, [[TMP0]] -; CHECK-NEXT: [[INDUCTION:%.*]] = add i1 [[OFFSET_IDX]], false ; CHECK-NEXT: [[INDUCTION3:%.*]] = add i1 [[OFFSET_IDX]], true -; CHECK-NEXT: br i1 [[INDUCTION]], label %pred.store.if, label %pred.store.continue +; CHECK-NEXT: br i1 [[OFFSET_IDX]], label %pred.store.if, label %pred.store.continue ; CHECK: pred.store.if: ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll index 9bbd67059e84d..f64c649604512 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll @@ -20,26 +20,25 @@ define void @merge_tbaa_interleave_group(ptr nocapture readonly %p, ptr noalias ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_VEC4R:%.*]], ptr [[P]], i64 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_VEC4R:%.*]], ptr [[P]], i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[TBAA0:![0-9]+]] ; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP3]], align 8, !tbaa [[TBAA0]] ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], splat (double 2.000000e+00) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [20 x %struct.Vec2r], ptr [[CP]], i64 0, i64 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [20 x [[STRUCT_VEC2R:%.*]]], ptr [[CP]], i64 0, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP10]], align 8, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 8, !tbaa [[TBAA5:![0-9]+]] ; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[TMP11]], align 8, !tbaa [[TBAA5]] ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> [[TMP14]], double [[TMP13]], i32 1 ; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[TMP15]], splat (double 3.000000e+00) ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP16]], <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP17]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP9]], align 8, !tbaa [[TBAA6:![0-9]+]] +; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 8, !tbaa [[TBAA6:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll index 16a56f3a38ac9..1faaea56be173 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll @@ -35,10 +35,8 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[DOTSPLIT3:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT3]], i64 8 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0 -; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP3]], align 8 +; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP0]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1 @@ -134,9 +132,8 @@ define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0 -; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP6]], align 8 +; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP1]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1 @@ -242,10 +239,8 @@ define void @interleaved_with_cond_store_2(ptr %p, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[DOTSPLIT3:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[DOTSPLIT3]], i64 8 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0 -; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP6]], align 8 +; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP3]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1 diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 4f19a7c586bc3..6a03bd3859829 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -1068,9 +1068,8 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 -; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]] +; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]] ; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP1]], align 2 ; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]] ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -1138,9 +1137,8 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VEC-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2 -; VEC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; VEC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2 -; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]] +; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[OFFSET_IDX]] ; VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]] ; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2 ; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll b/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll index e02eb433f9247..3510391ffde1d 100644 --- a/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll +++ b/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll @@ -89,11 +89,10 @@ define void @test3(ptr %p) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[POS_337:%.*]] = add i32 [[ADD41]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[ADD41]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[ADD41]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[ADD41]], 3 -; CHECK-NEXT: [[INC46:%.*]] = add i32 [[POS_337]], 1 +; CHECK-NEXT: [[INC46:%.*]] = add i32 [[ADD41]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP2]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP3]], 1 @@ -103,8 +102,7 @@ define void @test3(ptr %p) { ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP7]], i32 3 ; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[IDXPROM4738:%.*]] = add i64 [[IDXPROM4736]], 0 -; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P:%.*]], i64 0, i64 [[IDXPROM4738]] +; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P:%.*]], i64 0, i64 [[IDXPROM4736]] ; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX48]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 8d3d0ff7a6406..44c7f099e5436 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -191,8 +191,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) { ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DEST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], [[TMP17]] @@ -309,8 +308,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = shl nsw i32 [[TMP11]], 2 ; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4 @@ -396,11 +394,9 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP1]] ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP1]] ; CHECK-NEXT: [[TMP13:%.*]] = shl nsw i32 [[TMP10]], 2 ; CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -411,8 +407,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP15]] ; CHECK-NEXT: [[TMP20:%.*]] = shl nsw i32 [[TMP17]], 2 ; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] @@ -556,13 +551,10 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP12]], i32 1 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = shl nsw i32 [[TMP11]], 2 ; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -675,7 +667,7 @@ define void @adding_offset_overflows(i32 %n, ptr %A) { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] +; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE3]] ; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[C]], i64 [[TMP10]] diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll index b14a1cdff92c2..0f66692aacf06 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll @@ -21,9 +21,10 @@ define i8 @test_negative_off(i16 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_LOAD_CONTINUE2]] ] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 -1000, [[DOTCAST]] -; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i16 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i16 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i16 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = load i1, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = load i1, ptr [[TMP3]], align 1 @@ -31,7 +32,7 @@ define i8 @test_negative_off(i16 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i1> [[TMP6]], i1 [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[TMP0]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i8> poison, i8 [[TMP10]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll index 5e88072517b37..3175879ae29d7 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll @@ -47,8 +47,7 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) { ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]] ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]] ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: @@ -67,8 +66,9 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) { ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -123,8 +123,7 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]] ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: @@ -141,10 +140,11 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) { ; CHECK: [[PRED_STORE_CONTINUE2]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -194,8 +194,7 @@ define void @ptr_doesnt_depend_on_poison_or_ub(ptr noalias %dst, i16 noundef %of ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -211,10 +210,11 @@ define void @ptr_doesnt_depend_on_poison_or_ub(ptr noalias %dst, i16 noundef %of ; CHECK: [[PRED_STORE_CONTINUE2]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -269,8 +269,7 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) { ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]] ; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: @@ -287,10 +286,11 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) { ; CHECK: [[PRED_STORE_CONTINUE2]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -333,7 +333,7 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @src, align 1, !noundef [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @src, align 1, !noundef [[META6:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10) ; CHECK-NEXT: [[TMP2:%.*]] = sub i16 1, [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[TMP2]], [[TMP0]] @@ -343,8 +343,7 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) { ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -360,10 +359,11 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) { ; CHECK: [[PRED_STORE_CONTINUE2]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) -; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop.header diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index 4db3d1eed4771..b43c35b873f8c 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -1061,9 +1061,7 @@ define void @scalar_predication(ptr %addr) { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[ADDR]], i64 [[TMP0]] -; CHECK-NEXT: store float 1.000000e+01, ptr [[TMP6]], align 4 +; CHECK-NEXT: store float 1.000000e+01, ptr [[TMP1]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 @@ -1299,9 +1297,8 @@ define i16 @multiple_exit_none_via_latch(ptr %dst, i64 %x) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: store i64 0, ptr [[TMP5]], align 8 ; CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll b/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll index 9339aed927960..0fa47f71851e6 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll @@ -17,9 +17,8 @@ define i16 @multiple_exit_one_with_constant_condition(ptr %dst, i64 %x) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: store i64 0, ptr [[TMP5]], align 8 ; CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll index 30ee4803de607..efc6c870a9a79 100644 --- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll @@ -34,8 +34,7 @@ define void @maxvf3() { ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[INDEX]] ; CHECK-NEXT: store i8 69, ptr [[TMP3]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: @@ -48,19 +47,17 @@ define void @maxvf3() { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw <2 x i32> splat (i32 3), [[VEC_IND]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP9]] ; CHECK-NEXT: store i8 7, ptr [[TMP10]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 -; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] +; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP12]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[TMP11]] ; CHECK-NEXT: store i8 7, ptr [[TMP13]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: diff --git a/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll index b19f9c5a3b60d..1a13377a291ff 100644 --- a/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll @@ -77,9 +77,9 @@ define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly ; CHECK-LABEL: define void @predicated_sincos( ; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { ; CHECK: [[ENTRY:.*:]] -; CHECK: [[VECTOR_BODY:.*]]: -; CHECK: [[VECTOR_BODY1:.*:]] -; CHECK: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[IF_THEN1:.*]] ] +; CHECK: [[VECTOR_BODY1:.*]]: +; CHECK: [[VECTOR_BODY:.*:]] +; CHECK: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_BODY1]] ], [ [[INDEX_NEXT:%.*]], %[[FOR_END:.*]] ] ; CHECK: [[TMP4:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 0 ; CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 1 @@ -93,16 +93,16 @@ define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly ; CHECK: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; CHECK: br i1 [[TMP12]], label %[[PRED_STORE_IF1:.*]], label %[[IF_THEN1]] +; CHECK: br i1 [[TMP12]], label %[[PRED_STORE_IF1:.*]], label %[[FOR_END]] ; CHECK: [[PRED_STORE_IF1]]: ; CHECK: [[TMP15:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 ; CHECK: store float [[TMP15]], ptr [[TMP14:%.*]], align 4 ; CHECK: [[TMP17:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 ; CHECK: store float [[TMP17]], ptr [[TMP16:%.*]], align 4 -; CHECK: br label %[[IF_THEN1]] -; CHECK: [[IF_THEN1]]: -; CHECK: [[IF_MERGE:.*:]] -; CHECK: [[FOR_END:.*:]] +; CHECK: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: +; CHECK: [[MIDDLE_BLOCK:.*:]] +; CHECK: [[FOR_END1:.*:]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll index 1736b66511bfb..3f8644c18957c 100644 --- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll @@ -14,9 +14,6 @@ define void @narrow_select_to_single_scalar(i1 %invar.cond, ptr noalias %A, ptr ; VF4IC1: [[VECTOR_BODY]]: ; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF4IC1-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 -; VF4IC1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0 -; VF4IC1-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 1 -; VF4IC1-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 2 ; VF4IC1-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 3 ; VF4IC1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP5]] ; VF4IC1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1 @@ -41,7 +38,6 @@ define void @narrow_select_to_single_scalar(i1 %invar.cond, ptr noalias %A, ptr ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 -; VF2IC2-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2 ; VF2IC2-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3 ; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP3]] ; VF2IC2-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 1 @@ -86,7 +82,6 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts( ; VF4IC1: [[VECTOR_BODY]]: ; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF4IC1-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; VF4IC1-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; VF4IC1-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; VF4IC1-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 @@ -99,7 +94,7 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts( ; VF4IC1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP7]] ; VF4IC1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP9]] ; VF4IC1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP11]] -; VF4IC1-NEXT: store i32 [[TMP0]], ptr [[TMP6]], align 4 +; VF4IC1-NEXT: store i32 [[INDEX]], ptr [[TMP6]], align 4 ; VF4IC1-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4 ; VF4IC1-NEXT: store i32 [[TMP2]], ptr [[TMP10]], align 4 ; VF4IC1-NEXT: store i32 [[TMP3]], ptr [[TMP12]], align 4 @@ -120,11 +115,10 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts( ; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF2IC2-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 1 ; VF2IC2-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 2 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 3 -; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP7]], 1 +; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[INDEX]], 1 ; VF2IC2-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP0]], 1 ; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP2]] ; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/operand-bundles.ll b/llvm/test/Transforms/LoopVectorize/operand-bundles.ll index ce0736486de28..18bd16873c1d5 100644 --- a/llvm/test/Transforms/LoopVectorize/operand-bundles.ll +++ b/llvm/test/Transforms/LoopVectorize/operand-bundles.ll @@ -142,18 +142,17 @@ define void @assume_loop_variant_operand_bundle(ptr noalias %a, ptr noalias %b) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 -; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP0]]) ] +; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[INDEX]]) ] ; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP1]]) ] ; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP2]]) ] ; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP3]]) ] ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600 diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 1a1c05187590e..a0f00bf77a451 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -797,7 +797,6 @@ define void @multiple_ivs_wide(ptr %dst) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 6 @@ -806,7 +805,7 @@ define void @multiple_ivs_wide(ptr %dst) { ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] @@ -833,7 +832,6 @@ define void @multiple_ivs_wide(ptr %dst) { ; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT3:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX1]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], 6 @@ -842,7 +840,7 @@ define void @multiple_ivs_wide(ptr %dst) { ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP19]], i32 1 ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP19]], i32 2 ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP19]], i32 3 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP15]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP17]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP18]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll index 94392f856c386..515ef2b9b8636 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll @@ -15,22 +15,20 @@ define void @wide_ptr_induction_index_width_smaller_than_iv_width(ptr noalias %s ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], 8 -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 8 ; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 16 ; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 24 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP11]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP6]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP16]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP17]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index d96134e8adf1d..c23efe18da607 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -25,18 +25,13 @@ define void @a(ptr readnone %b) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]] -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -3 @@ -46,8 +41,7 @@ define void @a(ptr readnone %b) { ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 -; CHECK-NEXT: store i8 95, ptr [[TMP9]], align 1 +; CHECK-NEXT: store i8 95, ptr [[TMP3]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 @@ -536,15 +530,13 @@ define i64 @ivopt_widen_ptr_indvar_2(ptr noalias %a, i64 %stride, i64 %n) { ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]] -; STRIDED-NEXT: [[TMP8:%.*]] = mul i64 0, [[TMP1]] -; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]] ; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP1]] ; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]] ; STRIDED-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP1]] ; STRIDED-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], [[TMP12]] ; STRIDED-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP1]] ; STRIDED-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], [[TMP14]] -; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]] +; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[OFFSET_IDX]] ; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] ; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP13]] ; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP15]] @@ -643,12 +635,6 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) { ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]] -; STRIDED-NEXT: [[TMP4:%.*]] = mul i64 0, [[TMP1]] -; STRIDED-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]] -; STRIDED-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP1]] -; STRIDED-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], [[TMP6]] -; STRIDED-NEXT: [[TMP8:%.*]] = mul i64 2, [[TMP1]] -; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]] ; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP1]] ; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]] ; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll index 33b3d263e634a..cc0a52a08503f 100644 --- a/llvm/test/Transforms/LoopVectorize/pr37248.ll +++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll @@ -37,20 +37,19 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[START]], [[N_VEC]] -; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = xor i1 [[TMP13]], true ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 -; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: store i32 10, ptr [[B]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 -; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]] +; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]] ; CHECK: [[PRED_STORE_IF2]]: ; CHECK-NEXT: store i32 10, ptr [[B]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index 9ed35fb0a79e8..b1f00466d871a 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -23,8 +23,7 @@ define void @pr45679(ptr %A) { ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: store i32 13, ptr [[TMP3]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: @@ -75,8 +74,7 @@ define void @pr45679(ptr %A) { ; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF2UF2: pred.store.if: -; VF2UF2-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 -; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]] +; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; VF2UF2-NEXT: store i32 13, ptr [[TMP4]], align 1 ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE]] ; VF2UF2: pred.store.continue: @@ -97,7 +95,7 @@ define void @pr45679(ptr %A) { ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE5]] ; VF2UF2: pred.store.continue4: ; VF2UF2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 -; VF2UF2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]] +; VF2UF2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE7]] ; VF2UF2: pred.store.if5: ; VF2UF2-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 3 ; VF2UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP12]] @@ -120,17 +118,16 @@ define void @pr45679(ptr %A) { ; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF1UF4: vector.body: ; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] -; VF1UF4-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; VF1UF4-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; VF1UF4-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; VF1UF4-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i32 [[TMP0]], 13 +; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i32 [[INDEX]], 13 ; VF1UF4-NEXT: [[TMP5:%.*]] = icmp ule i32 [[TMP1]], 13 ; VF1UF4-NEXT: [[TMP6:%.*]] = icmp ule i32 [[TMP2]], 13 ; VF1UF4-NEXT: [[TMP7:%.*]] = icmp ule i32 [[TMP3]], 13 ; VF1UF4-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF1UF4: pred.store.if: -; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] +; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; VF1UF4-NEXT: store i32 13, ptr [[TMP8]], align 1 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] ; VF1UF4: pred.store.continue: @@ -188,8 +185,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 ; CHECK-NEXT: store i64 [[TMP4]], ptr [[B:%.*]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -244,8 +240,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF2UF2: pred.store.if: -; VF2UF2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] +; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; VF2UF2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 ; VF2UF2-NEXT: store i64 [[TMP5]], ptr [[B:%.*]], align 8 ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -269,7 +264,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE5]] ; VF2UF2: pred.store.continue4: ; VF2UF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 -; VF2UF2-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]] +; VF2UF2-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE7]] ; VF2UF2: pred.store.if5: ; VF2UF2-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3 ; VF2UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP18]] @@ -293,17 +288,16 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF1UF4: vector.body: ; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] -; VF1UF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; VF1UF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; VF1UF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; VF1UF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 13 +; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i64 [[INDEX]], 13 ; VF1UF4-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 13 ; VF1UF4-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 13 ; VF1UF4-NEXT: [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 13 ; VF1UF4-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF1UF4: pred.store.if: -; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; VF1UF4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF1UF4-NEXT: store i64 [[TMP9]], ptr [[B:%.*]], align 8 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index 3276528e54225..d4405ca17c4cb 100644 --- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -18,12 +18,9 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC1-NEXT: br label %[[VECTOR_BODY:.*]] ; IC1: [[VECTOR_BODY]]: ; IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE13:.*]] ] -; IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]] +; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]] -; IC1-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 -; IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1 ; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1 ; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12) ; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13) @@ -113,18 +110,13 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC2-NEXT: br label %[[VECTOR_BODY:.*]] ; IC2: [[VECTOR_BODY]]: ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] -; IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 ; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 -; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]] +; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]] -; IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 -; IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1 ; IC2-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]] ; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] -; IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0 -; IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1 ; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2 ; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1 ; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index 43dede0b612f3..328fdc7793319 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -1207,11 +1207,11 @@ for.end: define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-NEXT: [[_LR_PH1:.*]]: +; CHECK-NEXT: [[_LR_PH:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] +; CHECK: [[_LR_PH1:.*:]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1231,11 +1231,11 @@ define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocaptu ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-INTERLEAVED-NEXT: [[_LR_PH1:.*]]: +; CHECK-INTERLEAVED-NEXT: [[_LR_PH:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] +; CHECK-INTERLEAVED: [[_LR_PH1:.*:]] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -2264,11 +2264,14 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP3]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 @@ -2284,7 +2287,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[INDEX]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP23]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -2342,15 +2345,22 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP94:%.*]], %[[PRED_LOAD_CONTINUE15]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP98:%.*]], %[[PRED_LOAD_CONTINUE15]] ] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP1]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP2]], i32 2 +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP3]], i32 3 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6 ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7 -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 +; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP5]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP6]], i32 2 +; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP7]], i32 3 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[INDEX]], i32 1 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 @@ -2379,7 +2389,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP41]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF]]: -; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = insertelement <4 x i32> poison, i32 [[TMP44]], i32 0 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll index dfdf1100eb57b..202352d380da4 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll @@ -329,11 +329,10 @@ define void @reduc_store_inside_unrolled(ptr %dst, ptr readonly %src) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]] @@ -535,11 +534,10 @@ define void @reduc_store_middle_store_predicated(ptr %dst, ptr readonly %src) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll index fa03c62bb4927..ffb16d07f3ea8 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll @@ -488,8 +488,7 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 ; CHECK-VF4-IC1-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK-VF4-IC1: pred.store.if: -; CHECK-VF4-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; CHECK-VF4-IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]] +; CHECK-VF4-IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-VF4-IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]] ; CHECK-VF4-IC1-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], 1 ; CHECK-VF4-IC1-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META6]] @@ -607,8 +606,7 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK-VF4-IC2: pred.store.if: -; CHECK-VF4-IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; CHECK-VF4-IC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]] +; CHECK-VF4-IC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-VF4-IC2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP17]], 1 ; CHECK-VF4-IC2-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4, !alias.scope [[META9]], !noalias [[META6]] diff --git a/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll index ca32808bc482a..ca4bd56531f1d 100644 --- a/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll +++ b/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll @@ -82,7 +82,8 @@ define float @pr72720reduction_using_active_lane_mask(ptr %src) { ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[VEC_IV1]], 14 ; CHECK-NEXT: br i1 [[TMP0]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[SRC]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[SRC]], i32 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 4 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: diff --git a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll index b9be77ff224ff..a944712ac62e1 100644 --- a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll +++ b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll @@ -488,14 +488,14 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) { ; VF4-NEXT: [[TMP16:%.*]] = extractvalue { float, float } [[TMP15]], 0 ; VF4-NEXT: [[TMP17:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP12]], 0 ; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP17]], float [[TMP16]], i32 1 -; VF4-NEXT: [[TMP19:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP12]], <4 x float> [[TMP18]], 0 +; VF4-NEXT: [[TMP55:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP12]], <4 x float> [[TMP18]], 0 ; VF4-NEXT: [[TMP20:%.*]] = extractvalue { float, float } [[TMP15]], 1 -; VF4-NEXT: [[TMP21:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP19]], 1 +; VF4-NEXT: [[TMP21:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP55]], 1 ; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP20]], i32 1 -; VF4-NEXT: [[TMP23:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP19]], <4 x float> [[TMP22]], 1 +; VF4-NEXT: [[TMP23:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP55]], <4 x float> [[TMP22]], 1 ; VF4-NEXT: br label %[[PRED_CALL_CONTINUE2]] ; VF4: [[PRED_CALL_CONTINUE2]]: -; VF4-NEXT: [[TMP24:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP12]], %[[PRED_CALL_CONTINUE]] ], [ [[TMP19]], %[[PRED_CALL_IF1]] ] +; VF4-NEXT: [[TMP24:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP12]], %[[PRED_CALL_CONTINUE]] ], [ [[TMP55]], %[[PRED_CALL_IF1]] ] ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 ; VF4-NEXT: br i1 [[TMP25]], label %[[PRED_CALL_IF3:.*]], label %[[PRED_CALL_CONTINUE4:.*]] ; VF4: [[PRED_CALL_IF3]]: @@ -533,37 +533,35 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) { ; VF4-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 ; VF4-NEXT: br i1 [[TMP51]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF4: [[PRED_STORE_IF]]: -; VF4-NEXT: [[TMP52:%.*]] = add i64 [[INDEX]], 0 -; VF4-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP52]] -; VF4-NEXT: [[TMP54:%.*]] = extractelement <4 x float> [[TMP50]], i32 0 -; VF4-NEXT: store float [[TMP54]], ptr [[TMP53]], align 8 +; VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x float> [[TMP50]], i32 0 +; VF4-NEXT: store float [[TMP52]], ptr [[TMP0]], align 8 ; VF4-NEXT: br label %[[PRED_STORE_CONTINUE]] ; VF4: [[PRED_STORE_CONTINUE]]: -; VF4-NEXT: [[TMP55:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; VF4-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; VF4-NEXT: [[TMP53:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; VF4-NEXT: br i1 [[TMP53]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] ; VF4: [[PRED_STORE_IF7]]: ; VF4-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], 1 -; VF4-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP56]] -; VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x float> [[TMP50]], i32 1 -; VF4-NEXT: store float [[TMP58]], ptr [[TMP57]], align 8 +; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP56]] +; VF4-NEXT: [[TMP54:%.*]] = extractelement <4 x float> [[TMP50]], i32 1 +; VF4-NEXT: store float [[TMP54]], ptr [[TMP19]], align 8 ; VF4-NEXT: br label %[[PRED_STORE_CONTINUE8]] ; VF4: [[PRED_STORE_CONTINUE8]]: -; VF4-NEXT: [[TMP59:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; VF4-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; VF4-NEXT: [[TMP57:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; VF4-NEXT: br i1 [[TMP57]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] ; VF4: [[PRED_STORE_IF9]]: -; VF4-NEXT: [[TMP60:%.*]] = add i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP60]] -; VF4-NEXT: [[TMP62:%.*]] = extractelement <4 x float> [[TMP50]], i32 2 -; VF4-NEXT: store float [[TMP62]], ptr [[TMP61]], align 8 +; VF4-NEXT: [[TMP58:%.*]] = add i64 [[INDEX]], 2 +; VF4-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP58]] +; VF4-NEXT: [[TMP60:%.*]] = extractelement <4 x float> [[TMP50]], i32 2 +; VF4-NEXT: store float [[TMP60]], ptr [[TMP59]], align 8 ; VF4-NEXT: br label %[[PRED_STORE_CONTINUE10]] ; VF4: [[PRED_STORE_CONTINUE10]]: -; VF4-NEXT: [[TMP63:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; VF4-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12]] +; VF4-NEXT: [[TMP61:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; VF4-NEXT: br i1 [[TMP61]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12]] ; VF4: [[PRED_STORE_IF11]]: -; VF4-NEXT: [[TMP64:%.*]] = add i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP64]] -; VF4-NEXT: [[TMP66:%.*]] = extractelement <4 x float> [[TMP50]], i32 3 -; VF4-NEXT: store float [[TMP66]], ptr [[TMP65]], align 8 +; VF4-NEXT: [[TMP62:%.*]] = add i64 [[INDEX]], 3 +; VF4-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP62]] +; VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x float> [[TMP50]], i32 3 +; VF4-NEXT: store float [[TMP64]], ptr [[TMP63]], align 8 ; VF4-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; VF4: [[PRED_STORE_CONTINUE12]]: ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -590,12 +588,9 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) { ; VF2IC2: [[PRED_STORE_IF]]: ; VF2IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0 ; VF2IC2-NEXT: [[TMP6:%.*]] = tail call { float, float } @fn2(float [[TMP5]]) #[[ATTR3:[0-9]+]] -; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP8:%.*]] = extractvalue { float, float } [[TMP6]], 0 -; VF2IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] -; VF2IC2-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0 -; VF2IC2-NEXT: [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP10]] -; VF2IC2-NEXT: store float [[TMP11]], ptr [[TMP9]], align 8 +; VF2IC2-NEXT: [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP5]] +; VF2IC2-NEXT: store float [[TMP11]], ptr [[TMP0]], align 8 ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]] ; VF2IC2: [[PRED_STORE_CONTINUE]]: ; VF2IC2-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 @@ -606,8 +601,7 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) { ; VF2IC2-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 1 ; VF2IC2-NEXT: [[TMP16:%.*]] = extractvalue { float, float } [[TMP14]], 0 ; VF2IC2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]] -; VF2IC2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1 -; VF2IC2-NEXT: [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP18]] +; VF2IC2-NEXT: [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP13]] ; VF2IC2-NEXT: store float [[TMP19]], ptr [[TMP17]], align 8 ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE3]] ; VF2IC2: [[PRED_STORE_CONTINUE3]]: @@ -619,8 +613,7 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) { ; VF2IC2-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 2 ; VF2IC2-NEXT: [[TMP24:%.*]] = extractvalue { float, float } [[TMP22]], 0 ; VF2IC2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP23]] -; VF2IC2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0 -; VF2IC2-NEXT: [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP26]] +; VF2IC2-NEXT: [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP21]] ; VF2IC2-NEXT: store float [[TMP27]], ptr [[TMP25]], align 8 ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE5]] ; VF2IC2: [[PRED_STORE_CONTINUE5]]: @@ -629,11 +622,10 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) { ; VF2IC2: [[PRED_STORE_IF6]]: ; VF2IC2-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1 ; VF2IC2-NEXT: [[TMP30:%.*]] = tail call { float, float } @fn2(float [[TMP29]]) #[[ATTR3]] -; VF2IC2-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 3 +; VF2IC2-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 3 ; VF2IC2-NEXT: [[TMP32:%.*]] = extractvalue { float, float } [[TMP30]], 0 -; VF2IC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]] -; VF2IC2-NEXT: [[TMP34:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1 -; VF2IC2-NEXT: [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP34]] +; VF2IC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP26]] +; VF2IC2-NEXT: [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP29]] ; VF2IC2-NEXT: store float [[TMP35]], ptr [[TMP33]], align 8 ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE7]] ; VF2IC2: [[PRED_STORE_CONTINUE7]]: diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll index 70c6c7e900c51..a0a59d00bd951 100644 --- a/llvm/test/Transforms/LoopVectorize/struct-return.ll +++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll @@ -278,11 +278,8 @@ define void @scalarized_predicated_struct_return(ptr %a) { ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = tail call { i64, i64 } @bar_i64(i64 [[TMP3]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i64, i64 } [[TMP4]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] -; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = udiv i64 [[TMP5]], [[TMP3]] +; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP0]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 @@ -291,8 +288,7 @@ define void @scalarized_predicated_struct_return(ptr %a) { ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: [[TMP12:%.*]] = tail call { i64, i64 } @bar_i64(i64 [[TMP11]]) #[[ATTR2]] ; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i64, i64 } [[TMP12]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = udiv i64 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = udiv i64 [[TMP13]], [[TMP11]] ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP16]] ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll index 9e523be618b44..b282c69d72813 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll @@ -9,49 +9,42 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56) ; CHECK-NEXT: [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP18]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 -; CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP3]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 -; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP7]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] ; CHECK: [[PRED_STORE_IF3]]: -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 -; CHECK-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP11]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]] ; CHECK: [[PRED_STORE_CONTINUE4]]: ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 ; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_IF5]]: -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP2]], 3 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 -; CHECK-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 +; CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP15]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_CONTINUE6]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll index 00e04c7daee51..eade1724c0b80 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll @@ -9,13 +9,12 @@ define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 14) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -23,12 +22,12 @@ define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]] ; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -60,13 +59,12 @@ define void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 -2) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -74,12 +72,12 @@ define void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]] ; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], splat (i8 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -111,13 +109,12 @@ define void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 256) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -125,12 +122,12 @@ define void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]] ; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 258 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -162,13 +159,12 @@ define void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 -2) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -176,12 +172,12 @@ define void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]] ; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -213,13 +209,12 @@ define void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 65536) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -227,12 +222,12 @@ define void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]] ; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65538 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -264,13 +259,12 @@ define void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 -2) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -278,12 +272,12 @@ define void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]] ; CHECK-NEXT: store i16 1, ptr [[TMP24]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967296 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -315,13 +309,12 @@ define void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 4294967296) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -329,12 +322,12 @@ define void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]] ; CHECK-NEXT: store i16 1, ptr [[TMP6]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[TMP8]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP8]] = add i64 [[TMP2]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP8]], 4294967298 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -366,13 +359,12 @@ define void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP2]] ; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -380,12 +372,12 @@ define void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]] ; CHECK-NEXT: store i16 1, ptr [[TMP6]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[TMP8]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP8]] = add i64 [[TMP2]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP8]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -417,13 +409,12 @@ define void @canonical_lower_limit_i128(ptr nocapture noundef writeonly %p) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i256 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i256 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i128> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i128> [[VEC_IND]], splat (i128 18446744073709551616) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP2:%.*]] = add i256 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP2]] ; CHECK-NEXT: store i16 1, ptr [[TMP3]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -431,12 +422,12 @@ define void @canonical_lower_limit_i128(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP5:%.*]] = add i256 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i256 [[TMP2]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[TMP5]] ; CHECK-NEXT: store i16 1, ptr [[TMP6]], align 2 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[TMP8]] = add i256 [[INDEX]], 2 +; CHECK-NEXT: [[TMP8]] = add i256 [[TMP2]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i128> [[VEC_IND]], splat (i128 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i256 [[TMP8]], 18446744073709551618 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll index b6f43aaa86e33..86a060fb50875 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll @@ -13,14 +13,13 @@ define void @tail_fold_switch(ptr %dst, i32 %0) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP5]] ; CHECK-NEXT: store i32 0, ptr [[TMP6]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -28,7 +27,7 @@ define void @tail_fold_switch(ptr %dst, i32 %0) { ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP5]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP8]] ; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] @@ -36,7 +35,7 @@ define void @tail_fold_switch(ptr %dst, i32 %0) { ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 ; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] ; CHECK: [[PRED_STORE_IF3]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP11]] ; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]] @@ -44,12 +43,12 @@ define void @tail_fold_switch(ptr %dst, i32 %0) { ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 ; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_IF5]]: -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], 3 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP14]] ; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_CONTINUE6]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP5]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll index 541ee96ac760b..fbb9c5f14c86c 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -17,17 +17,16 @@ define void @VF1-VPlanExe(ptr %dst) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 14 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 14 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 14 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 14 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll index 586804bac9429..adb6b029a7112 100644 --- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll +++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll @@ -156,9 +156,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[IV]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP20]] -; CHECK-NEXT: store i32 1, ptr [[TMP9]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 @@ -267,9 +265,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[IV]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP20]] -; CHECK-NEXT: store i32 1, ptr [[TMP9]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index 24dc182fe24a1..3b7cc7e95c4a7 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -160,17 +160,16 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 2 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 3 -; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> poison, ptr [[A]], i32 0 ; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x ptr> [[TMP35]], ptr [[TMP6]], i32 1 ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x ptr> [[TMP36]], ptr [[TMP7]], i32 2 ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x ptr> [[TMP37]], ptr [[TMP8]], i32 3 ; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; CHECK: [[PRED_LOAD_CONTINUE]]: @@ -201,7 +200,7 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) { ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP26]], i32 0 -; CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll index f80d7b695e2af..7b27b16a5f261 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll @@ -149,28 +149,27 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 -; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -204,21 +203,20 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8 ; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 -; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -253,28 +251,27 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 -; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -309,28 +306,27 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 -; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -364,28 +360,27 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 -; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -418,21 +413,20 @@ define void @ld_div3_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8 ; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 -; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -606,28 +600,27 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 -; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -661,21 +654,20 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP1]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 ; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 -; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -710,28 +702,27 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 -; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -766,28 +757,27 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 -; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -822,28 +812,27 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 -; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -877,21 +866,20 @@ define void @ld_div3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 ; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 -; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -924,22 +912,21 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <2 x i32> [[VEC_IND]], [[VEC_IND]] -; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = udiv <2 x i16> [[TMP4]], splat (i16 6) -; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i16> [[TMP5]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <2 x i32> [[VEC_IND]], [[VEC_IND]] +; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = udiv <2 x i16> [[TMP3]], splat (i16 6) +; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP8]] +; CHECK-NEXT: store i16 [[TMP0]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP9]], align 2 -; CHECK-NEXT: store i16 [[TMP2]], ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 56 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 56 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll index 0f191b2d8a278..9fc807e955f8b 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll @@ -149,28 +149,27 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 -; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -204,19 +203,18 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP6]], align 8 -; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -251,28 +249,27 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 -; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -306,28 +303,27 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 -; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -411,28 +407,27 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 -; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -467,28 +462,27 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 -; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -523,28 +517,27 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3) -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 -; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll index 7ff10c544f72a..8aa58a009169d 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll @@ -283,14 +283,14 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP2]] @@ -298,7 +298,7 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP10]], align 4 @@ -306,24 +306,24 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p ; CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP14]], align 4 -; CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP15]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <8 x i64> poison, i64 [[TMP16]], i32 0 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 1 -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 2 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x i64> [[TMP26]], i64 [[TMP19]], i32 3 -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x i64> [[TMP27]], i64 [[TMP20]], i32 4 -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x i64> [[TMP28]], i64 [[TMP21]], i32 5 -; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x i64> [[TMP29]], i64 [[TMP22]], i32 6 -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x i64> [[TMP30]], i64 [[TMP23]], i32 7 -; CHECK-NEXT: [[TMP32:%.*]] = udiv <8 x i64> [[TMP31]], splat (i64 2) -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i64> [[TMP32]], i32 0 -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP32]], i32 1 -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP32]], i32 2 -; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP32]], i32 3 -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP32]], i32 4 -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i64> [[TMP32]], i32 5 -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i64> [[TMP32]], i32 6 -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <8 x i64> [[TMP32]], i32 7 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x i64> poison, i64 [[TMP15]], i32 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <8 x i64> [[TMP23]], i64 [[TMP16]], i32 1 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 2 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 3 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x i64> [[TMP26]], i64 [[TMP19]], i32 4 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x i64> [[TMP27]], i64 [[TMP20]], i32 5 +; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x i64> [[TMP28]], i64 [[TMP21]], i32 6 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x i64> [[TMP29]], i64 [[TMP22]], i32 7 +; CHECK-NEXT: [[TMP31:%.*]] = udiv <8 x i64> [[TMP30]], splat (i64 2) +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <8 x i64> [[TMP31]], i32 0 +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i64> [[TMP31]], i32 1 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP31]], i32 2 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP31]], i32 3 +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP31]], i32 4 +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP31]], i32 5 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i64> [[TMP31]], i32 6 +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i64> [[TMP31]], i32 7 +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP32]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP33]] ; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP34]] ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP35]] @@ -331,7 +331,7 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP37]] ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP38]] ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP39]] -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[SRC_B]], i64 [[TMP40]] +; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP40]], align 4 ; CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP41]], align 4 ; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP42]], align 4 ; CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP43]], align 4 @@ -339,20 +339,19 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p ; CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP45]], align 4 ; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP46]], align 4 ; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP47]], align 4 -; CHECK-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP48]], align 4 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <8 x i32> poison, i32 [[TMP49]], i32 0 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <8 x i32> [[TMP57]], i32 [[TMP50]], i32 1 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <8 x i32> [[TMP58]], i32 [[TMP51]], i32 2 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <8 x i32> [[TMP59]], i32 [[TMP52]], i32 3 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <8 x i32> [[TMP60]], i32 [[TMP53]], i32 4 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <8 x i32> [[TMP61]], i32 [[TMP54]], i32 5 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <8 x i32> [[TMP62]], i32 [[TMP55]], i32 6 -; CHECK-NEXT: [[TMP64:%.*]] = insertelement <8 x i32> [[TMP63]], i32 [[TMP56]], i32 7 -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP0]] -; CHECK-NEXT: store <8 x i32> [[TMP64]], ptr [[TMP65]], align 4 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <8 x i32> poison, i32 [[TMP48]], i32 0 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <8 x i32> [[TMP56]], i32 [[TMP49]], i32 1 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <8 x i32> [[TMP57]], i32 [[TMP50]], i32 2 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <8 x i32> [[TMP58]], i32 [[TMP51]], i32 3 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <8 x i32> [[TMP59]], i32 [[TMP52]], i32 4 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <8 x i32> [[TMP60]], i32 [[TMP53]], i32 5 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <8 x i32> [[TMP61]], i32 [[TMP54]], i32 6 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <8 x i32> [[TMP62]], i32 [[TMP55]], i32 7 +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]] +; CHECK-NEXT: store <8 x i32> [[TMP63]], ptr [[TMP64]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP66:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP66]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP65:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP65]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll index a5bb07f1fd4ef..8f2f424305282 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll @@ -229,28 +229,27 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer -; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer +; VF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 ; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 -; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 -; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) -; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 -; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; VF2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 +; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 +; VF2-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) +; VF2-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0 +; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1 +; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8 ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 -; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) -; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: @@ -266,44 +265,43 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer -; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0 -; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1 -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 2 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP4]], i32 3 +; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer +; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 +; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 +; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] -; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] +; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 -; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 -; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> poison, i64 [[TMP13]], i32 0 -; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1 -; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3 -; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42) -; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0 -; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1 -; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2 -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 +; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0 +; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 +; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) +; VF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0 +; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1 +; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP20]], i32 2 +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP20]], i32 3 +; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; VF4-NEXT: store i64 [[TMP21]], ptr [[TMP25]], align 8 ; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP26]], align 8 ; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 -; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) -; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF4-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; VF4-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: @@ -337,21 +335,20 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2: vector.body: ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 1 -; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] -; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 -; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) -; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF2-NEXT: [[TMP1:%.*]] = lshr i64 [[OFFSET_IDX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[TMP3:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) +; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8 ; VF2-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 -; VF2-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; VF2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: @@ -366,29 +363,28 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4: vector.body: ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP0]], 1 -; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 -; VF4-NEXT: [[TMP6:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP3:%.*]] = lshr i64 [[OFFSET_IDX]], 1 +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; VF4-NEXT: [[TMP5:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; VF4-NEXT: store i64 [[TMP6]], ptr [[TMP10]], align 8 ; VF4-NEXT: store i64 [[TMP7]], ptr [[TMP11]], align 8 ; VF4-NEXT: store i64 [[TMP8]], ptr [[TMP12]], align 8 ; VF4-NEXT: store i64 [[TMP9]], ptr [[TMP13]], align 8 -; VF4-NEXT: store i64 [[TMP10]], ptr [[TMP14]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; VF4-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: @@ -423,28 +419,27 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer -; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer +; VF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 ; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 -; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 -; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) -; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 -; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; VF2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 +; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 +; VF2-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) +; VF2-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0 +; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1 +; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8 ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 -; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -459,44 +454,43 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer -; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0 -; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1 -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 2 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP4]], i32 3 +; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 9 +; VF4-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[VEC_IND]], zeroinitializer +; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 +; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 +; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] -; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] +; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 -; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 -; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> poison, i64 [[TMP13]], i32 0 -; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1 -; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3 -; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42) -; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0 -; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1 -; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2 -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 +; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0 +; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 +; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) +; VF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0 +; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1 +; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP20]], i32 2 +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP20]], i32 3 +; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; VF4-NEXT: store i64 [[TMP21]], ptr [[TMP25]], align 8 ; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP26]], align 8 ; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 -; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VF4-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF4-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -530,28 +524,27 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 -; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF2-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 ; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 -; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 -; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 -; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) -; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 -; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; VF2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 +; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1 +; VF2-NEXT: [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42) +; VF2-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0 +; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1 +; VF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: store i64 [[TMP11]], ptr [[TMP13]], align 8 ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 -; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VF2-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF2-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -566,44 +559,43 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0 -; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1 -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 2 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP4]], i32 3 +; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 9 +; VF4-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 +; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 +; VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] -; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] +; VF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8 ; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 -; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 -; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> poison, i64 [[TMP13]], i32 0 -; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1 -; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3 -; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42) -; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0 -; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1 -; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2 -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 +; VF4-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0 +; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1 +; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3 +; VF4-NEXT: [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42) +; VF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0 +; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1 +; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP20]], i32 2 +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP20]], i32 3 +; VF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; VF4-NEXT: store i64 [[TMP21]], ptr [[TMP25]], align 8 ; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP26]], align 8 ; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 -; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VF4-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF4-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -725,21 +717,20 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 1 -; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] -; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 -; VF2-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 -; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF2-NEXT: [[TMP2:%.*]] = lshr i64 [[OFFSET_IDX]], 1 +; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 +; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 +; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; VF2-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 ; VF2-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 -; VF2-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 +; VF2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -754,29 +745,28 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP1]], 1 -; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] -; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 -; VF4-NEXT: [[TMP7:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2 -; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 +; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP4:%.*]] = lshr i64 [[OFFSET_IDX]], 1 +; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 +; VF4-NEXT: [[TMP6:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 +; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] -; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] +; VF4-NEXT: store i64 [[TMP7]], ptr [[TMP11]], align 8 ; VF4-NEXT: store i64 [[TMP8]], ptr [[TMP12]], align 8 ; VF4-NEXT: store i64 [[TMP9]], ptr [[TMP13]], align 8 ; VF4-NEXT: store i64 [[TMP10]], ptr [[TMP14]], align 8 -; VF4-NEXT: store i64 [[TMP11]], ptr [[TMP15]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; VF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; VF4-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 +; VF4-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -811,28 +801,27 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 +; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 -; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) -; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 -; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 +; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 +; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) +; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 +; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 -; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -848,44 +837,43 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 +; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0 +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 2 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP4]], i32 3 +; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] -; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] +; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 -; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 -; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0 -; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 -; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) -; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0 -; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1 -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 +; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> poison, i64 [[TMP13]], i32 0 +; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3 +; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42) +; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0 +; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1 +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 +; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] -; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] +; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP26]], align 8 ; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 -; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -920,28 +908,27 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 +; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 +; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] -; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] +; VF2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8 ; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 -; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 -; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 -; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) -; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 -; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; VF2-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0 +; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1 +; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) +; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 +; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 +; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 -; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) -; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -957,44 +944,43 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 2) -; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 +; VF4-NEXT: [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0 +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 2 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP4]], i32 3 +; VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] -; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] +; VF4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 ; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 -; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 -; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0 -; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 -; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) -; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0 -; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1 -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 +; VF4-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> poison, i64 [[TMP13]], i32 0 +; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3 +; VF4-NEXT: [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42) +; VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP21]], i32 0 +; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1 +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 +; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] -; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] +; VF4-NEXT: store i64 [[TMP22]], ptr [[TMP26]], align 8 ; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 -; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) -; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll index fb962c017156d..42d7a75b4b6bc 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll @@ -304,31 +304,30 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) -; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] -; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] +; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] -; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] +; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) -; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) +; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 -; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: @@ -345,47 +344,46 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) -; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]] +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] +; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 -; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) -; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 +; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) +; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0 +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 +; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 -; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: @@ -425,31 +423,30 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] -; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] +; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] -; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] +; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) -; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) +; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 -; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: @@ -466,47 +463,46 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) -; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) +; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]] +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] +; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 -; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) -; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 +; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) +; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0 +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 +; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 -; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: @@ -546,31 +542,30 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) -; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] -; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) +; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] +; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] -; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] +; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) -; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) +; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 -; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] ; VF2: exit: @@ -587,47 +582,46 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3) -; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3) +; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]] +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] +; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 -; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) -; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 +; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) +; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0 +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 +; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 -; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 +; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] ; VF4: exit: @@ -667,31 +661,30 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) -; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] -; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] +; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] -; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] +; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) -; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) +; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 -; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -707,47 +700,46 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) -; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 9 +; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]] +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] +; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 -; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) -; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 +; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) +; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0 +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 +; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 -; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -786,31 +778,30 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] -; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] +; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] -; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] +; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) -; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) +; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 -; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -826,47 +817,46 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) -; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 9 +; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) +; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]] +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] +; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 -; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) -; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 +; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) +; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0 +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 +; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 -; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -905,31 +895,30 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) -; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] -; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF2-NEXT: [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) +; VF2-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] +; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; VF2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] -; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] +; VF2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8 ; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 -; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 -; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) -; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1 +; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) +; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 +; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 -; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -945,47 +934,46 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3) -; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] -; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 9 +; VF4-NEXT: [[TMP3:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3) +; VF4-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP3]], [[TMP4]] +; VF4-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] +; VF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 -; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 -; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) -; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3 +; VF4-NEXT: [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42) +; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP22]], i32 0 +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 +; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] -; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] +; VF4-NEXT: store i64 [[TMP23]], ptr [[TMP27]], align 8 ; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 -; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1322,31 +1310,30 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) -; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 -; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 +; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] -; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 -; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 -; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 +; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 +; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 -; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1363,47 +1350,46 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) -; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2 -; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 +; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) +; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 +; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] -; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 -; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 -; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 +; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 +; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] -; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] +; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 -; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1443,31 +1429,30 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 -; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 +; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] -; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 -; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 -; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 +; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 +; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 -; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1484,47 +1469,46 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) -; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2 -; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 +; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) +; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 +; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] -; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 -; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 -; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 +; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 +; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] -; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] +; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 -; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1564,31 +1548,30 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) -; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 -; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 +; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] -; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 -; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 -; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 +; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 +; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 -; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1605,47 +1588,46 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3) -; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2 -; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 +; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3) +; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 +; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] -; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 -; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 -; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 +; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 +; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] -; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] +; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 -; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1685,31 +1667,30 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) -; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 -; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 1) +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 +; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] -; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 -; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 -; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 +; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 +; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 -; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1726,47 +1707,46 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) -; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2 -; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 1) +; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 +; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] -; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 -; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 -; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 +; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 +; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] -; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] +; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 -; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1806,31 +1786,30 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 -; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 2) +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 +; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] -; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 -; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 -; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 +; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 +; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 -; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1847,47 +1826,46 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) -; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2 -; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 2) +; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 +; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] -; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 -; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 -; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 +; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 +; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] -; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] +; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 -; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1927,31 +1905,30 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) -; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) -; VF2-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] -; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 -; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) +; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] +; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 +; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 +; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] -; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8 ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0 -; VF2-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) -; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 -; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 +; VF2-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0 +; VF2-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1 +; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) +; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 +; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] -; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] +; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 -; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add nsw <2 x i64> [[VEC_IND1]], splat (i64 2) -; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1968,47 +1945,46 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 -; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 -; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) -; VF4-NEXT: [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3) -; VF4-NEXT: [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]] -; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0 -; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 -; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2 -; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3 +; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 +; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 +; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 +; VF4-NEXT: [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3) +; VF4-NEXT: [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND1]], splat (i64 3) +; VF4-NEXT: [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]] +; VF4-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0 +; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 +; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 +; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 +; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] -; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; VF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 ; VF4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 ; VF4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8 ; VF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8 -; VF4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8 -; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> poison, i64 [[TMP16]], i32 0 -; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1 -; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2 -; VF4-NEXT: [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3 -; VF4-NEXT: [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42) -; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP24]], i32 0 -; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 -; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 -; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 +; VF4-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> poison, i64 [[TMP15]], i32 0 +; VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1 +; VF4-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2 +; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3 +; VF4-NEXT: [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42) +; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP23]], i32 0 +; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 +; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 +; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] -; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] +; VF4-NEXT: store i64 [[TMP24]], ptr [[TMP28]], align 8 ; VF4-NEXT: store i64 [[TMP25]], ptr [[TMP29]], align 8 ; VF4-NEXT: store i64 [[TMP26]], ptr [[TMP30]], align 8 ; VF4-NEXT: store i64 [[TMP27]], ptr [[TMP31]], align 8 -; VF4-NEXT: store i64 [[TMP28]], ptr [[TMP32]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) -; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll index 252f2942452b4..380a6ac860f39 100644 --- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll +++ b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll @@ -106,18 +106,13 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) { ; FORCED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; FORCED-TF: vector.body: ; FORCED-TF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE11:%.*]] ] -; FORCED-TF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; FORCED-TF-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; FORCED-TF-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; FORCED-TF-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; FORCED-TF-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[TMP0]] +; FORCED-TF-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[INDEX]] ; FORCED-TF-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]] ; FORCED-TF-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]] ; FORCED-TF-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]] -; FORCED-TF-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0 -; FORCED-TF-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1 -; FORCED-TF-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[NEXT_GEP2]], i32 2 -; FORCED-TF-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 3 ; FORCED-TF-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0 ; FORCED-TF-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer ; FORCED-TF-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], @@ -185,18 +180,13 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE11:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[NEXT_GEP2]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 3 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], diff --git a/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll index 9f3744f4a5150..76a8bba86984c 100644 --- a/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll @@ -112,9 +112,10 @@ for.end: ; VF8: vector.body: ; VF8-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ] ; VF8-NEXT: [[OFFSET_IDX:%.+]] = mul i64 [[INDEX]], %step -; VF8-NEXT: [[MUL0:%.+]] = mul i64 0, %step -; VF8-NEXT: [[ADD:%.+]] = add i64 [[OFFSET_IDX]], [[MUL0]] -; VF8: getelementptr inbounds i32, ptr %in, i64 [[ADD]] +; VF8-NEXT: [[MUL1:%.+]] = mul i64 1, %step +; VF8-NEXT: [[ADD1:%.+]] = add i64 [[OFFSET_IDX]], [[MUL1]] +; VF8: getelementptr inbounds i32, ptr %in, i64 [[OFFSET_IDX]] +; VF8: getelementptr inbounds i32, ptr %in, i64 [[ADD1]] ; VF8: middle.block: ; VF1-LABEL: @doit2 diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll index 5da6fc3179043..cfeaaf88643d0 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll @@ -629,10 +629,7 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF8UF1-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0 ; VF8UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF8UF1: [[PRED_STORE_IF]]: -; VF8UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]] -; VF8UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]] -; VF8UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]] -; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] +; VF8UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]] ; VF8UF1-NEXT: store i8 0, ptr [[TMP8]], align 1 ; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE]] ; VF8UF1: [[PRED_STORE_CONTINUE]]: @@ -733,10 +730,7 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF8UF2-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0 ; VF8UF2-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF8UF2: [[PRED_STORE_IF]]: -; VF8UF2-NEXT: [[TMP6:%.*]] = mul i64 0, [[STEP]] -; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 0, [[TMP6]] -; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], [[STEP]] -; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]] +; VF8UF2-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]] ; VF8UF2-NEXT: store i8 0, ptr [[TMP9]], align 1 ; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE]] ; VF8UF2: [[PRED_STORE_CONTINUE]]: @@ -916,10 +910,7 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF16UF1-NEXT: [[TMP4:%.*]] = extractelement <16 x i1> [[TMP3]], i32 0 ; VF16UF1-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF16UF1: [[PRED_STORE_IF]]: -; VF16UF1-NEXT: [[TMP5:%.*]] = mul i64 0, [[STEP]] -; VF16UF1-NEXT: [[TMP6:%.*]] = add i64 0, [[TMP5]] -; VF16UF1-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]] -; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] +; VF16UF1-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STEP]] ; VF16UF1-NEXT: store i8 0, ptr [[TMP8]], align 1 ; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE]] ; VF16UF1: [[PRED_STORE_CONTINUE]]: diff --git a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll index 8b9a526899041..c573ebaa51e9f 100644 --- a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll +++ b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll @@ -141,9 +141,8 @@ define void @stride_poison(ptr %dst) mustprogress { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], poison -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], poison ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], poison -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: store i8 0, ptr [[TMP5]], align 1 ; CHECK-NEXT: store i8 0, ptr [[TMP6]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll index 6a6ae316f4e52..6cb76f86aeb5a 100644 --- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll @@ -157,11 +157,10 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]] @@ -235,11 +234,10 @@ define void @test_versioned_with_different_uses(i32 %offset, ptr noalias %dst.1, ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX2]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX2]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX2]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX2]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[OFFSET_IDX2]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP7]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll index cf85f26992c2f..49b7b84af0edc 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll @@ -20,69 +20,69 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): vector.body ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ] -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2> -; CHECK-NEXT: EMIT vp<[[STEP1:%.+]]> = extractelement vp<[[STEPS]]>, ir<0> -; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEP1]]> -; CHECK-NEXT: EMIT vp<[[STEP2:%.+]]> = extractelement vp<[[STEPS]]>, ir<1> -; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEP2]]> -; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1 +; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, pred.store.continue{{.*}} ] +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>, ir<1>, ir<0> +; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]> ; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12> ; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13> ; CHECK-NEXT: EMIT vp<[[OR_CASES:%.+]]> = or vp<[[C1]]>, vp<[[C2]]> ; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK:%.+]]> = not vp<[[OR_CASES]]> -; CHECK-NEXT: Successor(s): pred.store -; CHECK-EMPTY: -; CHECK-NEXT: pred.store: { -; CHECK-NEXT: pred.store.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<[[C2]]> -; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR_VEC]]> -; CHECK-NEXT: Successor(s): pred.store.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): if.then.2.0 -; CHECK-EMPTY: -; CHECK-NEXT: if.then.2.0: -; CHECK-NEXT: Successor(s): pred.store -; CHECK-EMPTY: -; CHECK-NEXT: pred.store: { -; CHECK-NEXT: pred.store.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<[[C1]]> -; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR_VEC]]> -; CHECK-NEXT: Successor(s): pred.store.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): if.then.1.1 -; CHECK-EMPTY: -; CHECK-NEXT: if.then.1.1: -; CHECK-NEXT: Successor(s): pred.store -; CHECK-EMPTY: -; CHECK-NEXT: pred.store: { -; CHECK-NEXT: pred.store.entry: -; CHECK-NEXT: BRANCH-ON-MASK vp<[[DEFAULT_MASK]]> -; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR_VEC]]> -; CHECK-NEXT: Successor(s): pred.store.continue -; CHECK-EMPTY: -; CHECK-NEXT: pred.store.continue: -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): default.2 -; CHECK-EMPTY: -; CHECK-NEXT: default.2: +; CHECK-NEXT: EMIT vp<[[C2_LANE0:%.+]]> = extractelement vp<[[C2]]>, ir<0> +; CHECK-NEXT: EMIT branch-on-cond vp<[[C2_LANE0]]> +; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: CLONE store ir<0>, vp<[[PTR]]> +; CHECK-NEXT: Successor(s): pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: EMIT vp<[[C2_LANE1:%.+]]> = extractelement vp<[[C2]]>, ir<1> +; CHECK-NEXT: EMIT branch-on-cond vp<[[C2_LANE1]]> +; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}} +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if{{.*}}: +; CHECK-NEXT: CLONE store ir<0>, vp<[[PTR]]>.1 +; CHECK-NEXT: Successor(s): pred.store.continue{{.*}} +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue{{.*}}: +; CHECK-NEXT: EMIT vp<[[C1_LANE0:%.+]]> = extractelement vp<[[C1]]>, ir<0> +; CHECK-NEXT: EMIT branch-on-cond vp<[[C1_LANE0]]> +; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}} +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if{{.*}}: +; CHECK-NEXT: CLONE store ir<42>, vp<[[PTR]]> +; CHECK-NEXT: Successor(s): pred.store.continue{{.*}} +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue{{.*}}: +; CHECK-NEXT: EMIT vp<[[C1_LANE1:%.+]]> = extractelement vp<[[C1]]>, ir<1> +; CHECK-NEXT: EMIT branch-on-cond vp<[[C1_LANE1]]> +; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}} +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if{{.*}}: +; CHECK-NEXT: CLONE store ir<42>, vp<[[PTR]]>.1 +; CHECK-NEXT: Successor(s): pred.store.continue{{.*}} +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue{{.*}}: +; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK_LANE0:%.+]]> = extractelement vp<[[DEFAULT_MASK]]>, ir<0> +; CHECK-NEXT: EMIT branch-on-cond vp<[[DEFAULT_MASK_LANE0]]> +; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}} +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if{{.*}}: +; CHECK-NEXT: CLONE store ir<2>, vp<[[PTR]]> +; CHECK-NEXT: Successor(s): pred.store.continue{{.*}} +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue{{.*}}: +; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK_LANE1:%.+]]> = extractelement vp<[[DEFAULT_MASK]]>, ir<1> +; CHECK-NEXT: EMIT branch-on-cond vp<[[DEFAULT_MASK_LANE1]]> +; CHECK-NEXT: Successor(s): pred.store.if{{.*}}, pred.store.continue{{.*}} +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if{{.*}}: +; CHECK-NEXT: CLONE store ir<2>, vp<[[PTR]]>.1 +; CHECK-NEXT: Successor(s): pred.store.continue{{.*}} +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue{{.*}}: ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, ir<2> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: Successor(s): middle.block, vector.body