Skip to content

Commit 6718349

Browse files
committed
[VPlan] Fix crash, add additional PredPHI test
1 parent 6a832c5 commit 6718349

File tree

8 files changed

+195
-89
lines changed

8 files changed

+195
-89
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7790,8 +7790,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77907790
OrigLoop->getHeader()->getContext());
77917791
VPlanTransforms::materializeBroadcasts(BestVPlan);
77927792
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
7793-
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType(),
7794-
OrigLoop->getHeader()->getDataLayout());
7793+
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
77957794
VPlanTransforms::narrowInterleaveGroups(
77967795
BestVPlan, BestVF,
77977796
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
@@ -9096,7 +9095,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
90969095
if (!HasScalarVF)
90979096
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
90989097
*Plan, CM.getMinimalBitwidths());
9099-
VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
9098+
VPlanTransforms::optimize(*Plan);
91009099
// TODO: try to put it close to addActiveLaneMask().
91019100
// Discard the plan if it is not EVL-compatible
91029101
if (CM.foldTailWithEVL() && !HasScalarVF &&

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1542,7 +1542,7 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
15421542
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
15431543
VFRange SubRange = {VF, MaxVFTimes2};
15441544
if (auto Plan = tryToBuildVPlan(SubRange)) {
1545-
VPlanTransforms::optimize(*Plan, OrigLoop->getHeader()->getDataLayout());
1545+
VPlanTransforms::optimize(*Plan);
15461546
// Update the name of the latch of the top-level vector loop region
15471547
// after optimizations which includes block folding.
15481548
Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2755,8 +2755,15 @@ InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
27552755

27562756
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
27572757
assert(State.Lane && "Predicated instruction PHI works per instance.");
2758-
Instruction *ScalarPredInst =
2759-
cast<Instruction>(State.get(getOperand(0), *State.Lane));
2758+
Value *ScalarPred = State.get(getOperand(0), *State.Lane);
2759+
Instruction *ScalarPredInst = dyn_cast<Instruction>(ScalarPred);
2760+
if (!ScalarPredInst) {
2761+
if (State.hasScalarValue(this, *State.Lane))
2762+
State.reset(this, ScalarPred, *State.Lane);
2763+
else
2764+
State.set(this, ScalarPred, *State.Lane);
2765+
return;
2766+
}
27602767
BasicBlock *PredicatedBB = ScalarPredInst->getParent();
27612768
BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
27622769
assert(PredicatingBB && "Predicated block has no single predecessor.");

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 61 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -940,79 +940,72 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
940940
}
941941
}
942942

943-
class VPConstantFolder {
944-
TargetFolder Folder;
945-
VPTypeAnalysis TypeInfo;
946-
947-
public:
948-
VPConstantFolder(const DataLayout &DL, const VPTypeAnalysis &TypeInfo)
949-
: Folder(DL), TypeInfo(TypeInfo) {}
950-
951-
Value *tryToConstantFold(VPRecipeBase &R, unsigned Opcode,
952-
ArrayRef<VPValue *> Operands) {
953-
SmallVector<Value *, 4> Ops;
954-
for (VPValue *Op : Operands) {
955-
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
956-
return nullptr;
957-
Ops.emplace_back(Op->getLiveInIRValue());
958-
}
959-
switch (Opcode) {
960-
case Instruction::BinaryOps::Add:
961-
case Instruction::BinaryOps::Sub:
962-
case Instruction::BinaryOps::Mul:
963-
case Instruction::BinaryOps::AShr:
964-
case Instruction::BinaryOps::LShr:
965-
case Instruction::BinaryOps::And:
966-
case Instruction::BinaryOps::Or:
967-
case Instruction::BinaryOps::Xor:
968-
return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode),
969-
Ops[0], Ops[1]);
970-
case VPInstruction::LogicalAnd:
971-
return Folder.FoldSelect(Ops[0], Ops[1],
972-
ConstantInt::getNullValue(Ops[1]->getType()));
973-
case VPInstruction::Not:
974-
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
975-
Constant::getAllOnesValue(Ops[0]->getType()));
976-
case Instruction::Select:
977-
return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
978-
case Instruction::ICmp:
979-
case Instruction::FCmp:
980-
return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
943+
/// Try to fold \p R using TargetFolder to a constant. Will succeed for a
944+
/// handled \p Opcode if all \p Operands are constant.
945+
static Value *tryToConstantFold(const VPRecipeBase &R, unsigned Opcode,
946+
ArrayRef<VPValue *> Operands,
947+
const DataLayout &DL,
948+
VPTypeAnalysis &TypeInfo) {
949+
SmallVector<Value *, 4> Ops;
950+
for (VPValue *Op : Operands) {
951+
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
952+
return nullptr;
953+
Ops.push_back(Op->getLiveInIRValue());
954+
}
955+
956+
TargetFolder Folder(DL);
957+
if (Instruction::isBinaryOp(Opcode))
958+
return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
981959
Ops[1]);
982-
case Instruction::GetElementPtr:
983-
case VPInstruction::PtrAdd:
984-
return Folder.FoldGEP(TypeInfo.inferScalarType(R.getVPSingleValue()),
985-
Ops[0], drop_begin(Ops),
986-
cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
987-
case Instruction::InsertElement:
988-
return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
989-
case Instruction::ExtractElement:
990-
return Folder.FoldExtractElement(Ops[0], Ops[1]);
991-
case Instruction::CastOps::SExt:
992-
case Instruction::CastOps::ZExt:
993-
case Instruction::CastOps::Trunc:
994-
return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
995-
TypeInfo.inferScalarType(R.getVPSingleValue()));
996-
}
997-
return nullptr;
960+
if (Instruction::isCast(Opcode))
961+
return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
962+
TypeInfo.inferScalarType(R.getVPSingleValue()));
963+
switch (Opcode) {
964+
case VPInstruction::LogicalAnd:
965+
return Folder.FoldSelect(Ops[0], Ops[1],
966+
ConstantInt::getNullValue(Ops[1]->getType()));
967+
case VPInstruction::Not:
968+
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
969+
Constant::getAllOnesValue(Ops[0]->getType()));
970+
case Instruction::Select:
971+
return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
972+
case Instruction::ICmp:
973+
case Instruction::FCmp:
974+
return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
975+
Ops[1]);
976+
case Instruction::GetElementPtr: {
977+
auto &RFlags = cast<VPRecipeWithIRFlags>(R);
978+
auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
979+
return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops),
980+
RFlags.getGEPNoWrapFlags());
981+
}
982+
case VPInstruction::PtrAdd:
983+
return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
984+
Ops[1],
985+
cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
986+
case Instruction::InsertElement:
987+
return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
988+
case Instruction::ExtractElement:
989+
return Folder.FoldExtractElement(Ops[0], Ops[1]);
998990
}
999-
};
991+
return nullptr;
992+
}
1000993

1001994
/// Try to simplify recipe \p R.
1002-
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
1003-
const DataLayout &DL) {
995+
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1004996
using namespace llvm::VPlanPatternMatch;
1005997

1006998
// Constant folding.
1007-
VPConstantFolder Folder(DL, TypeInfo);
1008999
if (TypeSwitch<VPRecipeBase *, bool>(&R)
10091000
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
10101001
VPReplicateRecipe>([&](auto *I) {
10111002
VPlan *Plan = R.getParent()->getPlan();
1012-
Value *V =
1013-
Folder.tryToConstantFold(R, I->getOpcode(), I->operands());
1003+
const DataLayout &DL =
1004+
Plan->getScalarHeader()->getIRBasicBlock()->getDataLayout();
1005+
Value *V = tryToConstantFold(*I, I->getOpcode(), I->operands(), DL,
1006+
TypeInfo);
10141007
if (V)
1015-
R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
1008+
I->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
10161009
return V;
10171010
})
10181011
.Default([](auto *) { return false; }))
@@ -1150,14 +1143,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
11501143
}
11511144
}
11521145

1153-
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
1154-
const DataLayout &DL) {
1146+
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
11551147
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
11561148
Plan.getEntry());
11571149
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
11581150
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
11591151
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1160-
simplifyRecipe(R, TypeInfo, DL);
1152+
simplifyRecipe(R, TypeInfo);
11611153
}
11621154
}
11631155
}
@@ -1418,8 +1410,7 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
14181410

14191411
VPBlockUtils::connectBlocks(Preheader, Header);
14201412
VPBlockUtils::connectBlocks(ExitingVPBB, Exit);
1421-
VPlanTransforms::simplifyRecipes(Plan, *CanIVTy,
1422-
PSE.getSE()->getDataLayout());
1413+
VPlanTransforms::simplifyRecipes(Plan, *CanIVTy);
14231414
} else {
14241415
// The vector region contains header phis for which we cannot remove the
14251416
// loop region yet.
@@ -1849,16 +1840,17 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
18491840
VPBB->back().eraseFromParent();
18501841
}
18511842
}
1852-
void VPlanTransforms::optimize(VPlan &Plan, const DataLayout &DL) {
1843+
1844+
void VPlanTransforms::optimize(VPlan &Plan) {
18531845
runPass(removeRedundantCanonicalIVs, Plan);
18541846
runPass(removeRedundantInductionCasts, Plan);
18551847

1856-
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
1848+
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
18571849
runPass(simplifyBlends, Plan);
18581850
runPass(removeDeadRecipes, Plan);
18591851
runPass(legalizeAndOptimizeInductions, Plan);
18601852
runPass(removeRedundantExpandSCEVRecipes, Plan);
1861-
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType(), DL);
1853+
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
18621854
runPass(removeBranchOnCondTrue, Plan);
18631855
runPass(removeDeadRecipes, Plan);
18641856

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ struct VPlanTransforms {
109109
/// Apply VPlan-to-VPlan optimizations to \p Plan, including induction recipe
110110
/// optimizations, dead recipe removal, replicate region optimizations and
111111
/// block merging.
112-
static void optimize(VPlan &Plan, const DataLayout &DL);
112+
static void optimize(VPlan &Plan);
113113

114114
/// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
115115
/// region block and remove the mask operand. Optimize the created regions by
@@ -190,8 +190,7 @@ struct VPlanTransforms {
190190

191191
/// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
192192
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
193-
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy,
194-
const DataLayout &DL);
193+
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
195194

196195
/// If there's a single exit block, optimize its phi recipes that use exiting
197196
/// IV values by feeding them precomputed end values instead, possibly taken

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) {
1313
; VF2: [[VECTOR_PH]]:
1414
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
1515
; VF2: [[VECTOR_BODY]]:
16-
; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 0, 1
17-
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
16+
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 0
1817
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
1918
; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
2019
; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>

llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll

Lines changed: 85 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -673,29 +673,24 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
673673
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
674674
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]]
675675
; CHECK: [[PRED_SREM_IF]]:
676-
; CHECK-NEXT: [[TMP3:%.*]] = srem i64 3, 0
677676
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]]
678677
; CHECK: [[PRED_SREM_CONTINUE]]:
679-
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_SREM_IF]] ]
680678
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
681679
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]]
682680
; CHECK: [[PRED_SREM_IF1]]:
683-
; CHECK-NEXT: [[TMP6:%.*]] = srem i64 3, 0
684681
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]]
685682
; CHECK: [[PRED_SREM_CONTINUE2]]:
686683
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
687684
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_SREM_IF3:.*]], label %[[PRED_SREM_CONTINUE4:.*]]
688685
; CHECK: [[PRED_SREM_IF3]]:
689-
; CHECK-NEXT: [[TMP8:%.*]] = srem i64 3, 0
690686
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE4]]
691687
; CHECK: [[PRED_SREM_CONTINUE4]]:
692688
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
693689
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_SREM_IF5:.*]], label %[[PRED_SREM_CONTINUE6]]
694690
; CHECK: [[PRED_SREM_IF5]]:
695-
; CHECK-NEXT: [[TMP10:%.*]] = srem i64 3, 0
696691
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE6]]
697692
; CHECK: [[PRED_SREM_CONTINUE6]]:
698-
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP4]], -3
693+
; CHECK-NEXT: [[TMP11:%.*]] = add i64 poison, -3
699694
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
700695
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP12]]
701696
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
@@ -738,6 +733,89 @@ exit:
738733
ret void
739734
}
740735

736+
; Variation of the above test with the poison value being used in all lanes.
737+
define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst, ptr noalias %aux) {
738+
; CHECK-LABEL: define void @recipe_without_underlying_instr_lanes_used(
739+
; CHECK-SAME: i64 [[N:%.*]], ptr noalias [[DST:%.*]], ptr noalias [[AUX:%.*]]) {
740+
; CHECK-NEXT: [[ENTRY:.*:]]
741+
; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
742+
; CHECK: [[VECTOR_PH]]:
743+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
744+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
745+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
746+
; CHECK: [[VECTOR_BODY]]:
747+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
748+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
749+
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
750+
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
751+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
752+
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
753+
; CHECK: [[PRED_STORE_IF]]:
754+
; CHECK-NEXT: store i64 poison, ptr [[AUX]], align 8
755+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
756+
; CHECK: [[PRED_STORE_CONTINUE]]:
757+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
758+
; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
759+
; CHECK: [[PRED_STORE_IF1]]:
760+
; CHECK-NEXT: store i64 poison, ptr [[AUX]], align 8
761+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
762+
; CHECK: [[PRED_STORE_CONTINUE2]]:
763+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
764+
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
765+
; CHECK: [[PRED_STORE_IF3]]:
766+
; CHECK-NEXT: store i64 poison, ptr [[AUX]], align 8
767+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
768+
; CHECK: [[PRED_STORE_CONTINUE4]]:
769+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
770+
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
771+
; CHECK: [[PRED_STORE_IF5]]:
772+
; CHECK-NEXT: store i64 poison, ptr [[AUX]], align 8
773+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
774+
; CHECK: [[PRED_STORE_CONTINUE6]]:
775+
; CHECK-NEXT: [[TMP6:%.*]] = add i64 poison, -3
776+
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], [[TMP6]]
777+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP7]]
778+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
779+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
780+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]]
781+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
782+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0
783+
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP11]], align 4
784+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
785+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
786+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
787+
; CHECK: [[MIDDLE_BLOCK]]:
788+
;
789+
790+
entry:
791+
br label %loop.header
792+
793+
loop.header:
794+
%iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ]
795+
%cmp = icmp eq i64 %iv, %n
796+
br i1 %cmp, label %loop.latch, label %then
797+
798+
then:
799+
%rem = srem i64 3, 0
800+
store i64 %rem, ptr %aux
801+
%add3 = add i64 %rem, -3
802+
%add5 = add i64 %iv, %add3
803+
%gep = getelementptr [5 x i8], ptr @c, i64 0, i64 %add5
804+
%l = load i8, ptr %gep, align 1
805+
br label %loop.latch
806+
807+
loop.latch:
808+
%sr = phi i8 [ 0, %loop.header ], [ %l , %then ]
809+
%gep.dst = getelementptr i8, ptr %dst, i64 %iv
810+
store i8 %sr, ptr %gep.dst, align 4
811+
%inc = add i64 %iv, 1
812+
%exitcond.not = icmp eq i64 %inc, 4
813+
br i1 %exitcond.not, label %exit, label %loop.header
814+
815+
exit:
816+
ret void
817+
}
818+
741819
; %B.gep.0 and pointers based on it can preserve inbounds, as the inbounds
742820
; version is used unconditionally in the store in the latch.
743821
; FIXME: at the moment, inbounds is dropped from both the GEP feeding the vector load and store
@@ -763,7 +841,7 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read
763841
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4
764842
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
765843
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
766-
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
844+
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
767845
; CHECK: [[MIDDLE_BLOCK]]:
768846
;
769847

0 commit comments

Comments
 (0)