Skip to content

Commit 8251975

Browse files
committed
LV: refine loop-invariance checks
After auditing LoopVectorize, it was found that it was using LoopInfo::isLoopInvariant in several places, skipping the more powerful SCEV isLoopInvariant check. LoopVectorizationLegality already has a routine called isInvariant, which in turn calls into LoopAccessAnalysis. Fix a deficiency in LAA's routine, and use it more widely in place of LoopInfo::isLoopInvariant to correctly find invariant values while vectorizing. Additionally, the LoopVectorizationCostModel routine shouldConsiderInvariant is even more powerful but underused; use it more widely as well.
1 parent a177be5 commit 8251975

File tree

8 files changed

+73
-92
lines changed

8 files changed

+73
-92
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2807,8 +2807,8 @@ LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
28072807

28082808
bool LoopAccessInfo::isInvariant(Value *V) const {
28092809
auto *SE = PSE->getSE();
2810-
// TODO: Is this really what we want? Even without FP SCEV, we may want some
2811-
// trivially loop-invariant FP values to be considered invariant.
2810+
if (TheLoop->isLoopInvariant(V))
2811+
return true;
28122812
if (!SE->isSCEVable(V->getType()))
28132813
return false;
28142814
const SCEV *S = SE->getSCEV(V);

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1567,7 +1567,7 @@ class LoopVectorizationCostModel {
15671567

15681568
/// Returns true if \p Op should be considered invariant and if it is
15691569
/// trivially hoistable.
1570-
bool shouldConsiderInvariant(Value *Op);
1570+
bool shouldConsiderInvariant(Value *Op) const;
15711571

15721572
/// Return the value of vscale used for tuning the cost model.
15731573
std::optional<unsigned> getVScaleForTuning() const { return VScaleForTuning; }
@@ -1763,8 +1763,7 @@ class LoopVectorizationCostModel {
17631763
/// extracted.
17641764
bool needsExtract(Value *V, ElementCount VF) const {
17651765
Instruction *I = dyn_cast<Instruction>(V);
1766-
if (VF.isScalar() || !I || !TheLoop->contains(I) ||
1767-
TheLoop->isLoopInvariant(I) ||
1766+
if (VF.isScalar() || !I || shouldConsiderInvariant(I) ||
17681767
getWideningDecision(I, VF) == CM_Scalarize)
17691768
return false;
17701769

@@ -3118,7 +3117,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
31183117
// A helper that returns true if the given value is a getelementptr
31193118
// instruction contained in the loop.
31203119
auto IsLoopVaryingGEP = [&](Value *V) {
3121-
return isa<GetElementPtrInst>(V) && !TheLoop->isLoopInvariant(V);
3120+
return isa<GetElementPtrInst>(V) && !shouldConsiderInvariant(V);
31223121
};
31233122

31243123
// A helper that evaluates a memory access's use of a pointer. If the use will
@@ -3346,14 +3345,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
33463345
// is correct. The easiest form of the later is to require that all values
33473346
// stored are the same.
33483347
return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
3349-
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
3348+
Legal->isInvariant(cast<StoreInst>(I)->getValueOperand()));
33503349
}
33513350
case Instruction::UDiv:
33523351
case Instruction::SDiv:
33533352
case Instruction::SRem:
33543353
case Instruction::URem:
33553354
// If the divisor is loop-invariant no predication is needed.
3356-
return !TheLoop->isLoopInvariant(I->getOperand(1));
3355+
return !Legal->isInvariant(I->getOperand(1));
33573356
}
33583357
}
33593358

@@ -3410,7 +3409,7 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
34103409
Value *Op2 = I->getOperand(1);
34113410
auto Op2Info = TTI.getOperandInfo(Op2);
34123411
if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
3413-
Legal->isInvariant(Op2))
3412+
shouldConsiderInvariant(Op2))
34143413
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
34153414

34163415
SmallVector<const Value *, 4> Operands(I->operand_values());
@@ -3600,7 +3599,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
36003599
// assuming aliasing and ordering which have already been checked.
36013600
return true;
36023601
// Storing the same value on every iteration.
3603-
return TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand());
3602+
return Legal->isInvariant(cast<StoreInst>(I)->getValueOperand());
36043603
};
36053604

36063605
auto IsUniformDecision = [&](Instruction *I, ElementCount VF) {
@@ -5630,12 +5629,10 @@ static const SCEV *getAddressAccessSCEV(
56305629

56315630
// We are looking for a gep with all loop invariant indices except for one
56325631
// which should be an induction variable.
5633-
auto *SE = PSE.getSE();
56345632
unsigned NumOperands = Gep->getNumOperands();
56355633
for (unsigned Idx = 1; Idx < NumOperands; ++Idx) {
56365634
Value *Opd = Gep->getOperand(Idx);
5637-
if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
5638-
!Legal->isInductionVariable(Opd))
5635+
if (!Legal->isInvariant(Opd) && !Legal->isInductionVariable(Opd))
56395636
return nullptr;
56405637
}
56415638

@@ -5747,9 +5744,8 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
57475744
TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy, {},
57485745
CostKind);
57495746
}
5750-
StoreInst *SI = cast<StoreInst>(I);
57515747

5752-
bool IsLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
5748+
bool IsLoopInvariantStoreValue = shouldConsiderInvariant(I);
57535749
return TTI.getAddressComputationCost(ValTy) +
57545750
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
57555751
CostKind) +
@@ -5900,7 +5896,7 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
59005896
match(Op0, m_ZExtOrSExt(m_Value())) &&
59015897
Op0->getOpcode() == Op1->getOpcode() &&
59025898
Op0->getOperand(0)->getType() == Op1->getOperand(0)->getType() &&
5903-
!TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1) &&
5899+
!shouldConsiderInvariant(Op0) && !shouldConsiderInvariant(Op1) &&
59045900
(Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
59055901

59065902
// Matched reduce.add(ext(mul(ext(A), ext(B)))
@@ -5927,7 +5923,7 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
59275923
RedCost < ExtCost * 2 + MulCost + Ext2Cost + BaseCost)
59285924
return I == RetI ? RedCost : 0;
59295925
} else if (RedOp && match(RedOp, m_ZExtOrSExt(m_Value())) &&
5930-
!TheLoop->isLoopInvariant(RedOp)) {
5926+
!shouldConsiderInvariant(RedOp)) {
59315927
// Matched reduce(ext(A))
59325928
bool IsUnsigned = isa<ZExtInst>(RedOp);
59335929
auto *ExtType = VectorType::get(RedOp->getOperand(0)->getType(), VectorTy);
@@ -5943,8 +5939,8 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
59435939
} else if (RedOp && RdxDesc.getOpcode() == Instruction::Add &&
59445940
match(RedOp, m_Mul(m_Instruction(Op0), m_Instruction(Op1)))) {
59455941
if (match(Op0, m_ZExtOrSExt(m_Value())) &&
5946-
Op0->getOpcode() == Op1->getOpcode() &&
5947-
!TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1)) {
5942+
Op0->getOpcode() == Op1->getOpcode() && !shouldConsiderInvariant(Op0) &&
5943+
!shouldConsiderInvariant(Op1)) {
59485944
bool IsUnsigned = isa<ZExtInst>(Op0);
59495945
Type *Op0Ty = Op0->getOperand(0)->getType();
59505946
Type *Op1Ty = Op1->getOperand(0)->getType();
@@ -6097,8 +6093,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
60976093

60986094
// A uniform store isn't necessarily uniform-by-part
60996095
// and we can't assume scalarization.
6100-
auto &SI = cast<StoreInst>(I);
6101-
return TheLoop->isLoopInvariant(SI.getValueOperand());
6096+
return shouldConsiderInvariant(&I);
61026097
};
61036098

61046099
const InstructionCost GatherScatterCost =
@@ -6331,8 +6326,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
63316326
case VFParamKind::OMP_Uniform: {
63326327
Value *ScalarParam = CI->getArgOperand(Param.ParamPos);
63336328
// Make sure the scalar parameter in the loop is invariant.
6334-
if (!PSE.getSE()->isLoopInvariant(PSE.getSCEV(ScalarParam),
6335-
TheLoop))
6329+
if (!Legal->isInvariant(ScalarParam))
63366330
ParamsOk = false;
63376331
break;
63386332
}
@@ -6405,7 +6399,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
64056399
}
64066400
}
64076401

6408-
bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
6402+
bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) const {
64096403
if (!Legal->isInvariant(Op))
64106404
return false;
64116405
// Consider Op invariant, if it or its operands aren't predicated
@@ -6441,7 +6435,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
64416435
Type *RetTy = I->getType();
64426436
if (canTruncateToMinimalBitwidth(I, VF))
64436437
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
6444-
auto *SE = PSE.getSE();
64456438

64466439
auto HasSingleCopyAfterVectorization = [this](Instruction *I,
64476440
ElementCount VF) -> bool {
@@ -6687,8 +6680,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
66876680
}
66886681
case Instruction::Select: {
66896682
SelectInst *SI = cast<SelectInst>(I);
6690-
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
6691-
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
6683+
bool ScalarCond = shouldConsiderInvariant(SI->getCondition());
66926684

66936685
const Value *Op0, *Op1;
66946686
using namespace llvm::PatternMatch;

llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll

Lines changed: 32 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -7,60 +7,45 @@ define void @test(ptr %p, i64 %a, i8 %b) {
77
; CHECK-NEXT: entry:
88
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
99
; CHECK: vector.ph:
10-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0
11-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
12-
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], splat (i64 48)
13-
; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], splat (i64 52)
14-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
15-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
16-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
17-
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT2]] to <4 x i32>
10+
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
11+
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP12]], 2
12+
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
13+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 3, [[TMP2]]
14+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
15+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
16+
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
17+
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 2
18+
; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[A]], 48
19+
; CHECK-NEXT: [[TMP6:%.*]] = ashr i64 [[TMP5]], 52
20+
; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
21+
; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[B]] to i32
22+
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
23+
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], splat (i32 1)
24+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP10]]
25+
; CHECK-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP4]]
26+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP11]], i64 0
27+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
1828
; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]]
1929
; CHECK: vector.body:
20-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
21-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
30+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY1]] ]
31+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY1]] ]
2232
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
23-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3)
24-
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], splat (i32 2)
25-
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
26-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
27-
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
28-
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
29-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
30-
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]]
31-
; CHECK: pred.store.if:
32-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
33-
; CHECK-NEXT: store i8 [[TMP10]], ptr [[P]], align 1
34-
; CHECK-NEXT: br label [[VECTOR_BODY]]
35-
; CHECK: pred.store.continue:
36-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
37-
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
38-
; CHECK: pred.store.if3:
39-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
40-
; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
41-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
42-
; CHECK: pred.store.continue4:
43-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
44-
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
45-
; CHECK: pred.store.if5:
46-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
47-
; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
48-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
49-
; CHECK: pred.store.continue6:
50-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
51-
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
52-
; CHECK: pred.store.if7:
53-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
33+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[TMP0]], i32 3)
34+
; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 2 x i32> [[VEC_IND]], splat (i32 2)
35+
; CHECK-NEXT: [[TMP14:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP13]], <vscale x 2 x i1> zeroinitializer
36+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[TMP14]], i32 0
37+
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP15]], i32 [[TMP8]], i32 [[TMP7]]
38+
; CHECK-NEXT: [[TMP17:%.*]] = shl i32 [[PREDPHI]], 8
39+
; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP17]] to i8
5440
; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1
55-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
56-
; CHECK: pred.store.continue8:
57-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
58-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
59-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
41+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
42+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT]]
43+
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
44+
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
6045
; CHECK: middle.block:
6146
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
6247
; CHECK: scalar.ph:
63-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
48+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
6449
; CHECK-NEXT: br label [[FOR_COND:%.*]]
6550
; CHECK: for.cond:
6651
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ]

llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,12 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
2727
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]]
2828
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0
2929
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr align 1 [[TMP6]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
30-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i16> @llvm.vp.zext.nxv1i16.nxv1i8(<vscale x 1 x i8> [[VP_OP_LOAD]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
31-
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> zeroinitializer, <vscale x 1 x i16> [[TMP7]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
32-
; CHECK-NEXT: [[VP_OP1:%.*]] = call <vscale x 1 x i16> @llvm.vp.lshr.nxv1i16(<vscale x 1 x i16> [[VP_OP]], <vscale x 1 x i16> trunc (<vscale x 1 x i32> splat (i32 1) to <vscale x 1 x i16>), <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
33-
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 1 x i8> @llvm.vp.trunc.nxv1i8.nxv1i16(<vscale x 1 x i16> [[VP_OP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
34-
; CHECK-NEXT: call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> [[TMP8]], <vscale x 1 x ptr> align 1 zeroinitializer, <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
30+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 1 x i8> [[VP_OP_LOAD]], i32 0
31+
; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
32+
; CHECK-NEXT: [[TMP12:%.*]] = mul i32 0, [[TMP8]]
33+
; CHECK-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 1
34+
; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
35+
; CHECK-NEXT: store i8 [[TMP14]], ptr null, align 1
3536
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64
3637
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
3738
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]

llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,16 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
4545
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
4646
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META0:![0-9]+]]
4747
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i32.nxv8i8(<vscale x 8 x i8> [[VP_OP_LOAD]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
48-
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
48+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <vscale x 8 x i32> [[TMP15]], i32 0
49+
; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP19]], 0
4950
; CHECK-NEXT: [[VP_OP2:%.*]] = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> [[TMP15]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
5051
; CHECK-NEXT: [[VP_OP3:%.*]] = call <vscale x 8 x i32> @llvm.vp.or.nxv8i32(<vscale x 8 x i32> [[VP_OP2]], <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
5152
; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <vscale x 8 x i32> [[TMP15]], zeroinitializer
5253
; CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i32> [[VP_OP3]], <vscale x 8 x i32> zeroinitializer, i32 [[TMP11]])
5354
; CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 8 x i8> @llvm.vp.trunc.nxv8i8.nxv8i32(<vscale x 8 x i32> [[TMP17]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
5455
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP18]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
55-
; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 8 x i16> @llvm.vp.trunc.nxv8i16.nxv8i32(<vscale x 8 x i32> [[VP_OP]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
56-
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> [[TMP19]], <vscale x 8 x ptr> align 2 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
56+
; CHECK-NEXT: [[TMP24:%.*]] = trunc i32 [[TMP23]] to i16
57+
; CHECK-NEXT: store i16 [[TMP24]], ptr null, align 2
5758
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
5859
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
5960
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]

0 commit comments

Comments
 (0)