Skip to content

Commit d76c74d

Browse files
authored
Merge branch 'main' into cov-unreachable
2 parents 95f394f + 8d57211 commit d76c74d

File tree

25 files changed

+1065
-180
lines changed

25 files changed

+1065
-180
lines changed

flang/test/Lower/OpenMP/infinite-loop-in-construct.f90

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88
! CHECK: cf.cond_br %{{[0-9]+}}, ^bb1, ^bb2
99
! CHECK-NEXT: ^bb1: // pred: ^bb0
1010
! CHECK: cf.br ^bb2
11-
! CHECK-NEXT: ^bb2: // 3 preds: ^bb0, ^bb1, ^bb2
12-
! CHECK-NEXT: cf.br ^bb2
11+
! CHECK-NEXT: ^bb2: // 2 preds: ^bb0, ^bb1
12+
! CHECK: cf.br ^bb3
13+
! CHECK-NEXT: ^bb3: // 2 preds: ^bb2, ^bb3
14+
! CHECK: cf.br ^bb3
1315
! CHECK-NEXT: }
1416

1517
subroutine sb(ninter, numnod)

libclc/Maintainers.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,14 @@ The following people are the active maintainers for the project. Please reach
1010
out to them for code reviews, questions about their area of expertise, or other
1111
assistance.
1212

13-
Fraser Cormack \
14-
[email protected] (email), [frasercrmck](https://github.com/frasercrmck) (GitHub)
15-
1613
Tom Stellard \
1714
[email protected] (email), [tstellar](https://github.com/tstellar) (GitHub)
15+
16+
## Inactive Maintainers
17+
18+
The following people have graciously spent time performing maintainership
19+
responsibilities but are no longer active in that role. Thank you for all your
20+
help with the success of the project!
21+
22+
Fraser Cormack \
23+
[email protected] (email), [frasercrmck](https://github.com/frasercrmck) (GitHub)

llvm/include/llvm/Support/TrailingObjects.h

Lines changed: 8 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -57,25 +57,9 @@
5757
namespace llvm {
5858

5959
namespace trailing_objects_internal {
60-
/// Helper template to calculate the max alignment requirement for a set of
61-
/// objects.
62-
template <typename First, typename... Rest> class AlignmentCalcHelper {
63-
private:
64-
enum {
65-
FirstAlignment = alignof(First),
66-
RestAlignment = AlignmentCalcHelper<Rest...>::Alignment,
67-
};
6860

69-
public:
70-
enum {
71-
Alignment = FirstAlignment > RestAlignment ? FirstAlignment : RestAlignment
72-
};
73-
};
74-
75-
template <typename First> class AlignmentCalcHelper<First> {
76-
public:
77-
enum { Alignment = alignof(First) };
78-
};
61+
/// The largest alignment requirement (alignof) among the types in \p T.
/// NOTE(review): appears to require at least one type in the pack, since the
/// value is taken from std::max over a braced list of alignof(T) -- confirm.
template <typename... T>
62+
inline constexpr size_t MaxAlignment = std::max({alignof(T)...});
7963

8064
/// The base class for TrailingObjects* classes.
8165
class TrailingObjectsBase {
@@ -209,20 +193,19 @@ class alignas(Align) TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy>
209193
/// See the file comment for details on the usage of the
210194
/// TrailingObjects type.
211195
template <typename BaseTy, typename... TrailingTys>
212-
class TrailingObjects : private trailing_objects_internal::TrailingObjectsImpl<
213-
trailing_objects_internal::AlignmentCalcHelper<
214-
TrailingTys...>::Alignment,
215-
BaseTy, TrailingObjects<BaseTy, TrailingTys...>,
216-
BaseTy, TrailingTys...> {
196+
class TrailingObjects
197+
: private trailing_objects_internal::TrailingObjectsImpl<
198+
trailing_objects_internal::MaxAlignment<TrailingTys...>, BaseTy,
199+
TrailingObjects<BaseTy, TrailingTys...>, BaseTy, TrailingTys...> {
217200

218201
template <int A, typename B, typename T, typename P, typename... M>
219202
friend class trailing_objects_internal::TrailingObjectsImpl;
220203

221204
template <typename... Tys> class Foo {};
222205

223206
typedef trailing_objects_internal::TrailingObjectsImpl<
224-
trailing_objects_internal::AlignmentCalcHelper<TrailingTys...>::Alignment,
225-
BaseTy, TrailingObjects<BaseTy, TrailingTys...>, BaseTy, TrailingTys...>
207+
trailing_objects_internal::MaxAlignment<TrailingTys...>, BaseTy,
208+
TrailingObjects<BaseTy, TrailingTys...>, BaseTy, TrailingTys...>
226209
ParentType;
227210
using TrailingObjectsBase = trailing_objects_internal::TrailingObjectsBase;
228211

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 121 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -8086,13 +8086,76 @@ static SDValue getZT0FrameIndex(MachineFrameInfo &MFI,
80868086
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
80878087
}
80888088

8089+
// Emit a call to __arm_sme_save (IsSave == true) or __arm_sme_restore
// (IsSave == false).
//
// The call takes a single pointer argument whose value is copied from the
// register returned by Info->getSMESaveBufferAddr(), and it has a void return
// type. The function is also marked as using the SME save buffer. Returns the
// `.second` member of the LowerCallTo result.
8090+
static SDValue emitSMEStateSaveRestore(const AArch64TargetLowering &TLI,
8091+
SelectionDAG &DAG,
8092+
AArch64FunctionInfo *Info, SDLoc DL,
8093+
SDValue Chain, bool IsSave) {
8094+
MachineFunction &MF = DAG.getMachineFunction();
8095+
// NOTE(review): FuncInfo is looked up from the MachineFunction while Info is
// passed in by the caller; presumably both name the same object -- confirm.
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
8096+
// Record that this function uses the SME save buffer.
FuncInfo->setSMESaveBufferUsed();
8097+
TargetLowering::ArgListTy Args;
8098+
// Sole argument: the save-buffer address, typed as an unqualified pointer.
Args.emplace_back(
8099+
DAG.getCopyFromReg(Chain, DL, Info->getSMESaveBufferAddr(), MVT::i64),
8100+
PointerType::getUnqual(*DAG.getContext()));
8101+
8102+
// Select the libcall for the requested direction.
RTLIB::Libcall LC =
8103+
IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE;
8104+
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
8105+
TLI.getPointerTy(DAG.getDataLayout()));
8106+
auto *RetTy = Type::getVoidTy(*DAG.getContext());
8107+
TargetLowering::CallLoweringInfo CLI(DAG);
8108+
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
8109+
TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args));
8110+
return TLI.LowerCallTo(CLI).second;
8111+
}
8112+
8113+
// Emit the node sequence that conditionally restores a lazy save:
//   1. read TPIDR2_EL0 via the aarch64_sme_get_tpidr2 intrinsic,
//   2. copy the address of the function's TPIDR2 block into X0,
//   3. emit the RESTORE_ZA pseudo, passing the TPIDR2_EL0 value, X0, the
//      SMEABI_TPIDR2_RESTORE routine symbol and its call-preserved mask,
//   4. write 0 to TPIDR2_EL0 via the aarch64_sme_set_tpidr2 intrinsic.
// Also increments the use count on the function's TPIDR2 object. Returns the
// updated chain.
static SDValue emitRestoreZALazySave(SDValue Chain, SDLoc DL,
8114+
const AArch64TargetLowering &TLI,
8115+
const AArch64RegisterInfo &TRI,
8116+
AArch64FunctionInfo &FuncInfo,
8117+
SelectionDAG &DAG) {
8118+
// Conditionally restore the lazy save using a pseudo node.
8119+
RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_RESTORE;
8120+
TPIDR2Object &TPIDR2 = FuncInfo.getTPIDR2Obj();
8121+
// Register mask for the calling convention used by the restore libcall.
SDValue RegMask = DAG.getRegisterMask(TRI.getCallPreservedMask(
8122+
DAG.getMachineFunction(), TLI.getLibcallCallingConv(LC)));
8123+
SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
8124+
TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout()));
8125+
// Read TPIDR2_EL0; the value is an operand of RESTORE_ZA below.
SDValue TPIDR2_EL0 = DAG.getNode(
8126+
ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Chain,
8127+
DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
8128+
// Copy the address of the TPIDR2 block into X0 before 'calling' the
8129+
// RESTORE_ZA pseudo.
8130+
SDValue Glue;
8131+
SDValue TPIDR2Block = DAG.getFrameIndex(
8132+
TPIDR2.FrameIndex,
8133+
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
8134+
Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, TPIDR2Block, Glue);
8135+
// The last operand, Chain.getValue(1), is result #1 of the CopyToReg above
// (NOTE(review): presumably its glue output -- confirm).
Chain =
8136+
DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other,
8137+
{Chain, TPIDR2_EL0, DAG.getRegister(AArch64::X0, MVT::i64),
8138+
RestoreRoutine, RegMask, Chain.getValue(1)});
8139+
// Finally reset the TPIDR2_EL0 register to 0.
8140+
Chain = DAG.getNode(
8141+
ISD::INTRINSIC_VOID, DL, MVT::Other, Chain,
8142+
DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
8143+
DAG.getConstant(0, DL, MVT::i64));
8144+
// Count this restore as one more use of the TPIDR2 object.
TPIDR2.Uses++;
8145+
return Chain;
8146+
}
8147+
80898148
SDValue AArch64TargetLowering::lowerEHPadEntry(SDValue Chain, SDLoc const &DL,
80908149
SelectionDAG &DAG) const {
80918150
assert(Chain.getOpcode() == ISD::EntryToken && "Unexpected Chain value");
80928151
SDValue Glue = Chain.getValue(1);
80938152

80948153
MachineFunction &MF = DAG.getMachineFunction();
8095-
SMEAttrs SMEFnAttrs = MF.getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
8154+
auto &FuncInfo = *MF.getInfo<AArch64FunctionInfo>();
8155+
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
8156+
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
8157+
8158+
SMEAttrs SMEFnAttrs = FuncInfo.getSMEFnAttrs();
80968159

80978160
// The following conditions are true on entry to an exception handler:
80988161
// - PSTATE.SM is 0.
@@ -8107,14 +8170,43 @@ SDValue AArch64TargetLowering::lowerEHPadEntry(SDValue Chain, SDLoc const &DL,
81078170
// These mode changes are usually optimized away in catch blocks as they
81088171
// occur before the __cxa_begin_catch (which is a non-streaming function),
81098172
// but are necessary in some cases (such as for cleanups).
8173+
//
8174+
// Additionally, if the function has ZA or ZT0 state, we must restore it.
81108175

8176+
// [COND_]SMSTART SM
81118177
if (SMEFnAttrs.hasStreamingInterfaceOrBody())
8112-
return changeStreamingMode(DAG, DL, /*Enable=*/true, Chain,
8113-
/*Glue*/ Glue, AArch64SME::Always);
8178+
Chain = changeStreamingMode(DAG, DL, /*Enable=*/true, Chain,
8179+
/*Glue*/ Glue, AArch64SME::Always);
8180+
else if (SMEFnAttrs.hasStreamingCompatibleInterface())
8181+
Chain = changeStreamingMode(DAG, DL, /*Enable=*/true, Chain, Glue,
8182+
AArch64SME::IfCallerIsStreaming);
81148183

8115-
if (SMEFnAttrs.hasStreamingCompatibleInterface())
8116-
return changeStreamingMode(DAG, DL, /*Enable=*/true, Chain, Glue,
8117-
AArch64SME::IfCallerIsStreaming);
8184+
if (getTM().useNewSMEABILowering())
8185+
return Chain;
8186+
8187+
if (SMEFnAttrs.hasAgnosticZAInterface()) {
8188+
// Restore full ZA
8189+
Chain = emitSMEStateSaveRestore(*this, DAG, &FuncInfo, DL, Chain,
8190+
/*IsSave=*/false);
8191+
} else if (SMEFnAttrs.hasZAState() || SMEFnAttrs.hasZT0State()) {
8192+
// SMSTART ZA
8193+
Chain = DAG.getNode(
8194+
AArch64ISD::SMSTART, DL, DAG.getVTList(MVT::Other, MVT::Glue), Chain,
8195+
DAG.getTargetConstant(int32_t(AArch64SVCR::SVCRZA), DL, MVT::i32));
8196+
8197+
// Restore ZT0
8198+
if (SMEFnAttrs.hasZT0State()) {
8199+
SDValue ZT0FrameIndex =
8200+
getZT0FrameIndex(MF.getFrameInfo(), FuncInfo, DAG);
8201+
Chain =
8202+
DAG.getNode(AArch64ISD::RESTORE_ZT, DL, DAG.getVTList(MVT::Other),
8203+
{Chain, DAG.getConstant(0, DL, MVT::i32), ZT0FrameIndex});
8204+
}
8205+
8206+
// Restore ZA
8207+
if (SMEFnAttrs.hasZAState())
8208+
Chain = emitRestoreZALazySave(Chain, DL, *this, TRI, FuncInfo, DAG);
8209+
}
81188210

81198211
return Chain;
81208212
}
@@ -9232,30 +9324,6 @@ SDValue AArch64TargetLowering::changeStreamingMode(
92329324
return GetCheckVL(SMChange.getValue(0), SMChange.getValue(1));
92339325
}
92349326

9235-
// Emit a call to __arm_sme_save or __arm_sme_restore.
9236-
static SDValue emitSMEStateSaveRestore(const AArch64TargetLowering &TLI,
9237-
SelectionDAG &DAG,
9238-
AArch64FunctionInfo *Info, SDLoc DL,
9239-
SDValue Chain, bool IsSave) {
9240-
MachineFunction &MF = DAG.getMachineFunction();
9241-
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
9242-
FuncInfo->setSMESaveBufferUsed();
9243-
TargetLowering::ArgListTy Args;
9244-
Args.emplace_back(
9245-
DAG.getCopyFromReg(Chain, DL, Info->getSMESaveBufferAddr(), MVT::i64),
9246-
PointerType::getUnqual(*DAG.getContext()));
9247-
9248-
RTLIB::Libcall LC =
9249-
IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE;
9250-
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
9251-
TLI.getPointerTy(DAG.getDataLayout()));
9252-
auto *RetTy = Type::getVoidTy(*DAG.getContext());
9253-
TargetLowering::CallLoweringInfo CLI(DAG);
9254-
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
9255-
TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args));
9256-
return TLI.LowerCallTo(CLI).second;
9257-
}
9258-
92599327
static AArch64SME::ToggleCondition
92609328
getSMToggleCondition(const SMECallAttrs &CallAttrs) {
92619329
if (!CallAttrs.caller().hasStreamingCompatibleInterface() ||
@@ -10015,33 +10083,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
1001510083
{Result, DAG.getConstant(0, DL, MVT::i32), ZTFrameIdx});
1001610084

1001710085
if (RequiresLazySave) {
10018-
// Conditionally restore the lazy save using a pseudo node.
10019-
RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_RESTORE;
10020-
TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
10021-
SDValue RegMask = DAG.getRegisterMask(
10022-
TRI->getCallPreservedMask(MF, getLibcallCallingConv(LC)));
10023-
SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
10024-
getLibcallName(LC), getPointerTy(DAG.getDataLayout()));
10025-
SDValue TPIDR2_EL0 = DAG.getNode(
10026-
ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Result,
10027-
DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
10028-
// Copy the address of the TPIDR2 block into X0 before 'calling' the
10029-
// RESTORE_ZA pseudo.
10030-
SDValue Glue;
10031-
SDValue TPIDR2Block = DAG.getFrameIndex(
10032-
TPIDR2.FrameIndex,
10033-
DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
10034-
Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue);
10035-
Result =
10036-
DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other,
10037-
{Result, TPIDR2_EL0, DAG.getRegister(AArch64::X0, MVT::i64),
10038-
RestoreRoutine, RegMask, Result.getValue(1)});
10039-
// Finally reset the TPIDR2_EL0 register to 0.
10040-
Result = DAG.getNode(
10041-
ISD::INTRINSIC_VOID, DL, MVT::Other, Result,
10042-
DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32),
10043-
DAG.getConstant(0, DL, MVT::i64));
10044-
TPIDR2.Uses++;
10086+
Result = emitRestoreZALazySave(Result, DL, *this, *TRI, *FuncInfo, DAG);
1004510087
} else if (RequiresSaveAllZA) {
1004610088
Result = emitSMEStateSaveRestore(*this, DAG, FuncInfo, DL, Result,
1004710089
/*IsSave=*/false);
@@ -11736,6 +11778,28 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
1173611778
return DAG.getNode(ISD::AND, DL, VT, LHS, Shift);
1173711779
}
1173811780

11781+
// Check for sign bit test patterns that can use TST optimization.
11782+
// (SELECT_CC setlt, sign_extend_inreg, 0, tval, fval)
11783+
// -> TST %operand, sign_bit; CSEL
11784+
// (SELECT_CC setlt, sign_extend, 0, tval, fval)
11785+
// -> TST %operand, sign_bit; CSEL
11786+
if (CC == ISD::SETLT && RHSC && RHSC->isZero() && LHS.hasOneUse() &&
11787+
(LHS.getOpcode() == ISD::SIGN_EXTEND_INREG ||
11788+
LHS.getOpcode() == ISD::SIGN_EXTEND)) {
11789+
11790+
uint64_t SignBitPos;
11791+
std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
11792+
EVT TestVT = LHS.getValueType();
11793+
SDValue SignBitConst = DAG.getConstant(1ULL << SignBitPos, DL, TestVT);
11794+
SDValue TST =
11795+
DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(TestVT, MVT::i32),
11796+
LHS, SignBitConst);
11797+
11798+
SDValue Flags = TST.getValue(1);
11799+
return DAG.getNode(AArch64ISD::CSEL, DL, TVal.getValueType(), TVal, FVal,
11800+
DAG.getConstant(AArch64CC::NE, DL, MVT::i32), Flags);
11801+
}
11802+
1173911803
// Canonicalise absolute difference patterns:
1174011804
// select_cc lhs, rhs, sub(lhs, rhs), sub(rhs, lhs), cc ->
1174111805
// select_cc lhs, rhs, sub(lhs, rhs), neg(sub(lhs, rhs)), cc

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9559,3 +9559,20 @@ bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
95599559
EVT ScalarVT = VecVT.getScalarType();
95609560
return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
95619561
}
9562+
9563+
/// Returns true only when EXTRACT_SUBVECTOR is legal or custom for \p ResVT
/// and the extraction starts at \p Index 0. \p SrcVT is not consulted.
bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
9564+
unsigned Index) const {
9565+
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
9566+
return false;
9567+
9568+
// Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
9569+
return Index == 0;
9570+
}
9571+
9572+
/// Returns true only when the scalar type of \p VT is f32 or f64 and the
/// element is taken from \p Index 0.
bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
9573+
unsigned Index) const {
9574+
EVT EltVT = VT.getScalarType();
9575+
9576+
// Extracting a scalar FP value from index 0 of a vector is free.
9577+
return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
9578+
}

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,9 @@ class LoongArchTargetLowering : public TargetLowering {
338338
unsigned Depth) const override;
339339

340340
bool shouldScalarizeBinop(SDValue VecOp) const override;
341+
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
342+
unsigned Index) const override;
343+
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override;
341344

342345
/// Check if a constant splat can be generated using [x]vldi, where imm[12]
343346
/// is 1.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3902,7 +3902,8 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
39023902
if (VF.isScalar())
39033903
continue;
39043904

3905-
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
3905+
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
3906+
*CM.PSE.getSE());
39063907
precomputeCosts(*Plan, VF, CostCtx);
39073908
auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
39083909
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -4159,7 +4160,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
41594160

41604161
// Add on other costs that are modelled in VPlan, but not in the legacy
41614162
// cost model.
4162-
VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind);
4163+
VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
4164+
*CM.PSE.getSE());
41634165
VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
41644166
assert(VectorRegion && "Expected to have a vector region!");
41654167
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -6834,7 +6836,7 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
68346836

68356837
InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
68366838
ElementCount VF) const {
6837-
VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind);
6839+
VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE());
68386840
InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
68396841

68406842
// Now compute and add the VPlan-based cost.
@@ -7067,7 +7069,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
70677069
// simplifications not accounted for in the legacy cost model. If that's the
70687070
// case, don't trigger the assertion, as the extra simplifications may cause a
70697071
// different VF to be picked by the VPlan-based cost model.
7070-
VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind);
7072+
VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
7073+
*CM.PSE.getSE());
70717074
precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
70727075
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
70737076
// with early exits and plans with additional VPlan simplifications. The
@@ -8597,7 +8600,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
85978600
// TODO: Enable following transform when the EVL-version of extended-reduction
85988601
// and mulacc-reduction are implemented.
85998602
if (!CM.foldTailWithEVL()) {
8600-
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind);
8603+
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
8604+
*CM.PSE.getSE());
86018605
VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
86028606
CostCtx, Range);
86038607
}
@@ -10054,7 +10058,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1005410058
bool ForceVectorization =
1005510059
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
1005610060
VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
10057-
CM.CostKind);
10061+
CM.CostKind, *CM.PSE.getSE());
1005810062
if (!ForceVectorization &&
1005910063
!isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
1006010064
LVP.getPlanFor(VF.Width), SEL,

0 commit comments

Comments
 (0)