@@ -3098,61 +3098,6 @@ bool VPReplicateRecipe::shouldPack() const {
   });
 }
 
-/// Returns true if \p Ptr is a pointer computation for which the legacy cost
-/// model computes a SCEV expression when computing the address cost.
-static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) {
-  auto *PtrR = Ptr->getDefiningRecipe();
-  if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&
-                  cast<VPReplicateRecipe>(PtrR)->getOpcode() ==
-                      Instruction::GetElementPtr) ||
-                 isa<VPWidenGEPRecipe>(PtrR)))
-    return false;
-
-  // We are looking for a GEP where all indices are either loop invariant or
-  // inductions.
-  for (VPValue *Opd : drop_begin(PtrR->operands())) {
-    if (!Opd->isDefinedOutsideLoopRegions() &&
-        !isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd))
-      return false;
-  }
-
-  return true;
-}
-
-/// Returns true if \p V is used as part of the address of another load or
-/// store.
-static bool isUsedByLoadStoreAddress(const VPUser *V) {
-  SmallPtrSet<const VPUser *, 4> Seen;
-  SmallVector<const VPUser *> WorkList = {V};
-
-  while (!WorkList.empty()) {
-    auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val());
-    if (!Cur || !Seen.insert(Cur).second)
-      continue;
-
-    for (VPUser *U : Cur->users()) {
-      if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(U))
-        if (InterleaveR->getAddr() == Cur)
-          return true;
-      if (auto *RepR = dyn_cast<VPReplicateRecipe>(U)) {
-        if (RepR->getOpcode() == Instruction::Load &&
-            RepR->getOperand(0) == Cur)
-          return true;
-        if (RepR->getOpcode() == Instruction::Store &&
-            RepR->getOperand(1) == Cur)
-          return true;
-      }
-      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
-        if (MemR->getAddr() == Cur && MemR->isConsecutive())
-          return true;
-      }
-    }
-
-    append_range(WorkList, cast<VPSingleDefRecipe>(Cur)->users());
-  }
-  return false;
-}
-
 InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
                                                VPCostContext &Ctx) const {
   Instruction *UI = cast<Instruction>(getUnderlyingValue());
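Not part of the patch: the removed isUsedByLoadStoreAddress above is a standard worklist traversal over transitive users with a visited set. Below is a minimal standalone sketch of that pattern, using plain STL containers and a hypothetical Node type with a boolean flag in place of VPlan recipes and the getAddr()/operand-position checks.

// Standalone sketch only. `Node` and `ConsumesOperandAsAddress` are
// hypothetical stand-ins for VPlan recipes and the getAddr()/operand-position
// checks; the real helper walks VPUser/VPSingleDefRecipe objects.
#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> Users;             // nodes that consume this value
  bool ConsumesOperandAsAddress = false; // simplified address-use check
};

// Same shape as the removed helper: pop from a worklist, skip nodes already
// seen, answer true on the first user that looks like an address use, and
// otherwise keep walking transitive users.
static bool isUsedByAddress(Node *Start) {
  std::unordered_set<Node *> Seen;
  std::vector<Node *> WorkList = {Start};
  while (!WorkList.empty()) {
    Node *Cur = WorkList.back();
    WorkList.pop_back();
    if (!Seen.insert(Cur).second)
      continue;
    for (Node *U : Cur->Users)
      if (U->ConsumesOperandAsAddress)
        return true;
    WorkList.insert(WorkList.end(), Cur->Users.begin(), Cur->Users.end());
  }
  return false;
}

int main() {
  Node Load;                       // hypothetical load using its operand as an address
  Load.ConsumesOperandAsAddress = true;
  Node Def;                        // the value whose users we query
  Def.Users.push_back(&Load);
  return isUsedByAddress(&Def) ? 0 : 1;
}

The Seen set is what keeps the walk from revisiting nodes when the use graph contains diamonds, mirroring the SmallPtrSet in the removed code.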
@@ -3260,58 +3205,21 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
   }
   case Instruction::Load:
   case Instruction::Store: {
-    if (VF.isScalable() && !isSingleScalar())
-      return InstructionCost::getInvalid();
-
+    if (isSingleScalar()) {
+      bool IsLoad = UI->getOpcode() == Instruction::Load;
+      Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
+      Type *ScalarPtrTy = Ctx.Types.inferScalarType(getOperand(IsLoad ? 0 : 1));
+      const Align Alignment = getLoadStoreAlignment(UI);
+      unsigned AS = getLoadStoreAddressSpace(UI);
+      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
+      InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
+          UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo, UI);
+      return ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
+                                   ScalarPtrTy, nullptr, nullptr, Ctx.CostKind);
+    }
     // TODO: See getMemInstScalarizationCost for how to handle replicating and
     // predicated cases.
-    const VPRegionBlock *ParentRegion = getParent()->getParent();
-    if (ParentRegion && ParentRegion->isReplicator())
-      break;
-
-    bool IsLoad = UI->getOpcode() == Instruction::Load;
-    const VPValue *PtrOp = getOperand(!IsLoad);
-    // TODO: Handle cases where we need to pass a SCEV to
-    // getAddressComputationCost.
-    if (shouldUseAddressAccessSCEV(PtrOp))
-      break;
-
-    Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
-    Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
-    const Align Alignment = getLoadStoreAlignment(UI);
-    unsigned AS = getLoadStoreAddressSpace(UI);
-    TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
-    InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
-        UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
-
-    Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
-
-    InstructionCost ScalarCost =
-        ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
-                              PtrTy, &Ctx.SE, nullptr, Ctx.CostKind);
-    if (isSingleScalar())
-      return ScalarCost;
-
-    SmallVector<const VPValue *> OpsToScalarize;
-    Type *ResultTy = Type::getVoidTy(PtrTy->getContext());
-    // Set ResultTy and OpsToScalarize, if scalarization is needed. Currently we
-    // don't assign scalarization overhead in general, if the target prefers
-    // vectorized addressing or the loaded value is used as part of an address
-    // of another load or store.
-    bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
-    if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) {
-      bool EfficientVectorLoadStore =
-          Ctx.TTI.supportsEfficientVectorElementLoadStore();
-      if (!(IsLoad && !PreferVectorizedAddressing) &&
-          !(!IsLoad && EfficientVectorLoadStore))
-        append_range(OpsToScalarize, operands());
-
-      if (!EfficientVectorLoadStore)
-        ResultTy = Ctx.Types.inferScalarType(this);
-    }
-
-    return (ScalarCost * VF.getFixedValue()) +
-           Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true);
+    break;
   }
   }
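For readability, this is roughly how the Load/Store case reads once the hunk above is applied; the snippet is reconstructed purely from the context and added lines, with the surrounding switch elided.

  case Instruction::Load:
  case Instruction::Store: {
    if (isSingleScalar()) {
      bool IsLoad = UI->getOpcode() == Instruction::Load;
      Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
      Type *ScalarPtrTy = Ctx.Types.inferScalarType(getOperand(IsLoad ? 0 : 1));
      const Align Alignment = getLoadStoreAlignment(UI);
      unsigned AS = getLoadStoreAddressSpace(UI);
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
      InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
          UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo, UI);
      return ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
                                   ScalarPtrTy, nullptr, nullptr, Ctx.CostKind);
    }
    // TODO: See getMemInstScalarizationCost for how to handle replicating and
    // predicated cases.
    break;
  }

The single-scalar path now prices the scalar memory operation plus a plain scalar address computation (no SCEV is passed to getAddressComputationCost), while all other replicating cases break out of the switch and are handled by the code that follows it.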