@@ -2861,10 +2861,10 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
   return ScalarCallCost;
 }
 
-static Type *maybeVectorizeType(Type *Elt, ElementCount VF) {
-  if (VF.isScalar() || (!Elt->isIntOrPtrTy() && !Elt->isFloatingPointTy()))
-    return Elt;
-  return VectorType::get(Elt, VF);
+static Type *maybeVectorizeType(Type *Ty, ElementCount VF) {
+  if (VF.isScalar() || !canWidenType(Ty))
+    return Ty;
+  return ToWideTy(Ty, VF);
 }
 
 InstructionCost
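The rewritten maybeVectorizeType leans on canWidenType and ToWideTy, and later hunks use getContainedTypes; none of the three is defined in this excerpt. A minimal sketch of what they plausibly look like, assuming a literal struct is treated as a bundle of independently widenable scalars (the names match the patch, but the bodies here are illustrative, not copies):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

// Leaf types of Ty: the element types of a literal struct, else Ty itself.
static SmallVector<Type *, 2> getContainedTypes(Type *Ty) {
  if (auto *StructTy = dyn_cast<StructType>(Ty))
    return SmallVector<Type *, 2>(StructTy->element_begin(),
                                  StructTy->element_end());
  return {Ty};
}

// A type can be widened if it is a vectorizable scalar or a literal struct
// whose fields are all vectorizable scalars.
static bool canWidenType(Type *Ty) {
  if (auto *StructTy = dyn_cast<StructType>(Ty))
    return StructTy->isLiteral() &&
           all_of(StructTy->elements(), [](Type *ElTy) {
             return VectorType::isValidElementType(ElTy);
           });
  return VectorType::isValidElementType(Ty);
}

// Widen Ty elementwise: { f32, f32 } at VF = 4 becomes
// { <4 x f32>, <4 x f32> }; a plain scalar becomes a single vector type.
static Type *ToWideTy(Type *Ty, ElementCount VF) {
  if (auto *StructTy = dyn_cast<StructType>(Ty)) {
    SmallVector<Type *, 2> WideElts;
    for (Type *ElTy : StructTy->elements())
      WideElts.push_back(VectorType::get(ElTy, VF));
    return StructType::get(Ty->getContext(), WideElts);
  }
  return VectorType::get(Ty, VF);
}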
@@ -3635,9 +3635,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
 
       // ExtractValue instructions must be uniform, because the operands are
       // known to be loop-invariant.
-      if (auto *EVI = dyn_cast<ExtractValueInst>(&I)) {
-        assert(IsOutOfScope(EVI->getAggregateOperand()) &&
-               "Expected aggregate value to be loop invariant");
+      if (auto *EVI = dyn_cast<ExtractValueInst>(&I);
+          EVI && IsOutOfScope(EVI->getAggregateOperand())) {
         AddToWorklistIfAllowed(EVI);
         continue;
       }
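The old assert could rely on extractvalue only ever consuming loop-invariant aggregates; once struct-returning calls can be widened, the aggregate may be an in-loop call result, so invariance is demoted from an assertion to part of the condition. A small self-contained restatement of that guard (hypothetical helper name, with IsOutOfScope approximated by loop containment):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Sketch: an extractvalue is uniform only when its aggregate operand is
// defined outside the loop. A widened in-loop struct call no longer
// satisfies that, which is why the assert above became a condition.
static bool isUniformExtractValue(const ExtractValueInst *EVI,
                                  const Loop *TheLoop) {
  const Value *Agg = EVI->getAggregateOperand();
  const auto *AggI = dyn_cast<Instruction>(Agg);
  return !AggI || !TheLoop->contains(AggI);
}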
@@ -5461,10 +5460,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
     // and phi nodes.
     TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
     if (isScalarWithPredication(I, VF) && !I->getType()->isVoidTy()) {
-      ScalarCost += TTI.getScalarizationOverhead(
-          cast<VectorType>(ToVectorTy(I->getType(), VF)),
-          APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ true,
-          /*Extract*/ false, CostKind);
+      Type *WideTy = ToWideTy(I->getType(), VF);
+      for (Type *VectorTy : getContainedTypes(WideTy)) {
+        ScalarCost += TTI.getScalarizationOverhead(
+            cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
+            /*Insert*/ true,
+            /*Extract*/ false, CostKind);
+      }
       ScalarCost +=
           VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);
     }
@@ -5953,13 +5955,17 @@ InstructionCost LoopVectorizationCostModel::getScalarizationOverhead(
     return 0;
 
   InstructionCost Cost = 0;
-  Type *RetTy = ToVectorTy(I->getType(), VF);
+  Type *RetTy = ToWideTy(I->getType(), VF);
   if (!RetTy->isVoidTy() &&
-      (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
-    Cost += TTI.getScalarizationOverhead(
-        cast<VectorType>(RetTy), APInt::getAllOnes(VF.getKnownMinValue()),
-        /*Insert*/ true,
-        /*Extract*/ false, CostKind);
+      (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore())) {
+
+    for (Type *VectorTy : getContainedTypes(RetTy)) {
+      Cost += TTI.getScalarizationOverhead(
+          cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getKnownMinValue()),
+          /*Insert*/ true,
+          /*Extract*/ false, CostKind);
+    }
+  }
 
   // Some targets keep addresses scalar.
   if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
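The per-contained-type loop now appears in both computePredInstDiscount and getScalarizationOverhead, differing only in whether the element count comes from VF.getFixedValue() or VF.getKnownMinValue(). A hypothetical helper (not part of the patch) capturing the shared pattern, assuming the getContainedTypes sketched earlier:

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Hypothetical refactoring of the repeated loop: total the insert-side
// scalarization overhead over every vector contained in a widened type.
// For a plain vector, getContainedTypes yields just that vector, so this
// degenerates to the single pre-patch call.
static InstructionCost
getWideTypeScalarizationOverhead(const TargetTransformInfo &TTI, Type *WideTy,
                                 unsigned NumElts,
                                 TTI::TargetCostKind CostKind) {
  InstructionCost Cost = 0;
  for (Type *VectorTy : getContainedTypes(WideTy))
    Cost += TTI.getScalarizationOverhead(cast<VectorType>(VectorTy),
                                         APInt::getAllOnes(NumElts),
                                         /*Insert=*/true,
                                         /*Extract=*/false, CostKind);
  return Cost;
}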
@@ -6219,9 +6225,9 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
 
     bool MaskRequired = Legal->isMaskRequired(CI);
     // Compute corresponding vector type for return value and arguments.
-    Type *RetTy = ToVectorTy(ScalarRetTy, VF);
+    Type *RetTy = ToWideTy(ScalarRetTy, VF);
     for (Type *ScalarTy : ScalarTys)
-      Tys.push_back(ToVectorTy(ScalarTy, VF));
+      Tys.push_back(ToWideTy(ScalarTy, VF));
 
     // An in-loop reduction using an fmuladd intrinsic is a special case;
     // we don't want the normal cost for that intrinsic.
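Concretely, for a call whose scalar signature returns { float, float }, widening at VF = 4 produces { <4 x float>, <4 x float> } for RetTy, while scalar arguments still become single vectors. A standalone illustration using the ToWideTy sketched above:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *F32 = Type::getFloatTy(Ctx);
  // Scalar signature under consideration: { float, float } @foo(float).
  StructType *ScalarRetTy = StructType::get(Ctx, {F32, F32});
  ElementCount VF = ElementCount::getFixed(4);

  Type *RetTy = ToWideTy(ScalarRetTy, VF); // { <4 x float>, <4 x float> }
  Type *ArgTy = ToWideTy(F32, VF);         // <4 x float>
  RetTy->print(outs() << "RetTy: ");
  ArgTy->print(outs() << "\nArgTy: ");
  outs() << "\n";
}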
@@ -6398,7 +6404,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
            HasSingleCopyAfterVectorization(I, VF));
     VectorTy = RetTy;
   } else
-    VectorTy = ToVectorTy(RetTy, VF);
+    VectorTy = ToWideTy(RetTy, VF);
 
   if (VF.isVector() && VectorTy->isVectorTy() &&
       !TTI.getNumberOfParts(VectorTy))