@@ -2390,7 +2390,9 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
                                                VPReplicateRecipe *RepRecipe,
                                                const VPLane &Lane,
                                                VPTransformState &State) {
-  assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
+  assert((!Instr->getType()->isAggregateType() ||
+          canVectorizeTy(Instr->getType())) &&
+         "Expected vectorizable or non-aggregate type.");
 
   // Does this instruction return a value ?
   bool IsVoidRetTy = Instr->getType()->isVoidTy();
@@ -2900,10 +2902,10 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
   return ScalarCallCost;
 }
 
-static Type *maybeVectorizeType(Type *Elt, ElementCount VF) {
-  if (VF.isScalar() || (!Elt->isIntOrPtrTy() && !Elt->isFloatingPointTy()))
-    return Elt;
-  return VectorType::get(Elt, VF);
+static Type *maybeVectorizeType(Type *Ty, ElementCount VF) {
+  if (VF.isScalar() || !canVectorizeTy(Ty))
+    return Ty;
+  return toVectorizedTy(Ty, VF);
 }
 
 InstructionCost
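
Note: canVectorizeTy, toVectorizedTy, and getContainedTypes are the struct-of-vectors helpers from llvm/include/llvm/IR/VectorTypeUtils.h. A minimal sketch of their intended semantics, as an illustration only (see the header for the real definitions):

    // canVectorizeTy(Ty): true for vectorizable scalar types, and for literal
    // structs whose fields are all vectorizable scalar types.
    //
    // toVectorizedTy(Ty, EC): widens structs field-wise, scalars directly:
    //   toVectorizedTy(float, 4)        -> <4 x float>
    //   toVectorizedTy({float, i32}, 4) -> {<4 x float>, <4 x i32>}
    //
    // getContainedTypes(Ty): the field types for a struct, {Ty} otherwise,
    // letting the cost-model loops below handle both cases uniformly.
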
@@ -3650,13 +3652,15 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
         }
       }
 
-      // ExtractValue instructions must be uniform, because the operands are
-      // known to be loop-invariant.
       if (auto *EVI = dyn_cast<ExtractValueInst>(&I)) {
-        assert(IsOutOfScope(EVI->getAggregateOperand()) &&
-               "Expected aggregate value to be loop invariant");
-        AddToWorklistIfAllowed(EVI);
-        continue;
+        if (IsOutOfScope(EVI->getAggregateOperand())) {
+          AddToWorklistIfAllowed(EVI);
+          continue;
+        }
+        // Only ExtractValue instructions where the aggregate value comes from a
+        // call are allowed to be non-uniform.
+        assert(isa<CallInst>(EVI->getAggregateOperand()) &&
+               "Expected aggregate value to be call return value");
       }
 
       // If there's no pointer operand, there's nothing to do.
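
A hypothetical illustration of the two paths above (not taken from the patch or its tests):

    // %a = extractvalue { float, float } %loop.invariant, 0
    //   ; aggregate is out of scope -> still added to the uniforms worklist
    // %s = call { float, float } @foo(float %x)  ; call inside the loop body
    // %b = extractvalue { float, float } %s, 0   ; may now be widened instead
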
@@ -4526,8 +4530,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
         llvm_unreachable("unhandled recipe");
       }
 
-      auto WillWiden = [&TTI, VF](Type *ScalarTy) {
-        Type *VectorTy = toVectorTy(ScalarTy, VF);
+      auto WillGenerateTargetVectors = [&TTI, VF](Type *VectorTy) {
         unsigned NumLegalParts = TTI.getNumberOfParts(VectorTy);
         if (!NumLegalParts)
           return false;
@@ -4539,7 +4542,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
           // explicitly ask TTI about the register class uses for each part.
           return NumLegalParts <= VF.getKnownMinValue();
         }
-        // Two or more parts that share a register - are vectorized.
+        // Two or more elements that share a register - are vectorized.
         return NumLegalParts < VF.getKnownMinValue();
       };
 
@@ -4558,7 +4561,8 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
         Type *ScalarTy = TypeInfo.inferScalarType(ToCheck);
         if (!Visited.insert({ScalarTy}).second)
           continue;
-        if (WillWiden(ScalarTy))
+        Type *WideTy = toVectorizedTy(ScalarTy, VF);
+        if (any_of(getContainedTypes(WideTy), WillGenerateTargetVectors))
           return true;
       }
     }
@@ -5515,10 +5519,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
     // Compute the scalarization overhead of needed insertelement instructions
     // and phi nodes.
     if (isScalarWithPredication(I, VF) && !I->getType()->isVoidTy()) {
-      ScalarCost += TTI.getScalarizationOverhead(
-          cast<VectorType>(toVectorTy(I->getType(), VF)),
-          APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ true,
-          /*Extract*/ false, CostKind);
+      Type *WideTy = toVectorizedTy(I->getType(), VF);
+      for (Type *VectorTy : getContainedTypes(WideTy)) {
+        ScalarCost += TTI.getScalarizationOverhead(
+            cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
+            /*Insert=*/true,
+            /*Extract=*/false, CostKind);
+      }
       ScalarCost +=
           VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);
     }
@@ -5529,15 +5536,18 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
     // overhead.
     for (Use &U : I->operands())
       if (auto *J = dyn_cast<Instruction>(U.get())) {
-        assert(VectorType::isValidElementType(J->getType()) &&
+        assert(canVectorizeTy(J->getType()) &&
                "Instruction has non-scalar type");
         if (CanBeScalarized(J))
           Worklist.push_back(J);
         else if (needsExtract(J, VF)) {
-          ScalarCost += TTI.getScalarizationOverhead(
-              cast<VectorType>(toVectorTy(J->getType(), VF)),
-              APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ false,
-              /*Extract*/ true, CostKind);
+          Type *WideTy = toVectorizedTy(J->getType(), VF);
+          for (Type *VectorTy : getContainedTypes(WideTy)) {
+            ScalarCost += TTI.getScalarizationOverhead(
+                cast<VectorType>(VectorTy),
+                APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ false,
+                /*Extract*/ true, CostKind);
+          }
         }
       }
 
@@ -6016,13 +6026,17 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
     return 0;
 
   InstructionCost Cost = 0;
-  Type *RetTy = toVectorTy(I->getType(), VF);
+  Type *RetTy = toVectorizedTy(I->getType(), VF);
   if (!RetTy->isVoidTy() &&
-      (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
-    Cost += TTI.getScalarizationOverhead(
-        cast<VectorType>(RetTy), APInt::getAllOnes(VF.getKnownMinValue()),
-        /*Insert*/ true,
-        /*Extract*/ false, CostKind);
+      (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore())) {
+
+    for (Type *VectorTy : getContainedTypes(RetTy)) {
+      Cost += TTI.getScalarizationOverhead(
+          cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getKnownMinValue()),
+          /*Insert=*/true,
+          /*Extract=*/false, CostKind);
+    }
+  }
 
   // Some targets keep addresses scalar.
   if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
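
As a made-up example of the field-wise accumulation above, suppose I returns {float, i32} and VF is 4:

    // RetTy = toVectorizedTy({float, i32}, 4) == {<4 x float>, <4 x i32>}
    // getContainedTypes(RetTy) yields <4 x float> and <4 x i32>, so:
    //   Cost += getScalarizationOverhead(<4 x float>, 0b1111, /*Insert=*/true)
    //   Cost += getScalarizationOverhead(<4 x i32>,  0b1111, /*Insert=*/true)
    // The previous cast<VectorType>(RetTy) would have asserted on the struct.
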
@@ -6280,9 +6294,9 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
 
       bool MaskRequired = Legal->isMaskRequired(CI);
       // Compute corresponding vector type for return value and arguments.
-      Type *RetTy = toVectorTy(ScalarRetTy, VF);
+      Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
       for (Type *ScalarTy : ScalarTys)
-        Tys.push_back(toVectorTy(ScalarTy, VF));
+        Tys.push_back(toVectorizedTy(ScalarTy, VF));
 
       // An in-loop reduction using an fmuladd intrinsic is a special case;
       // we don't want the normal cost for that intrinsic.
@@ -6459,7 +6473,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
            HasSingleCopyAfterVectorization(I, VF));
     VectorTy = RetTy;
   } else
-    VectorTy = toVectorTy(RetTy, VF);
+    VectorTy = toVectorizedTy(RetTy, VF);
 
   if (VF.isVector() && VectorTy->isVectorTy() &&
       !TTI.getNumberOfParts(VectorTy))
@@ -8601,7 +8615,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
   case Instruction::Shl:
   case Instruction::Sub:
   case Instruction::Xor:
-  case Instruction::Freeze:
+  case Instruction::Freeze: {
     SmallVector<VPValue *> NewOps(Operands);
     if (Instruction::isBinaryOp(I->getOpcode())) {
       // The legacy cost model uses SCEV to check if some of the operands are
@@ -8626,6 +8640,16 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
       NewOps[1] = GetConstantViaSCEV(NewOps[1]);
     }
     return new VPWidenRecipe(*I, make_range(NewOps.begin(), NewOps.end()));
+  }
+  case Instruction::ExtractValue: {
+    SmallVector<VPValue *> NewOps(Operands);
+    Type *I32Ty = IntegerType::getInt32Ty(I->getContext());
+    auto *EVI = cast<ExtractValueInst>(I);
+    assert(EVI->getNumIndices() == 1 && "Expected one extractvalue index");
+    unsigned Idx = EVI->getIndices()[0];
+    NewOps.push_back(Plan.getOrAddLiveIn(ConstantInt::get(I32Ty, Idx, false)));
+    return new VPWidenRecipe(*I, make_range(NewOps.begin(), NewOps.end()));
+  }
   };
 }
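
With the extractvalue index materialized as a constant live-in operand, the widened recipe can lower to a single extractvalue on the struct-of-vectors result; a simplified sketch of the assumed execute-side output:

    // %wide.s = call { <4 x float>, <4 x float> } @foo_v4(<4 x float> %v)
    // %wide.e = extractvalue { <4 x float>, <4 x float> } %wide.s, 0
    // i.e. no per-lane scalarization or shuffle is required.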
86318655
@@ -9928,7 +9952,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
                              VectorType::get(UI->getType(), State.VF));
       State.set(this, Poison);
     }
-    State.packScalarIntoVectorValue(this, *State.Lane);
+    State.packScalarIntoVectorizedValue(this, *State.Lane);
   }
   return;
 }
@@ -10445,13 +10469,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     return false;
   }
 
-  if (LVL.hasStructVectorCall()) {
-    reportVectorizationFailure("Auto-vectorization of calls that return struct "
-                               "types is not yet supported",
-                               "StructCallVectorizationUnsupported", ORE, L);
-    return false;
-  }
-
   // Entrance to the VPlan-native vectorization path. Outer loops are processed
   // here. They may require CFG and instruction level transformations before
   // even evaluating whether vectorization is profitable. Since we cannot modify
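
Removing this bailout, together with the cost-model and recipe changes above, lets loops like the following reach the vectorizer. Hypothetical source for illustration; sincosf_like stands in for any callee with a struct-returning vector variant (e.g. attached via vector-function-abi-variant):

    struct SinCos { float Sin, Cos; };
    SinCos sincosf_like(float X); // assumed {<4 x float>, <4 x float>} variant

    void loop(float *X, float *S, float *C, int N) {
      for (int I = 0; I < N; ++I) {
        SinCos R = sincosf_like(X[I]); // widened struct-returning call
        S[I] = R.Sin;                  // extractvalue 0, now widenable
        C[I] = R.Cos;                  // extractvalue 1, now widenable
      }
    }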