@@ -662,6 +662,151 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
662662 }
663663}
664664
665+ /// Try to simplify recipe \p R, using \p TypeInfo to infer scalar types.
666+ static void simplifyRecipe (VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
667+ using namespace llvm ::VPlanPatternMatch;
668+
669+ if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
670+ // Try to remove redundant blend recipes.
671+ SmallPtrSet<VPValue *, 4 > UniqueValues;
672+ if (Blend->isNormalized () || !match (Blend->getMask (0 ), m_False ()))
673+ UniqueValues.insert (Blend->getIncomingValue (0 ));
674+ for (unsigned I = 1 ; I != Blend->getNumIncomingValues (); ++I)
675+ if (!match (Blend->getMask (I), m_False ()))
676+ UniqueValues.insert (Blend->getIncomingValue (I));
677+
678+ if (UniqueValues.size () == 1 ) {
679+ Blend->replaceAllUsesWith (*UniqueValues.begin ());
680+ Blend->eraseFromParent ();
681+ return ;
682+ }
683+
684+ if (Blend->isNormalized ())
685+ return ;
686+
687+ // Normalize the blend so its first incoming value is used as the initial
688+ // value with the others blended into it.
689+
690+ unsigned StartIndex = 0 ;
691+ for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
692+ // If a value's mask is used only by the blend then it can be deadcoded.
693+ // TODO: Find the most expensive mask that can be deadcoded, or a mask
694+ // that's used by multiple blends where it can be removed from them all.
695+ VPValue *Mask = Blend->getMask (I);
696+ if (Mask->getNumUsers () == 1 && !match (Mask, m_False ())) {
697+ StartIndex = I;
698+ break ;
699+ }
700+ }
701+
702+ SmallVector<VPValue *, 4 > OperandsWithMask;
703+ OperandsWithMask.push_back (Blend->getIncomingValue (StartIndex));
704+
705+ for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
706+ if (I == StartIndex)
707+ continue ;
708+ OperandsWithMask.push_back (Blend->getIncomingValue (I));
709+ OperandsWithMask.push_back (Blend->getMask (I));
710+ }
711+
712+ auto *NewBlend = new VPBlendRecipe (
713+ cast<PHINode>(Blend->getUnderlyingValue ()), OperandsWithMask);
714+ NewBlend->insertBefore (&R);
715+
716+ VPValue *DeadMask = Blend->getMask (StartIndex);
717+ Blend->replaceAllUsesWith (NewBlend);
718+ Blend->eraseFromParent ();
719+ recursivelyDeleteDeadRecipes (DeadMask);
720+ return ;
721+ }
722+
723+ VPValue *A;
724+ if (match (&R, m_Trunc (m_ZExtOrSExt (m_VPValue (A))))) {
725+ VPValue *Trunc = R.getVPSingleValue ();
726+ Type *TruncTy = TypeInfo.inferScalarType (Trunc);
727+ Type *ATy = TypeInfo.inferScalarType (A);
728+ if (TruncTy == ATy) {
729+ Trunc->replaceAllUsesWith (A);
730+ } else {
731+ // Don't replace a scalarizing recipe with a widened cast.
732+ if (isa<VPReplicateRecipe>(&R))
733+ return ;
734+ if (ATy->getScalarSizeInBits () < TruncTy->getScalarSizeInBits ()) {
735+
736+ unsigned ExtOpcode = match (R.getOperand (0 ), m_SExt (m_VPValue ()))
737+ ? Instruction::SExt
738+ : Instruction::ZExt;
739+ auto *VPC =
740+ new VPWidenCastRecipe (Instruction::CastOps (ExtOpcode), A, TruncTy);
741+ if (auto *UnderlyingExt = R.getOperand (0 )->getUnderlyingValue ()) {
742+ // UnderlyingExt has distinct return type, used to retain legacy cost.
743+ VPC->setUnderlyingValue (UnderlyingExt);
744+ }
745+ VPC->insertBefore (&R);
746+ Trunc->replaceAllUsesWith (VPC);
747+ } else if (ATy->getScalarSizeInBits () > TruncTy->getScalarSizeInBits ()) {
748+ auto *VPC = new VPWidenCastRecipe (Instruction::Trunc, A, TruncTy);
749+ VPC->insertBefore (&R);
750+ Trunc->replaceAllUsesWith (VPC);
751+ }
752+ }
753+ #ifndef NDEBUG
754+ // Verify that the cached type info for both A and its users is still
755+ // accurate by comparing it to freshly computed types.
756+ VPTypeAnalysis TypeInfo2 (
757+ R.getParent ()->getPlan ()->getCanonicalIV ()->getScalarType ());
758+ assert (TypeInfo.inferScalarType (A) == TypeInfo2.inferScalarType (A));
759+ for (VPUser *U : A->users ()) {
760+ auto *R = cast<VPRecipeBase>(U);
761+ for (VPValue *VPV : R->definedValues ())
762+ assert (TypeInfo.inferScalarType (VPV) == TypeInfo2.inferScalarType (VPV));
763+ }
764+ #endif
765+ }
766+
767+ // Simplify (X && Y) || (X && !Y) -> X.
768+ // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
769+ // && (Y || Z) and (X || !X) into true. This requires queuing newly created
770+ // recipes to be visited during simplification.
771+ VPValue *X, *Y, *X1, *Y1;
772+ if (match (&R,
773+ m_c_BinaryOr (m_LogicalAnd (m_VPValue (X), m_VPValue (Y)),
774+ m_LogicalAnd (m_VPValue (X1), m_Not (m_VPValue (Y1))))) &&
775+ X == X1 && Y == Y1) {
776+ R.getVPSingleValue ()->replaceAllUsesWith (X);
777+ R.eraseFromParent ();
778+ return ;
779+ }
780+
781+ if (match (&R, m_c_Mul (m_VPValue (A), m_SpecificInt (1 ))))
782+ return R.getVPSingleValue ()->replaceAllUsesWith (A);
783+
784+ if (match (&R, m_Not (m_Not (m_VPValue (A)))))
785+ return R.getVPSingleValue ()->replaceAllUsesWith (A);
786+
787+ // Remove redundant DerivedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
788+ if ((match (&R,
789+ m_DerivedIV (m_SpecificInt (0 ), m_VPValue (A), m_SpecificInt (1 ))) ||
790+ match (&R,
791+ m_DerivedIV (m_SpecificInt (0 ), m_SpecificInt (0 ), m_VPValue ()))) &&
792+ TypeInfo.inferScalarType (R.getOperand (1 )) ==
793+ TypeInfo.inferScalarType (R.getVPSingleValue ()))
794+ return R.getVPSingleValue ()->replaceAllUsesWith (R.getOperand (1 ));
795+ }
796+
797+ /// Try to simplify the recipes in \p Plan.
798+ static void simplifyRecipes (VPlan &Plan) {
799+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT (
800+ Plan.getEntry ());
801+ Type *CanonicalIVType = Plan.getCanonicalIV ()->getScalarType ();
802+ VPTypeAnalysis TypeInfo (CanonicalIVType);
803+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
804+ for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
805+ simplifyRecipe (R, TypeInfo);
806+ }
807+ }
808+ }
809+
665810void VPlanTransforms::optimizeForVFAndUF (VPlan &Plan, ElementCount BestVF,
666811 unsigned BestUF,
667812 PredicatedScalarEvolution &PSE) {
@@ -942,138 +1087,6 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
9421087 }
9431088}
9441089
945- // / Try to simplify recipe \p R.
946- static void simplifyRecipe (VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
947- using namespace llvm ::VPlanPatternMatch;
948-
949- if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
950- // Try to remove redundant blend recipes.
951- SmallPtrSet<VPValue *, 4 > UniqueValues;
952- if (Blend->isNormalized () || !match (Blend->getMask (0 ), m_False ()))
953- UniqueValues.insert (Blend->getIncomingValue (0 ));
954- for (unsigned I = 1 ; I != Blend->getNumIncomingValues (); ++I)
955- if (!match (Blend->getMask (I), m_False ()))
956- UniqueValues.insert (Blend->getIncomingValue (I));
957-
958- if (UniqueValues.size () == 1 ) {
959- Blend->replaceAllUsesWith (*UniqueValues.begin ());
960- Blend->eraseFromParent ();
961- return ;
962- }
963-
964- if (Blend->isNormalized ())
965- return ;
966-
967- // Normalize the blend so its first incoming value is used as the initial
968- // value with the others blended into it.
969-
970- unsigned StartIndex = 0 ;
971- for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
972- // If a value's mask is used only by the blend then is can be deadcoded.
973- // TODO: Find the most expensive mask that can be deadcoded, or a mask
974- // that's used by multiple blends where it can be removed from them all.
975- VPValue *Mask = Blend->getMask (I);
976- if (Mask->getNumUsers () == 1 && !match (Mask, m_False ())) {
977- StartIndex = I;
978- break ;
979- }
980- }
981-
982- SmallVector<VPValue *, 4 > OperandsWithMask;
983- OperandsWithMask.push_back (Blend->getIncomingValue (StartIndex));
984-
985- for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
986- if (I == StartIndex)
987- continue ;
988- OperandsWithMask.push_back (Blend->getIncomingValue (I));
989- OperandsWithMask.push_back (Blend->getMask (I));
990- }
991-
992- auto *NewBlend = new VPBlendRecipe (
993- cast<PHINode>(Blend->getUnderlyingValue ()), OperandsWithMask);
994- NewBlend->insertBefore (&R);
995-
996- VPValue *DeadMask = Blend->getMask (StartIndex);
997- Blend->replaceAllUsesWith (NewBlend);
998- Blend->eraseFromParent ();
999- recursivelyDeleteDeadRecipes (DeadMask);
1000- return ;
1001- }
1002-
1003- VPValue *A;
1004- if (match (&R, m_Trunc (m_ZExtOrSExt (m_VPValue (A))))) {
1005- VPValue *Trunc = R.getVPSingleValue ();
1006- Type *TruncTy = TypeInfo.inferScalarType (Trunc);
1007- Type *ATy = TypeInfo.inferScalarType (A);
1008- if (TruncTy == ATy) {
1009- Trunc->replaceAllUsesWith (A);
1010- } else {
1011- // Don't replace a scalarizing recipe with a widened cast.
1012- if (isa<VPReplicateRecipe>(&R))
1013- return ;
1014- if (ATy->getScalarSizeInBits () < TruncTy->getScalarSizeInBits ()) {
1015-
1016- unsigned ExtOpcode = match (R.getOperand (0 ), m_SExt (m_VPValue ()))
1017- ? Instruction::SExt
1018- : Instruction::ZExt;
1019- auto *VPC =
1020- new VPWidenCastRecipe (Instruction::CastOps (ExtOpcode), A, TruncTy);
1021- if (auto *UnderlyingExt = R.getOperand (0 )->getUnderlyingValue ()) {
1022- // UnderlyingExt has distinct return type, used to retain legacy cost.
1023- VPC->setUnderlyingValue (UnderlyingExt);
1024- }
1025- VPC->insertBefore (&R);
1026- Trunc->replaceAllUsesWith (VPC);
1027- } else if (ATy->getScalarSizeInBits () > TruncTy->getScalarSizeInBits ()) {
1028- auto *VPC = new VPWidenCastRecipe (Instruction::Trunc, A, TruncTy);
1029- VPC->insertBefore (&R);
1030- Trunc->replaceAllUsesWith (VPC);
1031- }
1032- }
1033- #ifndef NDEBUG
1034- // Verify that the cached type info is for both A and its users is still
1035- // accurate by comparing it to freshly computed types.
1036- VPTypeAnalysis TypeInfo2 (
1037- R.getParent ()->getPlan ()->getCanonicalIV ()->getScalarType ());
1038- assert (TypeInfo.inferScalarType (A) == TypeInfo2.inferScalarType (A));
1039- for (VPUser *U : A->users ()) {
1040- auto *R = cast<VPRecipeBase>(U);
1041- for (VPValue *VPV : R->definedValues ())
1042- assert (TypeInfo.inferScalarType (VPV) == TypeInfo2.inferScalarType (VPV));
1043- }
1044- #endif
1045- }
1046-
1047- // Simplify (X && Y) || (X && !Y) -> X.
1048- // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
1049- // && (Y || Z) and (X || !X) into true. This requires queuing newly created
1050- // recipes to be visited during simplification.
1051- VPValue *X, *Y, *X1, *Y1;
1052- if (match (&R,
1053- m_c_BinaryOr (m_LogicalAnd (m_VPValue (X), m_VPValue (Y)),
1054- m_LogicalAnd (m_VPValue (X1), m_Not (m_VPValue (Y1))))) &&
1055- X == X1 && Y == Y1) {
1056- R.getVPSingleValue ()->replaceAllUsesWith (X);
1057- R.eraseFromParent ();
1058- return ;
1059- }
1060-
1061- if (match (&R, m_c_Mul (m_VPValue (A), m_SpecificInt (1 ))))
1062- return R.getVPSingleValue ()->replaceAllUsesWith (A);
1063-
1064- if (match (&R, m_Not (m_Not (m_VPValue (A)))))
1065- return R.getVPSingleValue ()->replaceAllUsesWith (A);
1066-
1067- // Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
1068- if ((match (&R,
1069- m_DerivedIV (m_SpecificInt (0 ), m_VPValue (A), m_SpecificInt (1 ))) ||
1070- match (&R,
1071- m_DerivedIV (m_SpecificInt (0 ), m_SpecificInt (0 ), m_VPValue ()))) &&
1072- TypeInfo.inferScalarType (R.getOperand (1 )) ==
1073- TypeInfo.inferScalarType (R.getVPSingleValue ()))
1074- return R.getVPSingleValue ()->replaceAllUsesWith (R.getOperand (1 ));
1075- }
1076-
10771090// / Move loop-invariant recipes out of the vector loop region in \p Plan.
10781091static void licm (VPlan &Plan) {
10791092 VPBasicBlock *Preheader = Plan.getVectorPreheader ();
@@ -1108,19 +1121,6 @@ static void licm(VPlan &Plan) {
11081121 }
11091122}
11101123
1111- // / Try to simplify the recipes in \p Plan.
1112- static void simplifyRecipes (VPlan &Plan) {
1113- ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT (
1114- Plan.getEntry ());
1115- Type *CanonicalIVType = Plan.getCanonicalIV ()->getScalarType ();
1116- VPTypeAnalysis TypeInfo (CanonicalIVType);
1117- for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
1118- for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
1119- simplifyRecipe (R, TypeInfo);
1120- }
1121- }
1122- }
1123-
11241124void VPlanTransforms::truncateToMinimalBitwidths (
11251125 VPlan &Plan, const MapVector<Instruction *, uint64_t > &MinBWs) {
11261126#ifndef NDEBUG
0 commit comments