@@ -923,85 +923,16 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
923923}
924924
925925/// Try to simplify recipe \p R.
926- static void simplifyRecipe (VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
926+ static VPValue * simplifyRecipe (VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
927927 using namespace llvm ::VPlanPatternMatch;
928928
929- if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
930- // Try to remove redundant blend recipes.
931- SmallPtrSet<VPValue *, 4 > UniqueValues;
932- if (Blend->isNormalized () || !match (Blend->getMask (0 ), m_False ()))
933- UniqueValues.insert (Blend->getIncomingValue (0 ));
934- for (unsigned I = 1 ; I != Blend->getNumIncomingValues (); ++I)
935- if (!match (Blend->getMask (I), m_False ()))
936- UniqueValues.insert (Blend->getIncomingValue (I));
937-
938- if (UniqueValues.size () == 1 ) {
939- Blend->replaceAllUsesWith (*UniqueValues.begin ());
940- Blend->eraseFromParent ();
941- return ;
942- }
943-
944- if (Blend->isNormalized ())
945- return ;
946-
947- // Normalize the blend so its first incoming value is used as the initial
948- // value with the others blended into it.
949-
950- unsigned StartIndex = 0 ;
951- for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
952- // If a value's mask is used only by the blend then is can be deadcoded.
953- // TODO: Find the most expensive mask that can be deadcoded, or a mask
954- // that's used by multiple blends where it can be removed from them all.
955- VPValue *Mask = Blend->getMask (I);
956- if (Mask->getNumUsers () == 1 && !match (Mask, m_False ())) {
957- StartIndex = I;
958- break ;
959- }
960- }
961-
962- SmallVector<VPValue *, 4 > OperandsWithMask;
963- OperandsWithMask.push_back (Blend->getIncomingValue (StartIndex));
964-
965- for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
966- if (I == StartIndex)
967- continue ;
968- OperandsWithMask.push_back (Blend->getIncomingValue (I));
969- OperandsWithMask.push_back (Blend->getMask (I));
970- }
971-
972- auto *NewBlend = new VPBlendRecipe (
973- cast<PHINode>(Blend->getUnderlyingValue ()), OperandsWithMask);
974- NewBlend->insertBefore (&R);
975-
976- VPValue *DeadMask = Blend->getMask (StartIndex);
977- Blend->replaceAllUsesWith (NewBlend);
978- Blend->eraseFromParent ();
979- recursivelyDeleteDeadRecipes (DeadMask);
980-
981- // / Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
982- VPValue *NewMask;
983- if (NewBlend->getNumOperands () == 3 &&
984- match (NewBlend->getMask (1 ), m_Not (m_VPValue (NewMask)))) {
985- VPValue *Inc0 = NewBlend->getOperand (0 );
986- VPValue *Inc1 = NewBlend->getOperand (1 );
987- VPValue *OldMask = NewBlend->getOperand (2 );
988- NewBlend->setOperand (0 , Inc1);
989- NewBlend->setOperand (1 , Inc0);
990- NewBlend->setOperand (2 , NewMask);
991- if (OldMask->getNumUsers () == 0 )
992- cast<VPInstruction>(OldMask)->eraseFromParent ();
993- }
994- return ;
995- }
996-
997929 // VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
998930 // part 0 can be replaced by their start value, if only the first lane is
999931 // demanded.
1000932 if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&R)) {
1001933 if (Steps->getParent ()->getPlan ()->isUnrolled () && Steps->isPart0 () &&
1002934 vputils::onlyFirstLaneUsed (Steps)) {
1003- Steps->replaceAllUsesWith (Steps->getOperand (0 ));
1004- return ;
935+ return Steps->getOperand (0 );
1005936 }
1006937 }
1007938
@@ -1011,11 +942,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1011942 Type *TruncTy = TypeInfo.inferScalarType (Trunc);
1012943 Type *ATy = TypeInfo.inferScalarType (A);
1013944 if (TruncTy == ATy) {
1014- Trunc-> replaceAllUsesWith (A) ;
945+ return A ;
1015946 } else {
1016947 // Don't replace a scalarizing recipe with a widened cast.
1017948 if (isa<VPReplicateRecipe>(&R))
1018- return ;
949+ return nullptr ;
1019950 if (ATy->getScalarSizeInBits () < TruncTy->getScalarSizeInBits ()) {
1020951
1021952 unsigned ExtOpcode = match (R.getOperand (0 ), m_SExt (m_VPValue ()))
@@ -1028,11 +959,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1028959 VPC->setUnderlyingValue (UnderlyingExt);
1029960 }
1030961 VPC->insertBefore (&R);
1031- Trunc-> replaceAllUsesWith ( VPC) ;
962+ return VPC;
1032963 } else if (ATy->getScalarSizeInBits () > TruncTy->getScalarSizeInBits ()) {
1033964 auto *VPC = new VPWidenCastRecipe (Instruction::Trunc, A, TruncTy);
1034965 VPC->insertBefore (&R);
1035- Trunc-> replaceAllUsesWith ( VPC) ;
966+ return VPC;
1036967 }
1037968 }
1038969#ifndef NDEBUG
@@ -1056,17 +987,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
1056987 VPValue *X, *Y;
1057988 if (match (&R,
1058989 m_c_BinaryOr (m_LogicalAnd (m_VPValue (X), m_VPValue (Y)),
1059- m_LogicalAnd (m_Deferred (X), m_Not (m_Deferred (Y)))))) {
1060- R.getVPSingleValue ()->replaceAllUsesWith (X);
1061- R.eraseFromParent ();
1062- return ;
1063- }
990+ m_LogicalAnd (m_Deferred (X), m_Not (m_Deferred (Y))))))
991+ return X;
1064992
1065993 if (match (&R, m_c_Mul (m_VPValue (A), m_SpecificInt (1 ))))
1066- return R. getVPSingleValue ()-> replaceAllUsesWith (A) ;
994+ return A ;
1067995
1068996 if (match (&R, m_Not (m_Not (m_VPValue (A)))))
1069- return R. getVPSingleValue ()-> replaceAllUsesWith (A) ;
997+ return A ;
1070998
10711999 // Remove redundant DerivedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
10721000 if ((match (&R,
@@ -1075,16 +1003,110 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10751003 m_DerivedIV (m_SpecificInt (0 ), m_SpecificInt (0 ), m_VPValue ()))) &&
10761004 TypeInfo.inferScalarType (R.getOperand (1 )) ==
10771005 TypeInfo.inferScalarType (R.getVPSingleValue ()))
1078- return R.getVPSingleValue ()->replaceAllUsesWith (R.getOperand (1 ));
1006+ return R.getOperand (1 );
1007+
1008+ return nullptr ;
10791009}
10801010
10811011void VPlanTransforms::simplifyRecipes (VPlan &Plan, Type &CanonicalIVTy) {
10821012 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT (
10831013 Plan.getEntry ());
10841014 VPTypeAnalysis TypeInfo (&CanonicalIVTy);
1015+ SetVector<VPRecipeBase *> Worklist;
1016+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
1017+ for (VPRecipeBase &R : make_early_inc_range (*VPBB))
1018+ Worklist.insert (&R);
1019+
1020+ while (!Worklist.empty ()) {
1021+ VPRecipeBase *R = Worklist.pop_back_val ();
1022+ if (VPValue *Result = simplifyRecipe (*R, TypeInfo)) {
1023+ R->getVPSingleValue ()->replaceAllUsesWith (Result);
1024+ R->eraseFromParent ();
1025+ if (VPRecipeBase *ResultR = Result->getDefiningRecipe ())
1026+ Worklist.insert (ResultR);
1027+ for (VPUser *U : Result->users ())
1028+ if (auto *UR = dyn_cast<VPRecipeBase>(U))
1029+ if (UR != R)
1030+ Worklist.insert (UR);
1031+ }
1032+ }
1033+ }
1034+
1035+ void VPlanTransforms::simplifyBlends (VPlan &Plan) {
1036+ using namespace llvm ::VPlanPatternMatch;
1037+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT (
1038+ Plan.getEntry ());
1039+ SetVector<VPRecipeBase *> Worklist;
10851040 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
10861041 for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
1087- simplifyRecipe (R, TypeInfo);
1042+ auto *Blend = dyn_cast<VPBlendRecipe>(&R);
1043+ if (!Blend)
1044+ continue ;
1045+
1046+ // Try to remove redundant blend recipes.
1047+ SmallPtrSet<VPValue *, 4 > UniqueValues;
1048+ if (Blend->isNormalized () || !match (Blend->getMask (0 ), m_False ()))
1049+ UniqueValues.insert (Blend->getIncomingValue (0 ));
1050+ for (unsigned I = 1 ; I != Blend->getNumIncomingValues (); ++I)
1051+ if (!match (Blend->getMask (I), m_False ()))
1052+ UniqueValues.insert (Blend->getIncomingValue (I));
1053+
1054+ if (UniqueValues.size () == 1 ) {
1055+ Blend->replaceAllUsesWith (*UniqueValues.begin ());
1056+ Blend->eraseFromParent ();
1057+ continue ;
1058+ }
1059+
1060+ if (Blend->isNormalized ())
1061+ continue ;
1062+
1063+ // Normalize the blend so its first incoming value is used as the initial
1064+ // value with the others blended into it.
1065+
1066+ unsigned StartIndex = 0 ;
1067+ for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
1068+ // If a value's mask is used only by the blend then is can be deadcoded.
1069+ // TODO: Find the most expensive mask that can be deadcoded, or a mask
1070+ // that's used by multiple blends where it can be removed from them all.
1071+ VPValue *Mask = Blend->getMask (I);
1072+ if (Mask->getNumUsers () == 1 && !match (Mask, m_False ())) {
1073+ StartIndex = I;
1074+ break ;
1075+ }
1076+ }
1077+
1078+ SmallVector<VPValue *, 4 > OperandsWithMask;
1079+ OperandsWithMask.push_back (Blend->getIncomingValue (StartIndex));
1080+
1081+ for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
1082+ if (I == StartIndex)
1083+ continue ;
1084+ OperandsWithMask.push_back (Blend->getIncomingValue (I));
1085+ OperandsWithMask.push_back (Blend->getMask (I));
1086+ }
1087+
1088+ auto *NewBlend = new VPBlendRecipe (
1089+ cast<PHINode>(Blend->getUnderlyingValue ()), OperandsWithMask);
1090+ NewBlend->insertBefore (&R);
1091+
1092+ VPValue *DeadMask = Blend->getMask (StartIndex);
1093+ Blend->replaceAllUsesWith (NewBlend);
1094+ Blend->eraseFromParent ();
1095+ recursivelyDeleteDeadRecipes (DeadMask);
1096+
1097+ // / Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
1098+ VPValue *NewMask;
1099+ if (NewBlend->getNumOperands () == 3 &&
1100+ match (NewBlend->getMask (1 ), m_Not (m_VPValue (NewMask)))) {
1101+ VPValue *Inc0 = NewBlend->getOperand (0 );
1102+ VPValue *Inc1 = NewBlend->getOperand (1 );
1103+ VPValue *OldMask = NewBlend->getOperand (2 );
1104+ NewBlend->setOperand (0 , Inc1);
1105+ NewBlend->setOperand (1 , Inc0);
1106+ NewBlend->setOperand (2 , NewMask);
1107+ if (OldMask->getNumUsers () == 0 )
1108+ cast<VPInstruction>(OldMask)->eraseFromParent ();
1109+ }
10881110 }
10891111 }
10901112}
@@ -1684,6 +1706,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
16841706 runPass (removeRedundantInductionCasts, Plan);
16851707
16861708 runPass (simplifyRecipes, Plan, *Plan.getCanonicalIV ()->getScalarType ());
1709+ runPass (simplifyBlends, Plan);
16871710 runPass (removeDeadRecipes, Plan);
16881711 runPass (legalizeAndOptimizeInductions, Plan);
16891712 runPass (removeRedundantExpandSCEVRecipes, Plan);
0 commit comments