@@ -926,74 +926,6 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
926926static void simplifyRecipe (VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
927927 using namespace llvm ::VPlanPatternMatch;
928928
929- if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
930- // Try to remove redundant blend recipes.
931- SmallPtrSet<VPValue *, 4 > UniqueValues;
932- if (Blend->isNormalized () || !match (Blend->getMask (0 ), m_False ()))
933- UniqueValues.insert (Blend->getIncomingValue (0 ));
934- for (unsigned I = 1 ; I != Blend->getNumIncomingValues (); ++I)
935- if (!match (Blend->getMask (I), m_False ()))
936- UniqueValues.insert (Blend->getIncomingValue (I));
937-
938- if (UniqueValues.size () == 1 ) {
939- Blend->replaceAllUsesWith (*UniqueValues.begin ());
940- Blend->eraseFromParent ();
941- return ;
942- }
943-
944- if (Blend->isNormalized ())
945- return ;
946-
947- // Normalize the blend so its first incoming value is used as the initial
948- // value with the others blended into it.
949-
950- unsigned StartIndex = 0 ;
951- for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
952- // If a value's mask is used only by the blend then is can be deadcoded.
953- // TODO: Find the most expensive mask that can be deadcoded, or a mask
954- // that's used by multiple blends where it can be removed from them all.
955- VPValue *Mask = Blend->getMask (I);
956- if (Mask->getNumUsers () == 1 && !match (Mask, m_False ())) {
957- StartIndex = I;
958- break ;
959- }
960- }
961-
962- SmallVector<VPValue *, 4 > OperandsWithMask;
963- OperandsWithMask.push_back (Blend->getIncomingValue (StartIndex));
964-
965- for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
966- if (I == StartIndex)
967- continue ;
968- OperandsWithMask.push_back (Blend->getIncomingValue (I));
969- OperandsWithMask.push_back (Blend->getMask (I));
970- }
971-
972- auto *NewBlend = new VPBlendRecipe (
973- cast<PHINode>(Blend->getUnderlyingValue ()), OperandsWithMask);
974- NewBlend->insertBefore (&R);
975-
976- VPValue *DeadMask = Blend->getMask (StartIndex);
977- Blend->replaceAllUsesWith (NewBlend);
978- Blend->eraseFromParent ();
979- recursivelyDeleteDeadRecipes (DeadMask);
980-
981- // / Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
982- VPValue *NewMask;
983- if (NewBlend->getNumOperands () == 3 &&
984- match (NewBlend->getMask (1 ), m_Not (m_VPValue (NewMask)))) {
985- VPValue *Inc0 = NewBlend->getOperand (0 );
986- VPValue *Inc1 = NewBlend->getOperand (1 );
987- VPValue *OldMask = NewBlend->getOperand (2 );
988- NewBlend->setOperand (0 , Inc1);
989- NewBlend->setOperand (1 , Inc0);
990- NewBlend->setOperand (2 , NewMask);
991- if (OldMask->getNumUsers () == 0 )
992- cast<VPInstruction>(OldMask)->eraseFromParent ();
993- }
994- return ;
995- }
996-
997929 // VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
998930 // part 0 can be replaced by their start value, if only the first lane is
999931 // demanded.
@@ -1092,6 +1024,85 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
10921024 }
10931025}
10941026
1027+ /// Normalize and simplify VPBlendRecipes. Should be run after simplifyRecipes
1028+ /// to make sure the masks are simplified.
1029+ static void simplifyBlends (VPlan &Plan) {
1030+ using namespace llvm ::VPlanPatternMatch;
1031+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
1032+ vp_depth_first_shallow (Plan.getVectorLoopRegion ()->getEntry ()))) {
1033+ for (VPRecipeBase &R : make_early_inc_range (*VPBB)) { // R may be erased below.
1034+ auto *Blend = dyn_cast<VPBlendRecipe>(&R);
1035+ if (!Blend)
1036+ continue ;
1037+
1038+ // Try to remove redundant blend recipes: gather the distinct incoming
1039+ SmallPtrSet<VPValue *, 4 > UniqueValues; // values whose mask does not match m_False().
1040+ if (Blend->isNormalized () || !match (Blend->getMask (0 ), m_False ()))
1041+ UniqueValues.insert (Blend->getIncomingValue (0 ));
1042+ for (unsigned I = 1 ; I != Blend->getNumIncomingValues (); ++I)
1043+ if (!match (Blend->getMask (I), m_False ()))
1044+ UniqueValues.insert (Blend->getIncomingValue (I));
1045+
1046+ if (UniqueValues.size () == 1 ) { // Single candidate value: use it in place of the blend.
1047+ Blend->replaceAllUsesWith (*UniqueValues.begin ());
1048+ Blend->eraseFromParent ();
1049+ continue ;
1050+ }
1051+
1052+ if (Blend->isNormalized ())
1053+ continue ;
1054+
1055+ // Normalize the blend so its first incoming value is used as the initial
1056+ // value with the others blended into it.
1057+
1058+ unsigned StartIndex = 0 ;
1059+ for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
1060+ // If a value's mask is used only by the blend then it can be deadcoded.
1061+ // TODO: Find the most expensive mask that can be deadcoded, or a mask
1062+ // that's used by multiple blends where it can be removed from them all.
1063+ VPValue *Mask = Blend->getMask (I);
1064+ if (Mask->getNumUsers () == 1 && !match (Mask, m_False ())) {
1065+ StartIndex = I;
1066+ break ;
1067+ }
1068+ }
1069+
1070+ SmallVector<VPValue *, 4 > OperandsWithMask; // Start value first, then (value, mask) pairs.
1071+ OperandsWithMask.push_back (Blend->getIncomingValue (StartIndex));
1072+
1073+ for (unsigned I = 0 ; I != Blend->getNumIncomingValues (); ++I) {
1074+ if (I == StartIndex)
1075+ continue ;
1076+ OperandsWithMask.push_back (Blend->getIncomingValue (I));
1077+ OperandsWithMask.push_back (Blend->getMask (I));
1078+ }
1079+
1080+ auto *NewBlend = new VPBlendRecipe (
1081+ cast<PHINode>(Blend->getUnderlyingValue ()), OperandsWithMask);
1082+ NewBlend->insertBefore (&R);
1083+
1084+ VPValue *DeadMask = Blend->getMask (StartIndex); // Fetch the mask before Blend is erased.
1085+ Blend->replaceAllUsesWith (NewBlend);
1086+ Blend->eraseFromParent ();
1087+ recursivelyDeleteDeadRecipes (DeadMask);
1088+
1089+ // Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
1090+ VPValue *NewMask;
1091+ if (NewBlend->getNumOperands () == 3 &&
1092+ match (NewBlend->getMask (1 ), m_Not (m_VPValue (NewMask)))) {
1093+ VPValue *Inc0 = NewBlend->getOperand (0 );
1094+ VPValue *Inc1 = NewBlend->getOperand (1 );
1095+ VPValue *OldMask = NewBlend->getOperand (2 );
1096+ NewBlend->setOperand (0 , Inc1);
1097+ NewBlend->setOperand (1 , Inc0);
1098+ NewBlend->setOperand (2 , NewMask);
1099+ if (OldMask->getNumUsers () == 0 ) // Erase the Not once nothing uses it.
1100+ cast<VPInstruction>(OldMask)->eraseFromParent ();
1101+ }
1102+ }
1103+ }
1104+ }
1105+
10951106// / Optimize the width of vector induction variables in \p Plan based on a known
10961107// / constant Trip Count, \p BestVF and \p BestUF.
10971108static bool optimizeVectorInductionWidthForTCAndVFUF (VPlan &Plan,
@@ -1733,6 +1744,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
17331744 runPass (removeRedundantInductionCasts, Plan);
17341745
17351746 runPass (simplifyRecipes, Plan, *Plan.getCanonicalIV ()->getScalarType ());
1747+ runPass (simplifyBlends, Plan);
17361748 runPass (removeDeadRecipes, Plan);
17371749 runPass (legalizeAndOptimizeInductions, Plan);
17381750 runPass (removeRedundantExpandSCEVRecipes, Plan);
0 commit comments