@@ -142,6 +142,7 @@ class VectorCombine {
142
142
bool foldInterleaveIntrinsics (Instruction &I);
143
143
bool shrinkType (Instruction &I);
144
144
bool shrinkLoadForShuffles (Instruction &I);
145
+ bool shrinkPhiOfShuffles (Instruction &I);
145
146
146
147
void replaceValue (Value &Old, Value &New) {
147
148
LLVM_DEBUG (dbgs () << " VC: Replacing: " << Old << ' \n ' );
@@ -3994,6 +3995,101 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
3994
3995
return false ;
3995
3996
}
3996
3997
3998
+ // Attempt to narrow a phi of shufflevector instructions where the two incoming
3999
+ // values have the same operands but different masks. If the two shuffle masks
4000
+ // are offsets of one another we can use one branch to rotate the incoming
4001
+ // vector and perform one larger shuffle after the phi.
4002
+ bool VectorCombine::shrinkPhiOfShuffles (Instruction &I) {
4003
+ auto *Phi = dyn_cast<PHINode>(&I);
4004
+ if (!Phi || Phi->getNumIncomingValues () != 2u )
4005
+ return false ;
4006
+
4007
+ Value *Op = nullptr ;
4008
+ ArrayRef<int > Mask0;
4009
+ ArrayRef<int > Mask1;
4010
+
4011
+ if (!match (Phi->getOperand (0u ),
4012
+ m_OneUse (m_Shuffle (m_Value (Op), m_Poison (), m_Mask (Mask0)))) ||
4013
+ !match (Phi->getOperand (1u ),
4014
+ m_OneUse (m_Shuffle (m_Specific (Op), m_Poison (), m_Mask (Mask1)))))
4015
+ return false ;
4016
+
4017
+ auto *Shuf = cast<ShuffleVectorInst>(Phi->getOperand (0u ));
4018
+
4019
+ // Ensure result vectors are wider than the argument vector.
4020
+ auto *InputVT = cast<FixedVectorType>(Op->getType ());
4021
+ auto *ResultVT = cast<FixedVectorType>(Shuf->getType ());
4022
+ auto const InputNumElements = InputVT->getNumElements ();
4023
+
4024
+ if (InputNumElements >= ResultVT->getNumElements ())
4025
+ return false ;
4026
+
4027
+ // Take the difference of the two shuffle masks at each index. Ignore poison
4028
+ // values at the same index in both masks.
4029
+ SmallVector<int , 16 > NewMask;
4030
+ NewMask.reserve (Mask0.size ());
4031
+
4032
+ for (auto [M0, M1] : zip (Mask0, Mask1)) {
4033
+ if (M0 >= 0 && M1 >= 0 )
4034
+ NewMask.push_back (M0 - M1);
4035
+ else if (M0 == -1 && M1 == -1 )
4036
+ continue ;
4037
+ else
4038
+ return false ;
4039
+ }
4040
+
4041
+ // Ensure all elements of the new mask are equal. If the difference between
4042
+ // the incoming mask elements is the same, the two must be constant offsets
4043
+ // of one another.
4044
+ if (NewMask.empty () || !all_equal (NewMask))
4045
+ return false ;
4046
+
4047
+ // Create new mask using difference of the two incoming masks.
4048
+ int MaskOffset = NewMask[0u ];
4049
+ unsigned Index = (InputNumElements - MaskOffset) % InputNumElements;
4050
+ NewMask.clear ();
4051
+
4052
+ for (unsigned I = 0u ; I < InputNumElements; ++I) {
4053
+ NewMask.push_back (Index);
4054
+ Index = (Index + 1u ) % InputNumElements;
4055
+ }
4056
+
4057
+ // Calculate costs for worst cases and compare.
4058
+ auto const Kind = TTI::SK_PermuteSingleSrc;
4059
+ auto OldCost =
4060
+ std::max (TTI.getShuffleCost (Kind, ResultVT, InputVT, Mask0, CostKind),
4061
+ TTI.getShuffleCost (Kind, ResultVT, InputVT, Mask1, CostKind));
4062
+ auto NewCost = TTI.getShuffleCost (Kind, InputVT, InputVT, NewMask, CostKind) +
4063
+ TTI.getShuffleCost (Kind, ResultVT, InputVT, Mask1, CostKind);
4064
+
4065
+ LLVM_DEBUG (dbgs () << " Found a phi of mergeable shuffles: " << I
4066
+ << " \n OldCost: " << OldCost << " vs NewCost: " << NewCost
4067
+ << " \n " );
4068
+
4069
+ if (NewCost > OldCost)
4070
+ return false ;
4071
+
4072
+ // Create new shuffles and narrowed phi.
4073
+ auto Builder = IRBuilder (Shuf);
4074
+ Builder.SetCurrentDebugLocation (Shuf->getDebugLoc ());
4075
+ auto *PoisonVal = PoisonValue::get (InputVT);
4076
+ auto *NewShuf0 = Builder.CreateShuffleVector (Op, PoisonVal, NewMask);
4077
+ Worklist.push (cast<Instruction>(NewShuf0));
4078
+
4079
+ Builder.SetInsertPoint (Phi);
4080
+ Builder.SetCurrentDebugLocation (Phi->getDebugLoc ());
4081
+ auto *NewPhi = Builder.CreatePHI (NewShuf0->getType (), 2u );
4082
+ NewPhi->addIncoming (NewShuf0, Phi->getIncomingBlock (0u ));
4083
+ NewPhi->addIncoming (Op, Phi->getIncomingBlock (1u ));
4084
+
4085
+ Builder.SetInsertPoint (*NewPhi->getInsertionPointAfterDef ());
4086
+ PoisonVal = PoisonValue::get (NewPhi->getType ());
4087
+ auto *NewShuf1 = Builder.CreateShuffleVector (NewPhi, PoisonVal, Mask1);
4088
+
4089
+ replaceValue (*Phi, *NewShuf1);
4090
+ return true ;
4091
+ }
4092
+
3997
4093
/// This is the entry point for all transforms. Pass manager differences are
3998
4094
/// handled in the callers of this function.
3999
4095
bool VectorCombine::run () {
@@ -4081,6 +4177,9 @@ bool VectorCombine::run() {
4081
4177
case Instruction::Xor:
4082
4178
MadeChange |= foldBitOpOfCastops (I);
4083
4179
break ;
4180
+ case Instruction::PHI:
4181
+ MadeChange |= shrinkPhiOfShuffles (I);
4182
+ break ;
4084
4183
default :
4085
4184
MadeChange |= shrinkType (I);
4086
4185
break ;
0 commit comments