@@ -3701,37 +3701,35 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
3701
3701
if (!OldLoad || !OldLoad->isSimple ())
3702
3702
return false ;
3703
3703
3704
- auto *VecTy = dyn_cast<FixedVectorType>(OldLoad->getType ());
3705
- if (!VecTy )
3704
+ auto *OldLoadTy = dyn_cast<FixedVectorType>(OldLoad->getType ());
3705
+ if (!OldLoadTy )
3706
3706
return false ;
3707
3707
3708
+ unsigned const OldNumElements = OldLoadTy->getNumElements ();
3709
+
3708
3710
// Search all uses of load. If all uses are shufflevector instructions, and
3709
3711
// the second operands are all poison values, find the minimum and maximum
3710
3712
// indices of the vector elements referenced by all shuffle masks.
3711
3713
// Otherwise return `std::nullopt`.
3712
3714
using IndexRange = std::pair<int , int >;
3713
3715
auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
3714
- IndexRange OutputRange = IndexRange (VecTy-> getNumElements () , -1 );
3716
+ IndexRange OutputRange = IndexRange (OldNumElements , -1 );
3715
3717
for (auto &Use : I.uses ()) {
3716
3718
// Ensure all uses match the required pattern.
3717
3719
User *Shuffle = Use.getUser ();
3718
- Value *Op0 = nullptr ;
3719
3720
ArrayRef<int > Mask;
3720
3721
3721
- if (!match (Shuffle, m_Shuffle (m_Value (Op0), m_Undef (), m_Mask (Mask))))
3722
+ if (!match (Shuffle,
3723
+ m_Shuffle (m_Specific (OldLoad), m_Undef (), m_Mask (Mask))))
3722
3724
return std::nullopt;
3723
3725
3724
3726
// Ignore shufflevector instructions that have no uses.
3725
3727
if (Shuffle->use_empty ())
3726
3728
continue ;
3727
3729
3728
3730
// Find the min and max indices used by the shufflevector instruction.
3729
- FixedVectorType *Op0Ty = cast<FixedVectorType>(Op0->getType ());
3730
- int NumElems = static_cast <int >(Op0Ty->getNumElements ());
3731
-
3732
3731
for (int Index : Mask) {
3733
- if (Index >= 0 ) {
3734
- Index %= NumElems;
3732
+ if (Index >= 0 && Index < static_cast <int >(OldNumElements)) {
3735
3733
OutputRange.first = std::min (Index, OutputRange.first );
3736
3734
OutputRange.second = std::max (Index, OutputRange.second );
3737
3735
}
@@ -3746,34 +3744,29 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
3746
3744
3747
3745
// Get the range of vector elements used by shufflevector instructions.
3748
3746
if (auto Indices = GetIndexRangeInShuffles ()) {
3749
- unsigned OldSize = VecTy->getNumElements ();
3750
- unsigned NewSize = Indices->second + 1u ;
3747
+ unsigned const NewNumElements = Indices->second + 1u ;
3751
3748
3752
3749
// If the range of vector elements is smaller than the full load, attempt
3753
3750
// to create a smaller load.
3754
- if (NewSize < OldSize ) {
3751
+ if (NewNumElements < OldNumElements ) {
3755
3752
auto Builder = IRBuilder (&I);
3756
3753
Builder.SetCurrentDebugLocation (I.getDebugLoc ());
3757
3754
3758
- // Create new load of smaller vector.
3759
- auto *ElemTy = VecTy->getElementType ();
3760
- auto *NewVecTy = FixedVectorType::get (ElemTy, NewSize);
3761
- auto *PtrOp = OldLoad->getPointerOperand ();
3762
- auto *NewLoad = cast<LoadInst>(
3763
- Builder.CreateAlignedLoad (NewVecTy, PtrOp, OldLoad->getAlign ()));
3764
- NewLoad->copyMetadata (I);
3765
-
3766
3755
// Calculate costs of old and new ops.
3767
- auto OldCost = TTI.getMemoryOpCost (
3756
+ Type *ElemTy = OldLoadTy->getElementType ();
3757
+ FixedVectorType *NewLoadTy = FixedVectorType::get (ElemTy, NewNumElements);
3758
+ Value *PtrOp = OldLoad->getPointerOperand ();
3759
+
3760
+ InstructionCost OldCost = TTI.getMemoryOpCost (
3768
3761
Instruction::Load, OldLoad->getType (), OldLoad->getAlign (),
3769
3762
OldLoad->getPointerAddressSpace (), CostKind);
3770
- auto NewCost = TTI.getMemoryOpCost (
3771
- Instruction::Load, NewLoad-> getType (), NewLoad ->getAlign (),
3772
- NewLoad ->getPointerAddressSpace (), CostKind);
3763
+ InstructionCost NewCost = TTI.getMemoryOpCost (
3764
+ Instruction::Load, NewLoadTy, OldLoad ->getAlign (),
3765
+ OldLoad ->getPointerAddressSpace (), CostKind);
3773
3766
3774
3767
using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int >>;
3775
3768
auto NewUses = SmallVector<UseEntry, 4u >();
3776
- auto SizeDiff = OldSize - NewSize ;
3769
+ auto SizeDiff = OldNumElements - NewNumElements ;
3777
3770
3778
3771
for (auto &Use : I.uses ()) {
3779
3772
auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser ());
@@ -3783,19 +3776,22 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
3783
3776
NewUses.push_back ({Shuffle, {}});
3784
3777
auto &NewMask = NewUses.back ().second ;
3785
3778
for (auto Index : OldMask)
3786
- NewMask.push_back (Index >= int (OldSize ) ? Index - SizeDiff : Index);
3779
+ NewMask.push_back (Index >= int (NewNumElements ) ? Index - SizeDiff : Index);
3787
3780
3788
3781
// Update costs.
3789
- OldCost += TTI.getShuffleCost (TTI::SK_PermuteSingleSrc, VecTy, OldMask ,
3790
- CostKind);
3791
- NewCost += TTI.getShuffleCost (TTI::SK_PermuteSingleSrc, NewVecTy ,
3782
+ OldCost += TTI.getShuffleCost (TTI::SK_PermuteSingleSrc, OldLoadTy ,
3783
+ OldMask, CostKind);
3784
+ NewCost += TTI.getShuffleCost (TTI::SK_PermuteSingleSrc, NewLoadTy ,
3792
3785
NewMask, CostKind);
3793
3786
}
3794
3787
3795
- if (OldCost < NewCost || !NewCost.isValid ()) {
3796
- NewLoad->eraseFromParent ();
3788
+ if (OldCost < NewCost || !NewCost.isValid ())
3797
3789
return false ;
3798
- }
3790
+
3791
+ // Create new load of smaller vector.
3792
+ auto *NewLoad = cast<LoadInst>(
3793
+ Builder.CreateAlignedLoad (NewLoadTy, PtrOp, OldLoad->getAlign ()));
3794
+ NewLoad->copyMetadata (I);
3799
3795
3800
3796
// Replace all uses.
3801
3797
for (auto &Use : NewUses) {
@@ -3805,7 +3801,7 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
3805
3801
Builder.SetInsertPoint (Shuffle);
3806
3802
Builder.SetCurrentDebugLocation (Shuffle->getDebugLoc ());
3807
3803
auto *NewShuffle = Builder.CreateShuffleVector (
3808
- NewLoad, PoisonValue::get (NewVecTy ), NewMask);
3804
+ NewLoad, PoisonValue::get (NewLoadTy ), NewMask);
3809
3805
3810
3806
replaceValue (*Shuffle, *NewShuffle);
3811
3807
}
0 commit comments