@@ -3493,37 +3493,35 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
3493
3493
if (!OldLoad || !OldLoad->isSimple ())
3494
3494
return false ;
3495
3495
3496
- auto *VecTy = dyn_cast<FixedVectorType>(OldLoad->getType ());
3497
- if (!VecTy )
3496
+ auto *OldLoadTy = dyn_cast<FixedVectorType>(OldLoad->getType ());
3497
+ if (!OldLoadTy )
3498
3498
return false ;
3499
3499
3500
+ unsigned const OldNumElements = OldLoadTy->getNumElements ();
3501
+
3500
3502
// Search all uses of load. If all uses are shufflevector instructions, and
3501
3503
// the second operands are all poison values, find the minimum and maximum
3502
3504
// indices of the vector elements referenced by all shuffle masks.
3503
3505
// Otherwise return `std::nullopt`.
3504
3506
using IndexRange = std::pair<int , int >;
3505
3507
auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
3506
- IndexRange OutputRange = IndexRange (VecTy-> getNumElements () , -1 );
3508
+ IndexRange OutputRange = IndexRange (OldNumElements , -1 );
3507
3509
for (auto &Use : I.uses ()) {
3508
3510
// Ensure all uses match the required pattern.
3509
3511
User *Shuffle = Use.getUser ();
3510
- Value *Op0 = nullptr ;
3511
3512
ArrayRef<int > Mask;
3512
3513
3513
- if (!match (Shuffle, m_Shuffle (m_Value (Op0), m_Undef (), m_Mask (Mask))))
3514
+ if (!match (Shuffle,
3515
+ m_Shuffle (m_Specific (OldLoad), m_Undef (), m_Mask (Mask))))
3514
3516
return std::nullopt;
3515
3517
3516
3518
// Ignore shufflevector instructions that have no uses.
3517
3519
if (Shuffle->use_empty ())
3518
3520
continue ;
3519
3521
3520
3522
// Find the min and max indices used by the shufflevector instruction.
3521
- FixedVectorType *Op0Ty = cast<FixedVectorType>(Op0->getType ());
3522
- int NumElems = static_cast <int >(Op0Ty->getNumElements ());
3523
-
3524
3523
for (int Index : Mask) {
3525
- if (Index >= 0 ) {
3526
- Index %= NumElems;
3524
+ if (Index >= 0 && Index < static_cast <int >(OldNumElements)) {
3527
3525
OutputRange.first = std::min (Index, OutputRange.first );
3528
3526
OutputRange.second = std::max (Index, OutputRange.second );
3529
3527
}
@@ -3538,34 +3536,29 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
3538
3536
3539
3537
// Get the range of vector elements used by shufflevector instructions.
3540
3538
if (auto Indices = GetIndexRangeInShuffles ()) {
3541
- unsigned OldSize = VecTy->getNumElements ();
3542
- unsigned NewSize = Indices->second + 1u ;
3539
+ unsigned const NewNumElements = Indices->second + 1u ;
3543
3540
3544
3541
// If the range of vector elements is smaller than the full load, attempt
3545
3542
// to create a smaller load.
3546
- if (NewSize < OldSize ) {
3543
+ if (NewNumElements < OldNumElements ) {
3547
3544
auto Builder = IRBuilder (&I);
3548
3545
Builder.SetCurrentDebugLocation (I.getDebugLoc ());
3549
3546
3550
- // Create new load of smaller vector.
3551
- auto *ElemTy = VecTy->getElementType ();
3552
- auto *NewVecTy = FixedVectorType::get (ElemTy, NewSize);
3553
- auto *PtrOp = OldLoad->getPointerOperand ();
3554
- auto *NewLoad = cast<LoadInst>(
3555
- Builder.CreateAlignedLoad (NewVecTy, PtrOp, OldLoad->getAlign ()));
3556
- NewLoad->copyMetadata (I);
3557
-
3558
3547
// Calculate costs of old and new ops.
3559
- auto OldCost = TTI.getMemoryOpCost (
3548
+ Type *ElemTy = OldLoadTy->getElementType ();
3549
+ FixedVectorType *NewLoadTy = FixedVectorType::get (ElemTy, NewNumElements);
3550
+ Value *PtrOp = OldLoad->getPointerOperand ();
3551
+
3552
+ InstructionCost OldCost = TTI.getMemoryOpCost (
3560
3553
Instruction::Load, OldLoad->getType (), OldLoad->getAlign (),
3561
3554
OldLoad->getPointerAddressSpace (), CostKind);
3562
- auto NewCost = TTI.getMemoryOpCost (
3563
- Instruction::Load, NewLoad-> getType (), NewLoad ->getAlign (),
3564
- NewLoad ->getPointerAddressSpace (), CostKind);
3555
+ InstructionCost NewCost = TTI.getMemoryOpCost (
3556
+ Instruction::Load, NewLoadTy, OldLoad ->getAlign (),
3557
+ OldLoad ->getPointerAddressSpace (), CostKind);
3565
3558
3566
3559
using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int >>;
3567
3560
auto NewUses = SmallVector<UseEntry, 4u >();
3568
- auto SizeDiff = OldSize - NewSize ;
3561
+ auto SizeDiff = OldNumElements - NewNumElements ;
3569
3562
3570
3563
for (auto &Use : I.uses ()) {
3571
3564
auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser ());
@@ -3575,19 +3568,22 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
3575
3568
NewUses.push_back ({Shuffle, {}});
3576
3569
auto &NewMask = NewUses.back ().second ;
3577
3570
for (auto Index : OldMask)
3578
- NewMask.push_back (Index >= int (OldSize ) ? Index - SizeDiff : Index);
3571
+ NewMask.push_back (Index >= int (NewNumElements ) ? Index - SizeDiff : Index);
3579
3572
3580
3573
// Update costs.
3581
- OldCost += TTI.getShuffleCost (TTI::SK_PermuteSingleSrc, VecTy, OldMask ,
3582
- CostKind);
3583
- NewCost += TTI.getShuffleCost (TTI::SK_PermuteSingleSrc, NewVecTy ,
3574
+ OldCost += TTI.getShuffleCost (TTI::SK_PermuteSingleSrc, OldLoadTy ,
3575
+ OldMask, CostKind);
3576
+ NewCost += TTI.getShuffleCost (TTI::SK_PermuteSingleSrc, NewLoadTy ,
3584
3577
NewMask, CostKind);
3585
3578
}
3586
3579
3587
- if (OldCost < NewCost || !NewCost.isValid ()) {
3588
- NewLoad->eraseFromParent ();
3580
+ if (OldCost < NewCost || !NewCost.isValid ())
3589
3581
return false ;
3590
- }
3582
+
3583
+ // Create new load of smaller vector.
3584
+ auto *NewLoad = cast<LoadInst>(
3585
+ Builder.CreateAlignedLoad (NewLoadTy, PtrOp, OldLoad->getAlign ()));
3586
+ NewLoad->copyMetadata (I);
3591
3587
3592
3588
// Replace all uses.
3593
3589
for (auto &Use : NewUses) {
@@ -3597,7 +3593,7 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
3597
3593
Builder.SetInsertPoint (Shuffle);
3598
3594
Builder.SetCurrentDebugLocation (Shuffle->getDebugLoc ());
3599
3595
auto *NewShuffle = Builder.CreateShuffleVector (
3600
- NewLoad, PoisonValue::get (NewVecTy ), NewMask);
3596
+ NewLoad, PoisonValue::get (NewLoadTy ), NewMask);
3601
3597
3602
3598
replaceValue (*Shuffle, *NewShuffle);
3603
3599
}
0 commit comments