|
16 | 16 | #include "llvm/ADT/DenseMap.h"
|
17 | 17 | #include "llvm/ADT/STLExtras.h"
|
18 | 18 | #include "llvm/ADT/ScopeExit.h"
|
| 19 | +#include "llvm/ADT/SmallVector.h" |
19 | 20 | #include "llvm/ADT/Statistic.h"
|
20 | 21 | #include "llvm/Analysis/AssumptionCache.h"
|
21 | 22 | #include "llvm/Analysis/BasicAliasAnalysis.h"
|
|
29 | 30 | #include "llvm/IR/Dominators.h"
|
30 | 31 | #include "llvm/IR/Function.h"
|
31 | 32 | #include "llvm/IR/IRBuilder.h"
|
| 33 | +#include "llvm/IR/Instructions.h" |
32 | 34 | #include "llvm/IR/PatternMatch.h"
|
33 | 35 | #include "llvm/Support/CommandLine.h"
|
34 | 36 | #include "llvm/Transforms/Utils/Local.h"
|
35 | 37 | #include "llvm/Transforms/Utils/LoopUtils.h"
|
36 | 38 | #include <numeric>
|
| 39 | +#include <optional> |
37 | 40 | #include <queue>
|
38 | 41 | #include <set>
|
| 42 | +#include <tuple> |
39 | 43 |
|
40 | 44 | #define DEBUG_TYPE "vector-combine"
|
41 | 45 | #include "llvm/Transforms/Utils/InstructionWorklist.h"
|
@@ -137,6 +141,7 @@ class VectorCombine {
|
137 | 141 | bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
|
138 | 142 | bool foldInterleaveIntrinsics(Instruction &I);
|
139 | 143 | bool shrinkType(Instruction &I);
|
| 144 | + bool shrinkLoadForShuffles(Instruction &I); |
140 | 145 |
|
141 | 146 | void replaceValue(Value &Old, Value &New) {
|
142 | 147 | LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n');
|
@@ -3861,6 +3866,126 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
|
3861 | 3866 | return true;
|
3862 | 3867 | }
|
3863 | 3868 |
|
| 3869 | +// Attempt to shrink loads that are only used by shufflevector instructions. |
| 3870 | +bool VectorCombine::shrinkLoadForShuffles(Instruction &I) { |
| 3871 | + auto *OldLoad = dyn_cast<LoadInst>(&I); |
| 3872 | + if (!OldLoad || !OldLoad->isSimple()) |
| 3873 | + return false; |
| 3874 | + |
| 3875 | + auto *OldLoadTy = dyn_cast<FixedVectorType>(OldLoad->getType()); |
| 3876 | + if (!OldLoadTy) |
| 3877 | + return false; |
| 3878 | + |
| 3879 | + unsigned const OldNumElements = OldLoadTy->getNumElements(); |
| 3880 | + |
| 3881 | + // Search all uses of load. If all uses are shufflevector instructions, and |
| 3882 | + // the second operands are all poison values, find the minimum and maximum |
| 3883 | + // indices of the vector elements referenced by all shuffle masks. |
| 3884 | + // Otherwise return `std::nullopt`. |
| 3885 | + using IndexRange = std::pair<int, int>; |
| 3886 | + auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> { |
| 3887 | + IndexRange OutputRange = IndexRange(OldNumElements, -1); |
| 3888 | + for (llvm::Use &Use : I.uses()) { |
| 3889 | + // Ensure all uses match the required pattern. |
| 3890 | + User *Shuffle = Use.getUser(); |
| 3891 | + ArrayRef<int> Mask; |
| 3892 | + |
| 3893 | + if (!match(Shuffle, |
| 3894 | + m_Shuffle(m_Specific(OldLoad), m_Undef(), m_Mask(Mask)))) |
| 3895 | + return std::nullopt; |
| 3896 | + |
| 3897 | + // Ignore shufflevector instructions that have no uses. |
| 3898 | + if (Shuffle->use_empty()) |
| 3899 | + continue; |
| 3900 | + |
| 3901 | + // Find the min and max indices used by the shufflevector instruction. |
| 3902 | + for (int Index : Mask) { |
| 3903 | + if (Index >= 0 && Index < static_cast<int>(OldNumElements)) { |
| 3904 | + OutputRange.first = std::min(Index, OutputRange.first); |
| 3905 | + OutputRange.second = std::max(Index, OutputRange.second); |
| 3906 | + } |
| 3907 | + } |
| 3908 | + } |
| 3909 | + |
| 3910 | + if (OutputRange.second < OutputRange.first) |
| 3911 | + return std::nullopt; |
| 3912 | + |
| 3913 | + return OutputRange; |
| 3914 | + }; |
| 3915 | + |
| 3916 | + // Get the range of vector elements used by shufflevector instructions. |
| 3917 | + if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) { |
| 3918 | + unsigned const NewNumElements = Indices->second + 1u; |
| 3919 | + |
| 3920 | + // If the range of vector elements is smaller than the full load, attempt |
| 3921 | + // to create a smaller load. |
| 3922 | + if (NewNumElements < OldNumElements) { |
| 3923 | + IRBuilder Builder(&I); |
| 3924 | + Builder.SetCurrentDebugLocation(I.getDebugLoc()); |
| 3925 | + |
| 3926 | + // Calculate costs of old and new ops. |
| 3927 | + Type *ElemTy = OldLoadTy->getElementType(); |
| 3928 | + FixedVectorType *NewLoadTy = FixedVectorType::get(ElemTy, NewNumElements); |
| 3929 | + Value *PtrOp = OldLoad->getPointerOperand(); |
| 3930 | + |
| 3931 | + InstructionCost OldCost = TTI.getMemoryOpCost( |
| 3932 | + Instruction::Load, OldLoad->getType(), OldLoad->getAlign(), |
| 3933 | + OldLoad->getPointerAddressSpace(), CostKind); |
| 3934 | + InstructionCost NewCost = |
| 3935 | + TTI.getMemoryOpCost(Instruction::Load, NewLoadTy, OldLoad->getAlign(), |
| 3936 | + OldLoad->getPointerAddressSpace(), CostKind); |
| 3937 | + |
| 3938 | + using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>; |
| 3939 | + SmallVector<UseEntry, 4u> NewUses; |
| 3940 | + |
| 3941 | + for (llvm::Use &Use : I.uses()) { |
| 3942 | + auto *Shuffle = cast<ShuffleVectorInst>(Use.getUser()); |
| 3943 | + ArrayRef<int> OldMask = Shuffle->getShuffleMask(); |
| 3944 | + |
| 3945 | + // Create entry for new use. |
| 3946 | + NewUses.push_back({Shuffle, OldMask}); |
| 3947 | + |
| 3948 | + // Update costs. |
| 3949 | + OldCost += |
| 3950 | + TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Shuffle->getType(), |
| 3951 | + OldLoadTy, OldMask, CostKind); |
| 3952 | + NewCost += |
| 3953 | + TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, Shuffle->getType(), |
| 3954 | + NewLoadTy, OldMask, CostKind); |
| 3955 | + } |
| 3956 | + |
| 3957 | + LLVM_DEBUG( |
| 3958 | + dbgs() << "Found a load used only by shufflevector instructions: " |
| 3959 | + << I << "\n OldCost: " << OldCost |
| 3960 | + << " vs NewCost: " << NewCost << "\n"); |
| 3961 | + |
| 3962 | + if (OldCost < NewCost || !NewCost.isValid()) |
| 3963 | + return false; |
| 3964 | + |
| 3965 | + // Create new load of smaller vector. |
| 3966 | + auto *NewLoad = cast<LoadInst>( |
| 3967 | + Builder.CreateAlignedLoad(NewLoadTy, PtrOp, OldLoad->getAlign())); |
| 3968 | + NewLoad->copyMetadata(I); |
| 3969 | + |
| 3970 | + // Replace all uses. |
| 3971 | + for (UseEntry &Use : NewUses) { |
| 3972 | + ShuffleVectorInst *Shuffle = Use.first; |
| 3973 | + std::vector<int> &NewMask = Use.second; |
| 3974 | + |
| 3975 | + Builder.SetInsertPoint(Shuffle); |
| 3976 | + Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc()); |
| 3977 | + Value *NewShuffle = Builder.CreateShuffleVector( |
| 3978 | + NewLoad, PoisonValue::get(NewLoadTy), NewMask); |
| 3979 | + |
| 3980 | + replaceValue(*Shuffle, *NewShuffle); |
| 3981 | + } |
| 3982 | + |
| 3983 | + return true; |
| 3984 | + } |
| 3985 | + } |
| 3986 | + return false; |
| 3987 | +} |
| 3988 | + |
3864 | 3989 | /// This is the entry point for all transforms. Pass manager differences are
|
3865 | 3990 | /// handled in the callers of this function.
|
3866 | 3991 | bool VectorCombine::run() {
|
@@ -3937,6 +4062,9 @@ bool VectorCombine::run() {
|
3937 | 4062 | MadeChange |= foldSelectShuffle(I);
|
3938 | 4063 | MadeChange |= foldShuffleToIdentity(I);
|
3939 | 4064 | break;
|
| 4065 | + case Instruction::Load: |
| 4066 | + MadeChange |= shrinkLoadForShuffles(I); |
| 4067 | + break; |
3940 | 4068 | case Instruction::BitCast:
|
3941 | 4069 | MadeChange |= foldBitcastShuffle(I);
|
3942 | 4070 | break;
|
|
0 commit comments