1818
1919#include " ARMCommonInstCombineIntrinsic.h"
2020#include " llvm/IR/Constants.h"
21+ #include " llvm/IR/DerivedTypes.h"
2122#include " llvm/IR/IntrinsicInst.h"
2223#include " llvm/IR/Value.h"
2324#include " llvm/Transforms/InstCombine/InstCombiner.h"
@@ -28,41 +29,123 @@ using namespace llvm::PatternMatch;
2829namespace llvm {
2930namespace ARMCommon {
3031
31- // / Convert a table lookup to shufflevector if the mask is constant.
32- // / This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
33- // / which case we could lower the shufflevector with rev64 instructions
34- // / as it's actually a byte reverse.
35- Instruction *simplifyNeonTbl1 (IntrinsicInst &II, InstCombiner &IC) {
32+ // / Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
33+ // / at most two source operands are actually referenced.
34+ Instruction *simplifyNeonTbl (IntrinsicInst &II, InstCombiner &IC,
35+ bool IsExtension) {
3636 // Bail out if the mask is not a constant.
37- auto *C = dyn_cast<Constant>(II.getArgOperand (1 ));
37+ auto *C = dyn_cast<Constant>(II.getArgOperand (II. arg_size () - 1 ));
3838 if (!C)
3939 return nullptr ;
4040
41- auto *VecTy = cast<FixedVectorType>(II.getType ());
42- unsigned NumElts = VecTy ->getNumElements ();
41+ auto *RetTy = cast<FixedVectorType>(II.getType ());
42+ unsigned NumIndexes = RetTy ->getNumElements ();
4343
44- // Only perform this transformation for <8 x i8> vector types.
45- if (!VecTy->getElementType ()->isIntegerTy (8 ) || NumElts != 8 )
44+ // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
45+ // Even the language-level intrinsics that operate on u8/p8 should lower to an
46+ // LLVM intrinsic that operates on i8.
47+ if (!(RetTy->getElementType ()->isIntegerTy (8 ) &&
48+ (NumIndexes == 8 || NumIndexes == 16 )))
4649 return nullptr ;
4750
48- int Indexes[8 ];
51+ // For tbx instructions, the first argument is the "fallback" vector, which
52+ // has the same length as the mask and return type.
53+ unsigned int StartIndex = (unsigned )IsExtension;
54+ auto *SourceTy =
55+ cast<FixedVectorType>(II.getArgOperand (StartIndex)->getType ());
56+ // Note that the element count of each source vector does *not* need to be the
57+ // same as the element count of the return type and mask! All source vectors
58+ // must have the same element count as each other, though.
59+ unsigned NumElementsPerSource = SourceTy->getNumElements ();
60+
61+ // There are no tbl/tbx intrinsics for which the destination size exceeds the
62+ // source size. However, our definitions of the intrinsics, at least in
63+ // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
64+ // *could* technically happen.
65+ if (NumIndexes > NumElementsPerSource) {
66+ return nullptr ;
67+ }
68+
69+ // The tbl/tbx intrinsics take several source operands followed by a mask
70+ // operand.
71+ unsigned int NumSourceOperands = II.arg_size () - 1 - (unsigned )IsExtension;
72+
73+ // Map input operands to shuffle indices. This also helpfully deduplicates the
74+ // input arguments, in case the same value is passed as an argument multiple
75+ // times.
76+ SmallDenseMap<Value *, unsigned , 2 > ValueToShuffleSlot;
77+ Value *ShuffleOperands[2 ] = {PoisonValue::get (SourceTy),
78+ PoisonValue::get (SourceTy)};
4979
50- for (unsigned I = 0 ; I < NumElts; ++I) {
80+ int Indexes[16 ];
81+ for (unsigned I = 0 ; I < NumIndexes; ++I) {
5182 Constant *COp = C->getAggregateElement (I);
5283
53- if (!COp || !isa<ConstantInt>(COp))
84+ if (!COp || ( !isa<UndefValue>(COp) && !isa< ConstantInt>(COp) ))
5485 return nullptr ;
5586
56- Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue ();
87+ if (isa<UndefValue>(COp)) {
88+ Indexes[I] = -1 ;
89+ continue ;
90+ }
91+
92+ uint64_t Index = cast<ConstantInt>(COp)->getZExtValue ();
93+ // The index of the input argument that this index references (0 = first
94+ // source argument, etc).
95+ unsigned SourceOperandIndex = Index / NumElementsPerSource;
96+ // The index of the element at that source operand.
97+ unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
98+
99+ Value *SourceOperand;
100+ if (SourceOperandIndex >= NumSourceOperands) {
101+ // This index is out of bounds. Map it to index into either the fallback
102+ // vector (tbx) or vector of zeroes (tbl).
103+ SourceOperandIndex = NumSourceOperands;
104+ if (IsExtension) {
105+ // For out-of-bounds indices in tbx, choose the `I`th element of the
106+ // fallback.
107+ SourceOperand = II.getArgOperand (0 );
108+ SourceOperandElementIndex = I;
109+ } else {
110+ // Otherwise, choose some element from the dummy vector of zeroes (we'll
111+ // always choose the first).
112+ SourceOperand = Constant::getNullValue (SourceTy);
113+ SourceOperandElementIndex = 0 ;
114+ }
115+ } else {
116+ SourceOperand = II.getArgOperand (SourceOperandIndex + StartIndex);
117+ }
57118
58- // Make sure the mask indices are in range.
59- if ((unsigned )Indexes[I] >= NumElts)
119+ // The source operand may be the fallback vector, which may not have the
120+ // same number of elements as the source vector. In that case, we *could*
121+ // choose to extend its length with another shufflevector, but it's simpler
122+ // to just bail instead.
123+ if (cast<FixedVectorType>(SourceOperand->getType ())->getNumElements () !=
124+ NumElementsPerSource) {
60125 return nullptr ;
126+ }
127+
128+ // We now know the source operand referenced by this index. Make it a
129+ // shufflevector operand, if it isn't already.
130+ unsigned NumSlots = ValueToShuffleSlot.size ();
131+ // This shuffle references more than two sources, and hence cannot be
132+ // represented as a shufflevector.
133+ if (NumSlots == 2 && !ValueToShuffleSlot.contains (SourceOperand)) {
134+ return nullptr ;
135+ }
136+ auto [It, Inserted] =
137+ ValueToShuffleSlot.try_emplace (SourceOperand, NumSlots);
138+ if (Inserted) {
139+ ShuffleOperands[It->getSecond ()] = SourceOperand;
140+ }
141+
142+ unsigned RemappedIndex =
143+ (It->getSecond () * NumElementsPerSource) + SourceOperandElementIndex;
144+ Indexes[I] = RemappedIndex;
61145 }
62146
63- auto *V1 = II.getArgOperand (0 );
64- auto *V2 = Constant::getNullValue (V1->getType ());
65- Value *Shuf = IC.Builder .CreateShuffleVector (V1, V2, ArrayRef (Indexes));
147+ Value *Shuf = IC.Builder .CreateShuffleVector (
148+ ShuffleOperands[0 ], ShuffleOperands[1 ], ArrayRef (Indexes, NumIndexes));
66149 return IC.replaceInstUsesWith (II, Shuf);
67150}
68151
0 commit comments