1717
1818#include " llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h"
1919#include " llvm/IR/Constants.h"
20+ #include " llvm/IR/DerivedTypes.h"
2021#include " llvm/IR/IntrinsicInst.h"
2122#include " llvm/IR/Value.h"
2223#include " llvm/Transforms/InstCombine/InstCombiner.h"
@@ -27,41 +28,121 @@ using namespace llvm::PatternMatch;
2728namespace llvm {
2829namespace ARMCommon {
2930
30- // / Convert a table lookup to shufflevector if the mask is constant.
31- // / This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
32- // / which case we could lower the shufflevector with rev64 instructions
33- // / as it's actually a byte reverse.
34- Instruction *simplifyNeonTbl1 (IntrinsicInst &II, InstCombiner &IC) {
31+ // / Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
32+ // / at most two source operands are actually referenced.
33+ Instruction *simplifyNeonTbl (IntrinsicInst &II, InstCombiner &IC,
34+ bool IsExtension) {
3535 // Bail out if the mask is not a constant.
36- auto *C = dyn_cast<Constant>(II.getArgOperand (1 ));
36+ auto *C = dyn_cast<Constant>(II.getArgOperand (II. arg_size () - 1 ));
3737 if (!C)
3838 return nullptr ;
3939
40- auto *VecTy = cast<FixedVectorType>(II.getType ());
41- unsigned NumElts = VecTy ->getNumElements ();
40+ auto *RetTy = cast<FixedVectorType>(II.getType ());
41+ unsigned NumIndexes = RetTy ->getNumElements ();
4242
43- // Only perform this transformation for <8 x i8> vector types.
44- if (!VecTy->getElementType ()->isIntegerTy (8 ) || NumElts != 8 )
43+ // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
44+ if (!(RetTy->getElementType ()->isIntegerTy (8 ) &&
45+ (NumIndexes == 8 || NumIndexes == 16 )))
4546 return nullptr ;
4647
47- int Indexes[8 ];
48+ // For tbx instructions, the first argument is the "fallback" vector, which
49+ // has the same length as the mask and return type.
50+ unsigned int StartIndex = (unsigned )IsExtension;
51+ auto *SourceTy =
52+ cast<FixedVectorType>(II.getArgOperand (StartIndex)->getType ());
53+ // Note that the element count of each source vector does *not* need to be the
54+ // same as the element count of the return type and mask! All source vectors
55+ // must have the same element count as each other, though.
56+ unsigned NumElementsPerSource = SourceTy->getNumElements ();
57+
58+ // There are no tbl/tbx intrinsics for which the destination size exceeds the
59+ // source size. However, our definitions of the intrinsics, at least in
60+ // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
61+ // *could* technically happen.
62+ if (NumIndexes > NumElementsPerSource) {
63+ return nullptr ;
64+ }
65+
66+ // The tbl/tbx intrinsics take several source operands followed by a mask
67+ // operand.
68+ unsigned int NumSourceOperands = II.arg_size () - 1 - (unsigned )IsExtension;
69+
70+ // Map input operands to shuffle indices. This also helpfully deduplicates the
71+ // input arguments, in case the same value is passed as an argument multiple
72+ // times.
73+ SmallDenseMap<Value *, unsigned , 2 > ValueToShuffleSlot;
74+ Value *ShuffleOperands[2 ] = {PoisonValue::get (SourceTy),
75+ PoisonValue::get (SourceTy)};
4876
49- for (unsigned I = 0 ; I < NumElts; ++I) {
77+ int Indexes[16 ];
78+ for (unsigned I = 0 ; I < NumIndexes; ++I) {
5079 Constant *COp = C->getAggregateElement (I);
5180
52- if (!COp || !isa<ConstantInt>(COp))
81+ if (!COp || ( !isa<UndefValue>(COp) && !isa< ConstantInt>(COp) ))
5382 return nullptr ;
5483
55- Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue ();
84+ if (isa<UndefValue>(COp)) {
85+ Indexes[I] = -1 ;
86+ continue ;
87+ }
88+
89+ uint64_t Index = cast<ConstantInt>(COp)->getZExtValue ();
90+ // The index of the input argument that this index references (0 = first
91+ // source argument, etc).
92+ unsigned SourceOperandIndex = Index / NumElementsPerSource;
93+ // The index of the element at that source operand.
94+ unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
95+
96+ Value *SourceOperand;
97+ if (SourceOperandIndex >= NumSourceOperands) {
98+ // This index is out of bounds. Map it to index into either the fallback
99+ // vector (tbx) or vector of zeroes (tbl).
100+ SourceOperandIndex = NumSourceOperands;
101+ if (IsExtension) {
102+ // For out-of-bounds indices in tbx, choose the `I`th element of the
103+ // fallback.
104+ SourceOperand = II.getArgOperand (0 );
105+ SourceOperandElementIndex = I;
106+ } else {
107+ // Otherwise, choose some element from the dummy vector of zeroes (we'll
108+ // always choose the first).
109+ SourceOperand = Constant::getNullValue (SourceTy);
110+ SourceOperandElementIndex = 0 ;
111+ }
112+ } else {
113+ SourceOperand = II.getArgOperand (SourceOperandIndex + StartIndex);
114+ }
56115
57- // Make sure the mask indices are in range.
58- if ((unsigned )Indexes[I] >= NumElts)
116+ // The source operand may be the fallback vector, which may not have the
117+ // same number of elements as the source vector. In that case, we *could*
118+ // choose to extend its length with another shufflevector, but it's simpler
119+ // to just bail instead.
120+ if (cast<FixedVectorType>(SourceOperand->getType ())->getNumElements () !=
121+ NumElementsPerSource) {
59122 return nullptr ;
123+ }
124+
125+ // We now know the source operand referenced by this index. Make it a
126+ // shufflevector operand, if it isn't already.
127+ unsigned NumSlots = ValueToShuffleSlot.size ();
128+ // This shuffle references more than two sources, and hence cannot be
129+ // represented as a shufflevector.
130+ if (NumSlots == 2 && !ValueToShuffleSlot.contains (SourceOperand)) {
131+ return nullptr ;
132+ }
133+ auto [It, Inserted] =
134+ ValueToShuffleSlot.try_emplace (SourceOperand, NumSlots);
135+ if (Inserted) {
136+ ShuffleOperands[It->getSecond ()] = SourceOperand;
137+ }
138+
139+ unsigned RemappedIndex =
140+ (It->getSecond () * NumElementsPerSource) + SourceOperandElementIndex;
141+ Indexes[I] = RemappedIndex;
60142 }
61143
62- auto *V1 = II.getArgOperand (0 );
63- auto *V2 = Constant::getNullValue (V1->getType ());
64- Value *Shuf = IC.Builder .CreateShuffleVector (V1, V2, ArrayRef (Indexes));
144+ Value *Shuf = IC.Builder .CreateShuffleVector (
145+ ShuffleOperands[0 ], ShuffleOperands[1 ], ArrayRef (Indexes, NumIndexes));
65146 return IC.replaceInstUsesWith (II, Shuf);
66147}
67148
0 commit comments