|
16 | 16 | #include "llvm/ADT/DenseMap.h" |
17 | 17 | #include "llvm/ADT/STLExtras.h" |
18 | 18 | #include "llvm/ADT/ScopeExit.h" |
| 19 | +#include "llvm/ADT/SmallBitVector.h" |
19 | 20 | #include "llvm/ADT/Statistic.h" |
20 | 21 | #include "llvm/Analysis/AssumptionCache.h" |
21 | 22 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
@@ -125,6 +126,7 @@ class VectorCombine { |
125 | 126 | bool scalarizeLoadExtract(Instruction &I); |
126 | 127 | bool scalarizeExtExtract(Instruction &I); |
127 | 128 | bool foldConcatOfBoolMasks(Instruction &I); |
| 129 | + bool foldIntegerPackFromVector(Instruction &I); |
128 | 130 | bool foldPermuteOfBinops(Instruction &I); |
129 | 131 | bool foldShuffleOfBinops(Instruction &I); |
130 | 132 | bool foldShuffleOfSelects(Instruction &I); |
@@ -1957,6 +1959,126 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) { |
1957 | 1959 | return true; |
1958 | 1960 | } |
1959 | 1961 |
|
| 1962 | +/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns |
| 1963 | +/// which extract vector elements and pack them in the same relative positions. |
| 1964 | +static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec, |
| 1965 | + uint64_t &VecOffset, |
| 1966 | + SmallBitVector &Mask) { |
| 1967 | + static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) { |
| 1968 | + ShlAmt = 0; |
| 1969 | + return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base); |
| 1970 | + }; |
| 1971 | + |
| 1972 | + // First try to match extractelement -> zext -> shl |
| 1973 | + uint64_t VecIdx, ShlAmt; |
| 1974 | + if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt( |
| 1975 | + m_Value(Vec), m_ConstantInt(VecIdx))), |
| 1976 | + ShlAmt))) { |
| 1977 | + auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType()); |
| 1978 | + if (!VecTy) |
| 1979 | + return false; |
| 1980 | + auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType()); |
| 1981 | + if (!EltTy) |
| 1982 | + return false; |
| 1983 | + |
| 1984 | + const unsigned EltBitWidth = EltTy->getBitWidth(); |
| 1985 | + const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth(); |
| 1986 | + if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0) |
| 1987 | + return false; |
| 1988 | + const unsigned ShlEltAmt = ShlAmt / EltBitWidth; |
| 1989 | + |
| 1990 | + if (ShlEltAmt > VecIdx) |
| 1991 | + return false; |
| 1992 | + VecOffset = VecIdx - ShlEltAmt; |
| 1993 | + Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth); |
| 1994 | + Mask.set(ShlEltAmt); |
| 1995 | + return true; |
| 1996 | + } |
| 1997 | + |
| 1998 | + // Now try to match shufflevector -> bitcast |
| 1999 | + Value *Lhs, *Rhs; |
| 2000 | + ArrayRef<int> ShuffleMask; |
| 2001 | + if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs), |
| 2002 | + m_Mask(ShuffleMask))))) |
| 2003 | + return false; |
| 2004 | + Mask.resize(ShuffleMask.size()); |
| 2005 | + |
| 2006 | + if (isa<Constant>(Lhs)) |
| 2007 | + std::swap(Lhs, Rhs); |
| 2008 | + |
| 2009 | + auto *RhsConst = dyn_cast<Constant>(Rhs); |
| 2010 | + if (!RhsConst) |
| 2011 | + return false; |
| 2012 | + |
| 2013 | + auto *LhsTy = dyn_cast<FixedVectorType>(Lhs->getType()); |
| 2014 | + if (!LhsTy) |
| 2015 | + return false; |
| 2016 | + |
| 2017 | + Vec = Lhs; |
| 2018 | + const unsigned NumLhsElts = LhsTy->getNumElements(); |
| 2019 | + bool FoundVecOffset = false; |
| 2020 | + for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) { |
| 2021 | + if (ShuffleMask[Idx] == PoisonMaskElem) |
| 2022 | + return false; |
| 2023 | + const unsigned ShuffleIdx = ShuffleMask[Idx]; |
| 2024 | + if (ShuffleIdx >= NumLhsElts) { |
| 2025 | + const unsigned RhsIdx = ShuffleIdx - NumLhsElts; |
| 2026 | + auto *RhsElt = |
| 2027 | + dyn_cast<ConstantInt>(RhsConst->getAggregateElement(RhsIdx)); |
| 2028 | + if (!RhsElt || RhsElt->getZExtValue() != 0) |
| 2029 | + return false; |
| 2030 | + continue; |
| 2031 | + } |
| 2032 | + |
| 2033 | + if (FoundVecOffset) { |
| 2034 | + if (VecOffset + Idx != ShuffleIdx) |
| 2035 | + return false; |
| 2036 | + } else { |
| 2037 | + if (ShuffleIdx < Idx) |
| 2038 | + return false; |
| 2039 | + VecOffset = ShuffleIdx - Idx; |
| 2040 | + FoundVecOffset = true; |
| 2041 | + } |
| 2042 | + Mask.set(Idx); |
| 2043 | + } |
| 2044 | + return FoundVecOffset; |
| 2045 | +} |
| 2046 | +/// Try to fold the or of two scalar integers whose contents are packed elements |
| 2047 | +/// of the same vector. |
| 2048 | +bool VectorCombine::foldIntegerPackFromVector(Instruction &I) { |
| 2049 | + assert(I.getOpcode() == Instruction::Or); |
| 2050 | + Value *LhsVec, *RhsVec; |
| 2051 | + uint64_t LhsVecOffset, RhsVecOffset; |
| 2052 | + SmallBitVector Mask; |
| 2053 | + if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset, |
| 2054 | + Mask)) |
| 2055 | + return false; |
| 2056 | + if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset, |
| 2057 | + Mask)) |
| 2058 | + return false; |
| 2059 | + if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset) |
| 2060 | + return false; |
| 2061 | + |
| 2062 | + // Convert into shufflevector -> bitcast |
| 2063 | + SmallVector<int> ShuffleMask; |
| 2064 | + ShuffleMask.reserve(Mask.size()); |
| 2065 | + const unsigned ZeroVecIdx = |
| 2066 | + cast<FixedVectorType>(LhsVec->getType())->getNumElements(); |
| 2067 | + for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) { |
| 2068 | + if (Mask.test(Idx)) |
| 2069 | + ShuffleMask.push_back(LhsVecOffset + Idx); |
| 2070 | + else |
| 2071 | + ShuffleMask.push_back(ZeroVecIdx); |
| 2072 | + } |
| 2073 | + |
| 2074 | + Value *MaskedVec = Builder.CreateShuffleVector( |
| 2075 | + LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask, |
| 2076 | + LhsVec->getName() + ".extract"); |
| 2077 | + Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName()); |
| 2078 | + replaceValue(I, *CastedVec); |
| 2079 | + return true; |
| 2080 | +} |
| 2081 | + |
1960 | 2082 | /// Try to convert "shuffle (binop (shuffle, shuffle)), undef" |
1961 | 2083 | /// --> "binop (shuffle), (shuffle)". |
1962 | 2084 | bool VectorCombine::foldPermuteOfBinops(Instruction &I) { |
@@ -3742,6 +3864,9 @@ bool VectorCombine::run() { |
3742 | 3864 | if (Opcode == Instruction::Store) |
3743 | 3865 | MadeChange |= foldSingleElementStore(I); |
3744 | 3866 |
|
| 3867 | + if (isa<IntegerType>(I.getType()) && Opcode == Instruction::Or) |
| 3868 | + MadeChange |= foldIntegerPackFromVector(I); |
| 3869 | + |
3745 | 3870 | // If this is an early pipeline invocation of this pass, we are done. |
3746 | 3871 | if (TryEarlyFoldsOnly) |
3747 | 3872 | return; |
|
0 commit comments