 //===----------------------------------------------------------------------===//
 
 #include "InstCombineInternal.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/FloatingPointPredicateUtils.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -3589,6 +3592,154 @@ static Value *foldOrOfInversions(BinaryOperator &I,
   return nullptr;
 }
 
+/// Match \p V as "shufflevector -> bitcast" or "extractelement -> zext -> shl"
+/// patterns, which extract vector elements and pack them in the same relative
+/// positions.
+///
+/// \p Vec is the underlying vector being extracted from.
+/// \p Mask is a bitmask identifying which packed elements are obtained from
+/// the vector.
+/// \p VecOffset is the index of the vector element that corresponds to index 0
+/// of the mask.
+static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
+                                          int64_t &VecOffset,
+                                          SmallBitVector &Mask,
+                                          const DataLayout &DL) {
+  static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
+    ShlAmt = 0;
+    return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
+  };
+
+  // First try to match extractelement -> zext -> shl
+  uint64_t VecIdx, ShlAmt;
+  if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
+                                    m_Value(Vec), m_ConstantInt(VecIdx))),
+                                ShlAmt))) {
+    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+    if (!VecTy)
+      return false;
+    auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
+    if (!EltTy)
+      return false;
+
+    const unsigned EltBitWidth = EltTy->getBitWidth();
+    const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
+    if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
+      return false;
+    const unsigned TargetEltWidth = TargetBitWidth / EltBitWidth;
+    const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
+
+    const unsigned MaskIdx =
+        DL.isLittleEndian() ? ShlEltAmt : TargetEltWidth - ShlEltAmt - 1;
+
+    VecOffset = static_cast<int64_t>(VecIdx) - static_cast<int64_t>(MaskIdx);
+    Mask.resize(TargetEltWidth);
+    Mask.set(MaskIdx);
+    return true;
+  }
+
+  // Now try to match a bitcasted subvector.
+  Instruction *SrcVecI;
+  if (!match(V, m_BitCast(m_Instruction(SrcVecI))))
+    return false;
+
+  auto *SrcTy = dyn_cast<FixedVectorType>(SrcVecI->getType());
+  if (!SrcTy)
+    return false;
+
+  Mask.resize(SrcTy->getNumElements());
+
+  // First check for a subvector obtained from a shufflevector.
+  if (isa<ShuffleVectorInst>(SrcVecI)) {
+    Constant *ConstVec;
+    ArrayRef<int> ShuffleMask;
+    if (!match(SrcVecI, m_Shuffle(m_Value(Vec), m_Constant(ConstVec),
+                                  m_Mask(ShuffleMask))))
+      return false;
+
+    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+    if (!VecTy)
+      return false;
+
+    const unsigned NumVecElts = VecTy->getNumElements();
+    bool FoundVecOffset = false;
+    for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
+      if (ShuffleMask[Idx] == PoisonMaskElem)
+        return false;
+      const unsigned ShuffleIdx = ShuffleMask[Idx];
+      if (ShuffleIdx >= NumVecElts) {
+        const unsigned ConstIdx = ShuffleIdx - NumVecElts;
+        auto *ConstElt =
+            dyn_cast<ConstantInt>(ConstVec->getAggregateElement(ConstIdx));
+        if (!ConstElt || !ConstElt->isNullValue())
+          return false;
+        continue;
+      }
+
+      if (FoundVecOffset) {
+        if (VecOffset + Idx != ShuffleIdx)
+          return false;
+      } else {
+        if (ShuffleIdx < Idx)
+          return false;
+        VecOffset = ShuffleIdx - Idx;
+        FoundVecOffset = true;
+      }
+      Mask.set(Idx);
+    }
+    return FoundVecOffset;
+  }
+
+  // Check for a subvector obtained as an (insertelement V, 0, idx)
+  uint64_t InsertIdx;
+  if (!match(SrcVecI,
+             m_InsertElt(m_Value(Vec), m_Zero(), m_ConstantInt(InsertIdx))))
+    return false;
+
+  auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+  if (!VecTy)
+    return false;
+  VecOffset = 0;
+  bool AlreadyInsertedMaskedElt = Mask.test(InsertIdx);
+  Mask.set();
+  if (!AlreadyInsertedMaskedElt)
+    Mask.reset(InsertIdx);
+  return true;
+}
+
+/// Try to fold the join of two scalar integers whose contents are packed
+/// elements of the same vector.
+static Instruction *foldIntegerPackFromVector(Instruction &I,
+                                              InstCombiner::BuilderTy &Builder,
+                                              const DataLayout &DL) {
+  assert(I.getOpcode() == Instruction::Or);
+  Value *LhsVec, *RhsVec;
+  int64_t LhsVecOffset, RhsVecOffset;
+  SmallBitVector Mask;
+  if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
+                                     Mask, DL))
+    return nullptr;
+  if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
+                                     Mask, DL))
+    return nullptr;
+  if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
+    return nullptr;
+
+  // Convert into shufflevector -> bitcast.
+  const unsigned ZeroVecIdx =
+      cast<FixedVectorType>(LhsVec->getType())->getNumElements();
+  SmallVector<int> ShuffleMask(Mask.size(), ZeroVecIdx);
+  for (unsigned Idx : Mask.set_bits()) {
+    assert(LhsVecOffset + Idx >= 0);
+    ShuffleMask[Idx] = LhsVecOffset + Idx;
+  }
+
+  Value *MaskedVec = Builder.CreateShuffleVector(
+      LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
+      I.getName() + ".v");
+  return CastInst::Create(Instruction::BitCast, MaskedVec, I.getType());
+}
+
 // A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
 // track these properties for preservation. Note that we can decompose
 // equivalent select form of this expression (e.g. (!(X & Mask) ? 0 : Mask *
@@ -3766,6 +3917,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
     return X;
 
+  if (Instruction *X = foldIntegerPackFromVector(I, Builder, DL))
+    return X;
+
   // (A & B) | (C & D) -> A ^ D where A == ~C && B == ~D
   // (A & B) | (C & D) -> A ^ C where A == ~D && B == ~C
   if (Value *V = foldOrOfInversions(I, Builder))
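
For intuition about what the new visitOr fold buys, the standalone C++ sketch below (purely illustrative, not code from this patch, and assuming a little-endian host) demonstrates the byte-level equivalence it relies on: OR-ing zero-extended, shifted vector elements into a scalar yields the same value as zeroing the unused lanes and reinterpreting the vector's storage as that scalar, which is the shufflevector-plus-bitcast form the fold emits.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // A <4 x i8> vector; element 0 sits at the lowest address.
  uint8_t Vec[4] = {0x11, 0x22, 0x33, 0x44};

  // "extractelement -> zext -> shl -> or" form: pack elements 0 and 1.
  uint32_t Packed = (uint32_t)Vec[0] | ((uint32_t)Vec[1] << 8);

  // "shufflevector (keep lanes 0 and 1, zero the rest) -> bitcast" form.
  uint8_t Masked[4] = {Vec[0], Vec[1], 0, 0};
  uint32_t Cast;
  std::memcpy(&Cast, Masked, sizeof(Cast));

  // On a little-endian host both routes produce 0x00002211.
  assert(Packed == Cast);
  return 0;
}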