Skip to content

Commit d98ec01

Browse files
committed
Added pattern for folding packed integer constructions.
1 parent 424abcb commit d98ec01

File tree

2 files changed

+233
-0
lines changed

2 files changed

+233
-0
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/DenseMap.h"
1717
#include "llvm/ADT/STLExtras.h"
1818
#include "llvm/ADT/ScopeExit.h"
19+
#include "llvm/ADT/SmallBitVector.h"
1920
#include "llvm/ADT/Statistic.h"
2021
#include "llvm/Analysis/AssumptionCache.h"
2122
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -125,6 +126,7 @@ class VectorCombine {
125126
bool scalarizeLoadExtract(Instruction &I);
126127
bool scalarizeExtExtract(Instruction &I);
127128
bool foldConcatOfBoolMasks(Instruction &I);
129+
bool foldIntegerPackFromVector(Instruction &I);
128130
bool foldPermuteOfBinops(Instruction &I);
129131
bool foldShuffleOfBinops(Instruction &I);
130132
bool foldShuffleOfSelects(Instruction &I);
@@ -1957,6 +1959,126 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
19571959
return true;
19581960
}
19591961

1962+
/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns
1963+
/// which extract vector elements and pack them in the same relative positions.
1964+
static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
1965+
uint64_t &VecOffset,
1966+
SmallBitVector &Mask) {
1967+
static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
1968+
ShlAmt = 0;
1969+
return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
1970+
};
1971+
1972+
// First try to match extractelement -> zext -> shl
1973+
uint64_t VecIdx, ShlAmt;
1974+
if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
1975+
m_Value(Vec), m_ConstantInt(VecIdx))),
1976+
ShlAmt))) {
1977+
auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
1978+
if (!VecTy)
1979+
return false;
1980+
auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
1981+
if (!EltTy)
1982+
return false;
1983+
1984+
const unsigned EltBitWidth = EltTy->getBitWidth();
1985+
const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
1986+
if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
1987+
return false;
1988+
const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
1989+
1990+
if (ShlEltAmt > VecIdx)
1991+
return false;
1992+
VecOffset = VecIdx - ShlEltAmt;
1993+
Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth);
1994+
Mask.set(ShlEltAmt);
1995+
return true;
1996+
}
1997+
1998+
// Now try to match shufflevector -> bitcast
1999+
Value *Lhs, *Rhs;
2000+
ArrayRef<int> ShuffleMask;
2001+
if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs),
2002+
m_Mask(ShuffleMask)))))
2003+
return false;
2004+
Mask.resize(ShuffleMask.size());
2005+
2006+
if (isa<Constant>(Lhs))
2007+
std::swap(Lhs, Rhs);
2008+
2009+
auto *RhsConst = dyn_cast<Constant>(Rhs);
2010+
if (!RhsConst)
2011+
return false;
2012+
2013+
auto *LhsTy = dyn_cast<FixedVectorType>(Lhs->getType());
2014+
if (!LhsTy)
2015+
return false;
2016+
2017+
Vec = Lhs;
2018+
const unsigned NumLhsElts = LhsTy->getNumElements();
2019+
bool FoundVecOffset = false;
2020+
for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
2021+
if (ShuffleMask[Idx] == PoisonMaskElem)
2022+
return false;
2023+
const unsigned ShuffleIdx = ShuffleMask[Idx];
2024+
if (ShuffleIdx >= NumLhsElts) {
2025+
const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
2026+
auto *RhsElt =
2027+
dyn_cast<ConstantInt>(RhsConst->getAggregateElement(RhsIdx));
2028+
if (!RhsElt || RhsElt->getZExtValue() != 0)
2029+
return false;
2030+
continue;
2031+
}
2032+
2033+
if (FoundVecOffset) {
2034+
if (VecOffset + Idx != ShuffleIdx)
2035+
return false;
2036+
} else {
2037+
if (ShuffleIdx < Idx)
2038+
return false;
2039+
VecOffset = ShuffleIdx - Idx;
2040+
FoundVecOffset = true;
2041+
}
2042+
Mask.set(Idx);
2043+
}
2044+
return FoundVecOffset;
2045+
}
2046+
/// Try to fold the or of two scalar integers whose contents are packed elements
2047+
/// of the same vector.
2048+
bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
2049+
assert(I.getOpcode() == Instruction::Or);
2050+
Value *LhsVec, *RhsVec;
2051+
uint64_t LhsVecOffset, RhsVecOffset;
2052+
SmallBitVector Mask;
2053+
if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
2054+
Mask))
2055+
return false;
2056+
if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
2057+
Mask))
2058+
return false;
2059+
if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
2060+
return false;
2061+
2062+
// Convert into shufflevector -> bitcast
2063+
SmallVector<int> ShuffleMask;
2064+
ShuffleMask.reserve(Mask.size());
2065+
const unsigned ZeroVecIdx =
2066+
cast<FixedVectorType>(LhsVec->getType())->getNumElements();
2067+
for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
2068+
if (Mask.test(Idx))
2069+
ShuffleMask.push_back(LhsVecOffset + Idx);
2070+
else
2071+
ShuffleMask.push_back(ZeroVecIdx);
2072+
}
2073+
2074+
Value *MaskedVec = Builder.CreateShuffleVector(
2075+
LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
2076+
LhsVec->getName() + ".extract");
2077+
Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName());
2078+
replaceValue(I, *CastedVec);
2079+
return true;
2080+
}
2081+
19602082
/// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
19612083
/// --> "binop (shuffle), (shuffle)".
19622084
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
@@ -3742,6 +3864,9 @@ bool VectorCombine::run() {
37423864
if (Opcode == Instruction::Store)
37433865
MadeChange |= foldSingleElementStore(I);
37443866

3867+
if (isa<IntegerType>(I.getType()) && Opcode == Instruction::Or)
3868+
MadeChange |= foldIntegerPackFromVector(I);
3869+
37453870
// If this is an early pipeline invocation of this pass, we are done.
37463871
if (TryEarlyFoldsOnly)
37473872
return;
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=vector-combine %s | FileCheck %s
3+
4+
; All four i8 lanes of %v are packed into an i32 by a linear chain of
; zext -> shl -> or; the whole chain folds to a single bitcast.
define i32 @bitcast.v2i(<4 x i8> %v) {
; CHECK-LABEL: define i32 @bitcast.v2i(
; CHECK-SAME: <4 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
; CHECK-NEXT: ret i32 [[X_3]]
;
  ; Lane 0 occupies bits 0..7 (no shift needed).
  %v.0 = extractelement <4 x i8> %v, i32 0
  %z.0 = zext i8 %v.0 to i32

  ; Lane 1 shifted into bits 8..15.
  %v.1 = extractelement <4 x i8> %v, i32 1
  %z.1 = zext i8 %v.1 to i32
  %s.1 = shl i32 %z.1, 8
  %x.1 = or i32 %z.0, %s.1

  ; Lane 2 shifted into bits 16..23.
  %v.2 = extractelement <4 x i8> %v, i32 2
  %z.2 = zext i8 %v.2 to i32
  %s.2 = shl i32 %z.2, 16
  %x.2 = or i32 %x.1, %s.2

  ; Lane 3 shifted into bits 24..31.
  %v.3 = extractelement <4 x i8> %v, i32 3
  %z.3 = zext i8 %v.3 to i32
  %s.3 = shl i32 %z.3, 24
  %x.3 = or i32 %x.2, %s.3

  ret i32 %x.3
}
30+
31+
; Same full pack as @bitcast.v2i, but the ors form a balanced tree
; ((0|1) | (2|3)) rather than a linear chain; still folds to one bitcast.
define i32 @bitcast.v2i.tree(<4 x i8> %v) {
; CHECK-LABEL: define i32 @bitcast.v2i.tree(
; CHECK-SAME: <4 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
; CHECK-NEXT: ret i32 [[X]]
;
  %v.0 = extractelement <4 x i8> %v, i32 0
  %z.0 = zext i8 %v.0 to i32

  %v.1 = extractelement <4 x i8> %v, i32 1
  %z.1 = zext i8 %v.1 to i32
  %s.1 = shl i32 %z.1, 8
  ; Left subtree: lanes 0 and 1.
  %x.1 = or i32 %z.0, %s.1

  %v.2 = extractelement <4 x i8> %v, i32 2
  %z.2 = zext i8 %v.2 to i32
  %s.2 = shl i32 %z.2, 16

  %v.3 = extractelement <4 x i8> %v, i32 3
  %z.3 = zext i8 %v.3 to i32
  %s.3 = shl i32 %z.3, 24
  ; Right subtree: lanes 2 and 3.
  %x.3 = or i32 %s.2, %s.3

  ; Root or combines the two half-packs.
  %x = or i32 %x.1, %x.3

  ret i32 %x
}
58+
59+
; Pack from the middle of a wider vector: lanes 3..6 of an <8 x i8> go into
; an i32. Folds to a lane-extracting shuffle followed by a bitcast.
define i32 @extract.i32(<8 x i8> %v) {
; CHECK-LABEL: define i32 @extract.i32(
; CHECK-SAME: <8 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[V_EXTRACT4:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> zeroinitializer, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT4]] to i32
; CHECK-NEXT: ret i32 [[X_3]]
;
  ; Vector lane 3 -> scalar bits 0..7.
  %v.0 = extractelement <8 x i8> %v, i32 3
  %z.0 = zext i8 %v.0 to i32

  ; Vector lane 4 -> scalar bits 8..15.
  %v.1 = extractelement <8 x i8> %v, i32 4
  %z.1 = zext i8 %v.1 to i32
  %s.1 = shl i32 %z.1, 8
  %x.1 = or i32 %z.0, %s.1

  ; Vector lane 5 -> scalar bits 16..23.
  %v.2 = extractelement <8 x i8> %v, i32 5
  %z.2 = zext i8 %v.2 to i32
  %s.2 = shl i32 %z.2, 16
  %x.2 = or i32 %x.1, %s.2

  ; Vector lane 6 -> scalar bits 24..31.
  %v.3 = extractelement <8 x i8> %v, i32 6
  %z.3 = zext i8 %v.3 to i32
  %s.3 = shl i32 %z.3, 24
  %x.3 = or i32 %x.2, %s.3

  ret i32 %x.3
}
86+
87+
; Partial pack: lane 2 is never inserted, so bits 16..23 of the result must be
; zero. The fold selects from a zero vector (index 4) for the missing lane.
define i32 @partial(<4 x i8> %v) {
; CHECK-LABEL: define i32 @partial(
; CHECK-SAME: <4 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[V_EXTRACT2:%.*]] = shufflevector <4 x i8> [[V]], <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT2]] to i32
; CHECK-NEXT: ret i32 [[X_3]]
;
  %v.0 = extractelement <4 x i8> %v, i32 0
  %z.0 = zext i8 %v.0 to i32

  %v.1 = extractelement <4 x i8> %v, i32 1
  %z.1 = zext i8 %v.1 to i32
  %s.1 = shl i32 %z.1, 8
  %x.1 = or i32 %z.0, %s.1

  ; Note: lane 2 (bits 16..23) is intentionally skipped.
  %v.3 = extractelement <4 x i8> %v, i32 3
  %z.3 = zext i8 %v.3 to i32
  %s.3 = shl i32 %z.3, 24
  %x.3 = or i32 %x.1, %s.3

  ret i32 %x.3
}

0 commit comments

Comments
 (0)