Skip to content

Commit 2540b12

Browse files
committed
[VectorCombine] Add foldShuffleToIdentity
This patch adds a basic version of a combine that attempts to fold away shuffles that, when combined, simplify away to an identity shuffle. For example:
  %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  %abt = fneg <4 x half> %at
  %abb = fneg <4 x half> %ab
  %r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
By looking through the shuffles, it can be simplified to:
  %r = fneg <8 x half> %a
The code tracks each lane starting from the original shuffle, keeping track of a vector of {src, idx}. As we propagate up through the instructions we will either look through intermediate instructions (binops and unops) or see a collection of lanes that all have the same src and incrementing idx (an identity). We can also see a single value with identical lanes, which we can treat like a splat. Only the basic version is added here, handling identities, splats, binops and unops. In follow-up patches other instructions can be added such as constants, intrinsics, cmp/sel and zext/sext/trunc.
1 parent b099dd6 commit 2540b12

File tree

5 files changed

+177
-130
lines changed

5 files changed

+177
-130
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ class VectorCombine {
113113
bool scalarizeLoadExtract(Instruction &I);
114114
bool foldShuffleOfBinops(Instruction &I);
115115
bool foldShuffleOfCastops(Instruction &I);
116+
bool foldShuffleToIdentity(Instruction &I);
116117
bool foldShuffleFromReductions(Instruction &I);
117118
bool foldTruncFromReductions(Instruction &I);
118119
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
@@ -1552,6 +1553,148 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
15521553
return true;
15531554
}
15541555

1556+
// Starting from a shuffle, look up through operands tracking the shuffled index
1557+
// of each lane. If we can simplify away the shuffles to identities then
1558+
// do so.
1559+
bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
1560+
FixedVectorType *Ty = dyn_cast<FixedVectorType>(I.getType());
1561+
if (!Ty || !isa<Instruction>(I.getOperand(0)) ||
1562+
!isa<Instruction>(I.getOperand(1)))
1563+
return false;
1564+
1565+
using InstLane = std::pair<Value *, int>;
1566+
1567+
auto LookThroughShuffles = [](Value *V, int Lane) -> InstLane {
1568+
while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
1569+
unsigned NumElts =
1570+
cast<FixedVectorType>(SV->getOperand(0)->getType())->getNumElements();
1571+
int M = SV->getMaskValue(Lane);
1572+
if (M < 0)
1573+
return {nullptr, -1};
1574+
else if (M < (int)NumElts) {
1575+
V = SV->getOperand(0);
1576+
Lane = M;
1577+
} else {
1578+
V = SV->getOperand(1);
1579+
Lane = M - NumElts;
1580+
}
1581+
}
1582+
return InstLane{V, Lane};
1583+
};
1584+
1585+
auto GenerateInstLaneVectorFromOperand =
1586+
[&LookThroughShuffles](const SmallVector<InstLane> &Item, int Op) {
1587+
SmallVector<InstLane> NItem;
1588+
for (InstLane V : Item) {
1589+
NItem.emplace_back(
1590+
!V.first
1591+
? InstLane{nullptr, -1}
1592+
: LookThroughShuffles(
1593+
cast<Instruction>(V.first)->getOperand(Op), V.second));
1594+
}
1595+
return NItem;
1596+
};
1597+
1598+
SmallVector<InstLane> Start;
1599+
for (unsigned M = 0; M < Ty->getNumElements(); ++M)
1600+
Start.push_back(LookThroughShuffles(&I, M));
1601+
1602+
SmallVector<SmallVector<InstLane>> Worklist;
1603+
Worklist.push_back(Start);
1604+
SmallPtrSet<Value *, 4> IdentityLeafs, SplatLeafs;
1605+
unsigned NumVisited = 0;
1606+
1607+
while (!Worklist.empty()) {
1608+
SmallVector<InstLane> Item = Worklist.pop_back_val();
1609+
if (++NumVisited > MaxInstrsToScan)
1610+
return false;
1611+
1612+
// If we found an undef first lane then bail out to keep things simple.
1613+
if (!Item[0].first)
1614+
return false;
1615+
1616+
// Look for an identity value.
1617+
if (Item[0].second == 0 && Item[0].first->getType() == Ty &&
1618+
all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
1619+
return !E.value().first || (E.value().first == Item[0].first &&
1620+
E.value().second == (int)E.index());
1621+
})) {
1622+
IdentityLeafs.insert(Item[0].first);
1623+
continue;
1624+
}
1625+
// Look for a splat value.
1626+
if (all_of(drop_begin(Item), [&](InstLane &IL) {
1627+
return !IL.first ||
1628+
(IL.first == Item[0].first && IL.second == Item[0].second);
1629+
})) {
1630+
SplatLeafs.insert(Item[0].first);
1631+
continue;
1632+
}
1633+
1634+
// We need each element to be the same type of value, and check that each
1635+
// element has a single use.
1636+
if (!all_of(drop_begin(Item), [&](InstLane IL) {
1637+
if (!IL.first)
1638+
return true;
1639+
if (isa<Instruction>(IL.first) &&
1640+
!cast<Instruction>(IL.first)->hasOneUse())
1641+
return false;
1642+
return IL.first->getValueID() == Item[0].first->getValueID() &&
1643+
(!isa<IntrinsicInst>(IL.first) ||
1644+
cast<IntrinsicInst>(IL.first)->getIntrinsicID() ==
1645+
cast<IntrinsicInst>(Item[0].first)->getIntrinsicID());
1646+
}))
1647+
return false;
1648+
1649+
// Check the operator is one that we support.
1650+
if (isa<BinaryOperator>(Item[0].first)) {
1651+
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
1652+
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 1));
1653+
} else if (isa<UnaryOperator>(Item[0].first)) {
1654+
Worklist.push_back(GenerateInstLaneVectorFromOperand(Item, 0));
1655+
} else {
1656+
return false;
1657+
}
1658+
}
1659+
1660+
// If we got this far, we know the shuffles are superfluous and can be
1661+
// removed. Scan through again and generate the new tree of instructions.
1662+
std::function<Value *(const SmallVector<InstLane> &)> generate =
1663+
[&](const SmallVector<InstLane> &Item) -> Value * {
1664+
if (IdentityLeafs.contains(Item[0].first) &&
1665+
all_of(drop_begin(enumerate(Item)), [&](const auto &E) {
1666+
return !E.value().first || (E.value().first == Item[0].first &&
1667+
E.value().second == (int)E.index());
1668+
})) {
1669+
return Item[0].first;
1670+
} else if (SplatLeafs.contains(Item[0].first)) {
1671+
if (auto ILI = dyn_cast<Instruction>(Item[0].first))
1672+
Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef());
1673+
else if (isa<Argument>(Item[0].first))
1674+
Builder.SetInsertPointPastAllocas(I.getParent()->getParent());
1675+
SmallVector<int, 16> Mask(Ty->getNumElements(), Item[0].second);
1676+
return Builder.CreateShuffleVector(Item[0].first, Mask);
1677+
}
1678+
1679+
auto *I = cast<Instruction>(Item[0].first);
1680+
SmallVector<Value *> Ops;
1681+
unsigned E = I->getNumOperands();
1682+
for (unsigned Idx = 0; Idx < E; Idx++)
1683+
Ops.push_back(generate(GenerateInstLaneVectorFromOperand(Item, Idx)));
1684+
Builder.SetInsertPoint(I);
1685+
if (auto BI = dyn_cast<BinaryOperator>(I))
1686+
return Builder.CreateBinOp((Instruction::BinaryOps)BI->getOpcode(),
1687+
Ops[0], Ops[1]);
1688+
if (auto UI = dyn_cast<UnaryOperator>(I))
1689+
return Builder.CreateUnOp((Instruction::UnaryOps)UI->getOpcode(), Ops[0]);
1690+
llvm_unreachable("Unhandled instruction in generate");
1691+
};
1692+
1693+
Value *V = generate(Start);
1694+
replaceValue(I, *V);
1695+
return true;
1696+
}
1697+
15551698
/// Given a commutative reduction, the order of the input lanes does not alter
15561699
/// the results. We can use this to remove certain shuffles feeding the
15571700
/// reduction, removing the need to shuffle at all.
@@ -2108,6 +2251,7 @@ bool VectorCombine::run() {
21082251
MadeChange |= foldShuffleOfBinops(I);
21092252
MadeChange |= foldShuffleOfCastops(I);
21102253
MadeChange |= foldSelectShuffle(I);
2254+
MadeChange |= foldShuffleToIdentity(I);
21112255
break;
21122256
case Instruction::BitCast:
21132257
MadeChange |= foldBitcastShuffle(I);

0 commit comments

Comments
 (0)