Skip to content

Commit 907b1e4

Browse files
committed
Allow sinking of free vector ops
1 parent b14a59c commit 907b1e4

File tree

3 files changed

+1198
-1102
lines changed

3 files changed

+1198
-1102
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,90 @@ bool GCNTTIImpl::isProfitableToSinkOperands(Instruction *I,
12221222

12231223
if (match(&Op, m_FAbs(m_Value())) || match(&Op, m_FNeg(m_Value())))
12241224
Ops.push_back(&Op);
1225+
1226+
// Zero cost vector instructions (e.g. extractelement 0 of i32 vectors)
1227+
// will be optimized away, and sinking them can help SDAG combines.
1228+
DataLayout DL = I->getModule()->getDataLayout();
1229+
auto IsFreeExtractInsert = [&DL, this](VectorType *VecType,
1230+
unsigned VecIndex) {
1231+
unsigned EltSize = DL.getTypeSizeInBits(VecType->getElementType());
1232+
return EltSize >= 32 ||
1233+
(EltSize == 16 && VecIndex == 0 && ST->has16BitInsts());
1234+
};
1235+
1236+
uint64_t VecIndex;
1237+
Value *Vec;
1238+
if (match(Op.get(), m_ExtractElt(m_Value(Vec), m_ConstantInt(VecIndex)))) {
1239+
Instruction *VecOpInst =
1240+
dyn_cast<Instruction>(cast<Instruction>(Op.get())->getOperand(0));
1241+
// If a zero cost extractelement instruction is the only use of the vector,
1242+
// then it may be combined with the def.
1243+
if (VecOpInst && VecOpInst->hasOneUse())
1244+
continue;
1245+
1246+
if (IsFreeExtractInsert(cast<VectorType>(Vec->getType()), VecIndex))
1247+
Ops.push_back(&Op);
1248+
1249+
continue;
1250+
}
1251+
1252+
if (match(Op.get(),
1253+
m_InsertElt(m_Value(Vec), m_Value(), m_ConstantInt(VecIndex)))) {
1254+
if (IsFreeExtractInsert(cast<VectorType>(Vec->getType()), VecIndex))
1255+
Ops.push_back(&Op);
1256+
1257+
continue;
1258+
}
1259+
1260+
if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(Op.get())) {
1261+
if (Shuffle->isIdentity()) {
1262+
Ops.push_back(&Op);
1263+
continue;
1264+
}
1265+
1266+
unsigned EltSize = DL.getTypeSizeInBits(
1267+
cast<VectorType>(cast<VectorType>(Shuffle->getType()))
1268+
->getElementType());
1269+
1270+
// For i32 (or greater) shufflevectors, these will be lowered into a
1271+
// series of insert / extract elements, which will be coalesced away.
1272+
if (EltSize >= 32) {
1273+
Ops.push_back(&Op);
1274+
continue;
1275+
}
1276+
1277+
if (EltSize < 16 || !ST->has16BitInsts())
1278+
continue;
1279+
1280+
int NumSubElts, SubIndex;
1281+
if (Shuffle->changesLength()) {
1282+
if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) {
1283+
Ops.push_back(&Op);
1284+
continue;
1285+
}
1286+
1287+
if (Shuffle->isExtractSubvectorMask(SubIndex) ||
1288+
Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) {
1289+
if (!(SubIndex % 2)) {
1290+
Ops.push_back(&Op);
1291+
continue;
1292+
}
1293+
}
1294+
}
1295+
1296+
if (Shuffle->isReverse() || Shuffle->isZeroEltSplat() ||
1297+
Shuffle->isSingleSource()) {
1298+
Ops.push_back(&Op);
1299+
continue;
1300+
}
1301+
1302+
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) {
1303+
if (!(SubIndex % 2)) {
1304+
Ops.push_back(&Op);
1305+
continue;
1306+
}
1307+
}
1308+
}
12251309
}
12261310

12271311
return !Ops.empty();

0 commit comments

Comments
 (0)