@@ -1222,6 +1222,90 @@ bool GCNTTIImpl::isProfitableToSinkOperands(Instruction *I,
1222
1222
1223
1223
if (match (&Op, m_FAbs (m_Value ())) || match (&Op, m_FNeg (m_Value ())))
1224
1224
Ops.push_back (&Op);
1225
+
1226
+ // Zero cost vector instructions (e.g. extractelement 0 of i32 vectors)
1227
+ // will be optimized away, and sinking them can help SDAG combines.
1228
+ DataLayout DL = I->getModule ()->getDataLayout ();
1229
+ auto IsFreeExtractInsert = [&DL, this ](VectorType *VecType,
1230
+ unsigned VecIndex) {
1231
+ unsigned EltSize = DL.getTypeSizeInBits (VecType->getElementType ());
1232
+ return EltSize >= 32 ||
1233
+ (EltSize == 16 && VecIndex == 0 && ST->has16BitInsts ());
1234
+ };
1235
+
1236
+ uint64_t VecIndex;
1237
+ Value *Vec;
1238
+ if (match (Op.get (), m_ExtractElt (m_Value (Vec), m_ConstantInt (VecIndex)))) {
1239
+ Instruction *VecOpInst =
1240
+ dyn_cast<Instruction>(cast<Instruction>(Op.get ())->getOperand (0 ));
1241
+ // If a zero cost extractelement instruction is the only use of the vector,
1242
+ // then it may be combined with the def.
1243
+ if (VecOpInst && VecOpInst->hasOneUse ())
1244
+ continue ;
1245
+
1246
+ if (IsFreeExtractInsert (cast<VectorType>(Vec->getType ()), VecIndex))
1247
+ Ops.push_back (&Op);
1248
+
1249
+ continue ;
1250
+ }
1251
+
1252
+ if (match (Op.get (),
1253
+ m_InsertElt (m_Value (Vec), m_Value (), m_ConstantInt (VecIndex)))) {
1254
+ if (IsFreeExtractInsert (cast<VectorType>(Vec->getType ()), VecIndex))
1255
+ Ops.push_back (&Op);
1256
+
1257
+ continue ;
1258
+ }
1259
+
1260
+ if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(Op.get ())) {
1261
+ if (Shuffle->isIdentity ()) {
1262
+ Ops.push_back (&Op);
1263
+ continue ;
1264
+ }
1265
+
1266
+ unsigned EltSize = DL.getTypeSizeInBits (
1267
+ cast<VectorType>(cast<VectorType>(Shuffle->getType ()))
1268
+ ->getElementType ());
1269
+
1270
+ // For i32 (or greater) shufflevectors, these will be lowered into a
1271
+ // series of insert / extract elements, which will be coalesced away.
1272
+ if (EltSize >= 32 ) {
1273
+ Ops.push_back (&Op);
1274
+ continue ;
1275
+ }
1276
+
1277
+ if (EltSize < 16 || !ST->has16BitInsts ())
1278
+ continue ;
1279
+
1280
+ int NumSubElts, SubIndex;
1281
+ if (Shuffle->changesLength ()) {
1282
+ if (Shuffle->increasesLength () && Shuffle->isIdentityWithPadding ()) {
1283
+ Ops.push_back (&Op);
1284
+ continue ;
1285
+ }
1286
+
1287
+ if (Shuffle->isExtractSubvectorMask (SubIndex) ||
1288
+ Shuffle->isInsertSubvectorMask (NumSubElts, SubIndex)) {
1289
+ if (!(SubIndex % 2 )) {
1290
+ Ops.push_back (&Op);
1291
+ continue ;
1292
+ }
1293
+ }
1294
+ }
1295
+
1296
+ if (Shuffle->isReverse () || Shuffle->isZeroEltSplat () ||
1297
+ Shuffle->isSingleSource ()) {
1298
+ Ops.push_back (&Op);
1299
+ continue ;
1300
+ }
1301
+
1302
+ if (Shuffle->isInsertSubvectorMask (NumSubElts, SubIndex)) {
1303
+ if (!(SubIndex % 2 )) {
1304
+ Ops.push_back (&Op);
1305
+ continue ;
1306
+ }
1307
+ }
1308
+ }
1225
1309
}
1226
1310
1227
1311
return !Ops.empty ();
0 commit comments