@@ -581,7 +581,7 @@ void VariableReuseAnalysis::visitExtractElementInst(ExtractElementInst &I) {
581
581
// Valid vec alias and add it into alias map
582
582
addVecAlias (EEI_nv, vec_nv, vecVal, iIdx);
583
583
584
- // Mark this inst as noop inst
584
+ // Mark this inst as no-op inst
585
585
m_HasBecomeNoopInsts[EEI] = 1 ;
586
586
}
587
587
@@ -657,9 +657,13 @@ void VariableReuseAnalysis::printAlias(raw_ostream &OS, const Function *F) const
657
657
for (auto VI : BV->Aliasers ) {
658
658
SSubVecDesc *aSV = VI;
659
659
Value *aliaser = aSV->Aliaser ;
660
+ bool HasBeenNoop = false ;
661
+ if (Instruction *AliaserInst = dyn_cast<Instruction>(aliaser))
662
+ HasBeenNoop = (m_HasBecomeNoopInsts.count (AliaserInst) > 0 );
660
663
bool isSinglVal = m_DeSSA ? m_DeSSA->isSingleValued (aliaser) : true ;
664
+ const char *Noop = HasBeenNoop ? " [no-op]" : " " ;
661
665
const char *inCC = !isSinglVal ? " .inDessaCC" : " " ;
662
- OS << " " << *aliaser << " [" << aSV->StartElementOffset << " ]" << inCC << " \n " ;
666
+ OS << " " << *aliaser << " [" << aSV->StartElementOffset << " ]" << inCC << Noop << " \n " ;
663
667
}
664
668
OS << " \n " ;
665
669
}
@@ -887,16 +891,16 @@ bool VariableReuseAnalysis::getAllInsEltsIfAvailable(InsertElementInst *FirstIEI
887
891
IGC_ASSERT_MESSAGE (IEI_ix < nelts, " ICE: IEI's index out of bound!" );
888
892
SVecInsEltInfo &InsEltInfo = AllIEIs[IEI_ix];
889
893
if (InsEltInfo.IEI ) {
890
- // One element is inserted more than once, skip.
894
+ // This element is inserted more than once, skip.
891
895
return false ;
892
896
}
893
897
InsEltInfo.IEI = I;
894
898
InsEltInfo.Elt = E;
895
899
InsEltInfo.FromVec = V;
896
900
InsEltInfo.FromVec_eltIx = V_ix;
897
- if (E) {
898
- InsEltInfo. EEI = dyn_cast<ExtractElementInst>(E);
899
- }
901
+
902
+ // So far, E is never nullptr (could be in the future)
903
+ InsEltInfo. EEI = dyn_cast_or_null<ExtractElementInst>(E);
900
904
901
905
if (!I->hasOneUse ()) {
902
906
break ;
@@ -923,19 +927,26 @@ bool VariableReuseAnalysis::getAllInsEltsIfAvailable(InsertElementInst *FirstIEI
923
927
if (tV == nullptr )
924
928
return false ;
925
929
926
- // Expect node values for all IEIs are identical. In general, if they
927
- // are in the same DeSSA CC, that would be fine.
930
+ // Expect all IEIs are in the same DeSSA CC (DeSSA special-handles IEIs)
928
931
Value *tV_nv = m_DeSSA->getNodeValue (tV);
929
932
if (V_root != getRootValue (tV_nv))
930
933
return false ;
931
934
932
935
Value *E = AllIEIs[i].Elt ;
936
+ if (!E || isa<Constant>(E)) {
937
+ // constant is okay for either non-uniform or uniform.
938
+ // (Note: if any E is constant, this chain of IEI cannot be
939
+ // a sub-vector of another larger vector).
940
+ continue ;
941
+ }
933
942
Value *FromVec = AllIEIs[i].FromVec ;
934
- Value *FromVec_nv = m_DeSSA->getNodeValue (FromVec);
935
- // check if FromVec has been coalesced with IEI already by DeSSA.
936
- // (Wouldn't happen under current DeSSA, but might happen in future)
937
- if (V_root == getRootValue (FromVec_nv))
938
- return false ;
943
+ if (FromVec) {
944
+ Value *FromVec_nv = m_DeSSA->getNodeValue (FromVec);
945
+ // check if FromVec has been coalesced with IEI already by DeSSA.
946
+ // (Wouldn't happen under current DeSSA, but might happen in future)
947
+ if (V_root == getRootValue (FromVec_nv))
948
+ return false ;
949
+ }
939
950
940
951
// Make sure FromVec or E have the same uniformness as V.
941
952
if ((E && V_dep != m_WIA->whichDepend (E)) || (FromVec && V_dep != m_WIA->whichDepend (FromVec)))
@@ -946,7 +957,7 @@ bool VariableReuseAnalysis::getAllInsEltsIfAvailable(InsertElementInst *FirstIEI
946
957
947
958
Value *VariableReuseAnalysis::traceAliasValue (Value *V) {
948
959
if (CastInst *CastI = dyn_cast_or_null<CastInst>(V)) {
949
- // Only handle Noop cast inst. For example,
960
+ // Only handle no-op cast inst. For example,
950
961
// dst = bitcast <3 x i32> src to <3 x float>,
951
962
// it is okay, but the following isn't.
952
963
// dst = bitcast <3 x i64> src to <6 x i32>
@@ -969,17 +980,13 @@ Value *VariableReuseAnalysis::traceAliasValue(Value *V) {
969
980
}
970
981
971
982
//
972
- // Returns true if the following is true
983
+ // Returns true if there is the following pattern; otherwise return false.
973
984
// IEI = insertElement <vectorType> Vec, S, <constant IEI_ix>
974
- // Return false, otherwise.
975
- //
976
- // When the above condition is true, V and V_ix are used for the
977
- // following cases:
978
- // 1. S is from another vector V.
979
- // S = extractElement <vectorType> V, <constant V_ix>
980
- // S is the element denoted by (V, V_ix)
981
- // 2. otherwise, V=nullptr, V_ix=0.
982
- // S is a candidate inserted and could be alias to the vector.
985
+ // 1. S is from another vector V.
986
+ // S = extractElement <vectorType> V, <constant V_ix>
987
+ // In this case, S is the element denoted by (V, V_ix)
988
+ // 2. otherwise, V=nullptr, V_ix=0.
989
+ // S is some value other than a vector element.
983
990
//
984
991
// Input: IEI
985
992
// Output: IEI_ix, S, V, V_ix
@@ -999,9 +1006,9 @@ bool VariableReuseAnalysis::getElementValue(InsertElementInst *IEI, int &IEI_ix,
999
1006
IEI_ix = (int )CI->getZExtValue ();
1000
1007
1001
1008
Value *elem0 = IEI->getOperand (1 );
1002
- if (hasBeenPayloadCoalesced (elem0) || isa<Constant>(elem0) || isOrCoalescedWithArg (elem0)) {
1003
- // If elem0 has been payload-coalesced, is constant,
1004
- // or it has been aliased to an argument, skip it.
1009
+ if (hasBeenPayloadCoalesced (elem0) || isOrCoalescedWithArg (elem0)) {
1010
+ // If elem0 has been payload-coalesced or it has been aliased to
1011
+ // an argument, skip it.
1005
1012
return false ;
1006
1013
}
1007
1014
@@ -1046,11 +1053,10 @@ void VariableReuseAnalysis::InsertElementAliasing(Function *F) {
1046
1053
1047
1054
// IGC Key VectorAlias controls vectorAlias optimization.
1048
1055
//
1049
- // Do it if VectorAlias != 0.
1050
- // VectorAlias=0x1: subvec aliasing for isolated values
1051
- // (getRootValue()=null)
1052
- // =0x2: subvec aliasing for both isolated and non-isolated
1053
- // value)
1056
+ // VectorAlias (also from m_pCtx->getVectorCoalescingControl())
1057
+ // 0x0: disable vector aliasing
1058
+ // 0x1: subvec aliasing for isolated values (getRootValue()=null)
1059
+ // 0x2: subvec aliasing for both isolated and non-isolated values
1054
1060
const auto control = (m_pCtx->getVectorCoalescingControl () & 0x3 );
1055
1061
// To avoid increasing GRF pressure, skip if F is too large or not an entry
1056
1062
const int32_t NumBBThreshold = IGC_GET_FLAG_VALUE (VectorAliasBBThreshold);
@@ -1078,9 +1084,9 @@ void VariableReuseAnalysis::InsertElementAliasing(Function *F) {
1078
1084
// In this case, 's' becomes a part of 'b'. In LLVM IR,
1079
1085
// there are a chain of extElt and insElt instructions for
1080
1086
// doing so.
1081
- // 2. insertTo: sub- vector is used to create a base vector.
1087
+ // 2. insertTo: small vector is used to create a larger base vector.
1082
1088
// For example:
1083
- // given sub- vector int4 s0, s1; int8 vector b is created like:
1089
+ // given small vector int4 s0, s1; int8 vector b is created like:
1084
1090
// b = (int8) (s0, s1)
1085
1091
// In this case, both s0 and s1 become part of b.
1086
1092
@@ -1095,12 +1101,12 @@ void VariableReuseAnalysis::InsertElementAliasing(Function *F) {
1095
1101
continue ;
1096
1102
}
1097
1103
1098
- // Check if this is an extractFrom pattern, if so, add alias.
1104
+ // Case 1: check if this is an extractFrom pattern, if so, add alias.
1099
1105
if (processExtractFrom (AllIEIs)) {
1100
1106
continue ;
1101
1107
}
1102
1108
1103
- // Check if this is an insertTo pattern, if so add alias.
1109
+ // Case 2: check if this is an insertTo pattern, if so add alias.
1104
1110
if (processInsertTo (BB, AllIEIs)) {
1105
1111
continue ;
1106
1112
}
@@ -1123,6 +1129,8 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy &AllIEIs) {
1123
1129
}
1124
1130
1125
1131
for (int i = 1 ; i < nelts; ++i) {
1132
+ // If any of AllIEIs[i] has a constant element (IEI's operand 1), this check
1133
+ // will be true, thus AllIEIs cannot be a sub-vector
1126
1134
if (AllIEIs[i].FromVec != BaseVec || AllIEIs[i].FromVec_eltIx != (BaseStartIx + i))
1127
1135
return false ;
1128
1136
}
@@ -1187,9 +1195,9 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy &AllIEIs) {
1187
1195
// add alias
1188
1196
addVecAlias (Sub_nv, Base_nv, BaseVec, BaseStartIx, BaseAlign);
1189
1197
1190
- // Make sure noop insts are in the map so they won't be emitted later.
1198
+ // Make sure no-op insts are in the map so they won't be emitted later.
1191
1199
for (int i = 0 , sz = nelts; i < sz; ++i) {
1192
- // IEI chain is coalesced by DeSSA, so it's safe to mark it as noop
1200
+ // IEI chain is coalesced by DeSSA, so it's safe to mark it as no-op
1193
1201
InsertElementInst *IEI = AllIEIs[i].IEI ;
1194
1202
if (!m_DeSSA->isNoopAliaser (IEI)) {
1195
1203
m_HasBecomeNoopInsts[IEI] = 1 ;
@@ -1198,7 +1206,7 @@ bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy &AllIEIs) {
1198
1206
ExtractElementInst *EEI = AllIEIs[i].EEI ;
1199
1207
IGC_ASSERT (EEI);
1200
1208
if (!m_DeSSA->isNoopAliaser (EEI)) {
1201
- // Set EEI as an aliser, thus it become noop .
1209
+ // Set EEI as an aliaser, thus it becomes a no-op.
1202
1210
Value *EEI_nv = m_DeSSA->getNodeValue (EEI);
1203
1211
addVecAlias (EEI_nv, Base_nv, BaseVec, AllIEIs[i].FromVec_eltIx , EALIGN_AUTO);
1204
1212
m_HasBecomeNoopInsts[EEI] = 1 ;
@@ -1253,9 +1261,22 @@ bool VariableReuseAnalysis::processInsertTo(BasicBlock *BB, VecInsEltInfoTy &All
1253
1261
isSubCandidate = false ;
1254
1262
}
1255
1263
1256
- if (Elt && Sub == nullptr && skipScalarAliaser (BB, Elt)) {
1257
- // Skip scalar coalescing
1258
- isSubCandidate = false ;
1264
+ // Check scalar
1265
+ if (isSubCandidate && Elt && Sub == nullptr ) {
1266
+ if (isa<Constant>(Elt)) {
1267
+ // Skip as alias is b/w two variables
1268
+ isSubCandidate = false ;
1269
+ } else if (Instruction *TmpInst = dyn_cast<Instruction>(Elt)) {
1270
+ // This is to skip inst such as @llvm.genx.GenISA.simdSize(),
1271
+ // which is specially handled during EmitCodeGen
1272
+ if (m_PatternMatch->SIMDConstExpr (TmpInst))
1273
+ isSubCandidate = false ;
1274
+ }
1275
+
1276
+ if (isSubCandidate && skipScalarAliaser (BB, Elt)) {
1277
+ // Skip scalar coalescing
1278
+ isSubCandidate = false ;
1279
+ }
1259
1280
}
1260
1281
1261
1282
// If Sub == nullptr or NextSub != Sub, this is the last element
@@ -1329,9 +1350,9 @@ bool VariableReuseAnalysis::processInsertTo(BasicBlock *BB, VecInsEltInfoTy &All
1329
1350
1330
1351
int V_sz = getNumElts (V);
1331
1352
if (V_sz > 1 ) {
1332
- // set up Noop inst map to skip emitting them later.
1353
+ // set up No-op inst map to skip emitting them later.
1333
1354
for (int j = V_ix, sz = V_ix + V_sz; j < sz; ++j) {
1334
- // Safe to mark IEI as noop as IEI chain's coalesced by DeSSA
1355
+ // Safe to mark IEI as no-op as its aliaser will set it
1335
1356
InsertElementInst *IEI = AllIEIs[j].IEI ;
1336
1357
if (!m_DeSSA->isNoopAliaser (IEI)) {
1337
1358
m_HasBecomeNoopInsts[IEI] = 1 ;
@@ -1341,15 +1362,15 @@ bool VariableReuseAnalysis::processInsertTo(BasicBlock *BB, VecInsEltInfoTy &All
1341
1362
IGC_ASSERT (EEI);
1342
1363
// Sub-vector
1343
1364
if (!m_DeSSA->isNoopAliaser (EEI)) {
1344
- // EEI should be in alias map so it can be marked as noop
1365
+ // Safe to set EEI to no-op
1345
1366
Value *EEI_nv = m_DeSSA->getNodeValue (EEI);
1346
1367
addVecAlias (EEI_nv, Base_nv, FirstIEI, j);
1347
1368
m_HasBecomeNoopInsts[EEI] = 1 ;
1348
1369
}
1349
1370
}
1350
1371
} else {
1351
1372
// scalar
1352
- // Safe to mark IEI as noop as IEI chain's coalesced by DeSSA
1373
+ // Safe to mark IEI as no-op
1353
1374
InsertElementInst *IEI = AllIEIs[V_ix].IEI ;
1354
1375
if (m_DeSSA->isNoopAliaser (IEI))
1355
1376
continue ;
@@ -1433,8 +1454,11 @@ VariableReuseAnalysis::AState VariableReuseAnalysis::getCandidateStateUse(Value
1433
1454
}
1434
1455
} else if (StoreInst *SI = dyn_cast<StoreInst>(Val)) {
1435
1456
retSt = AState::TARGET;
1436
- } else if (isa<CallInst>(Val)) {
1437
- return AState::SKIP;
1457
+ } else if (CallInst *CallI = dyn_cast<CallInst>(Val)) {
1458
+ if (CallI->isInlineAsm ())
1459
+ retSt = AState::TARGET;
1460
+ else
1461
+ return AState::SKIP;
1438
1462
}
1439
1463
}
1440
1464
return retSt;
@@ -1460,15 +1484,17 @@ VariableReuseAnalysis::AState VariableReuseAnalysis::getCandidateStateDef(Value
1460
1484
}
1461
1485
} else if (LoadInst *SI = dyn_cast<LoadInst>(Val)) {
1462
1486
return AState::TARGET;
1463
- } else if (isa<CallInst>(Val)) {
1487
+ } else if (CallInst *CallI = dyn_cast<CallInst>(Val)) {
1488
+ if (CallI->isInlineAsm ())
1489
+ return AState::TARGET;
1464
1490
return AState::SKIP;
1465
1491
}
1466
1492
return AState::OK;
1467
1493
}
1468
1494
1469
1495
// Vector alias disables extractMask optimization. This function
1470
1496
// checks if extractMask optim can be applied. And the caller
1471
- // will decide whether to favor extractMask optimization.
1497
+ // will decide whether to favor extractMask optimization or not .
1472
1498
bool VariableReuseAnalysis::isExtractMaskCandidate (Value *V) const {
1473
1499
auto BIT = [](int n) { return (uint32_t )(1 << n); };
1474
1500
0 commit comments