@@ -440,6 +440,21 @@ static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp) {
440440 SqrtOp->getType ()->isHalfTy ();
441441}
442442
443+ // / Return true if we can easily prove that use U is uniform.
444+ static bool isTriviallyUniform (const Use &U) {
445+ Value *V = U.get ();
446+ if (isa<Constant>(V))
447+ return true ;
448+ if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
449+ if (!AMDGPU::isIntrinsicAlwaysUniform (II->getIntrinsicID ()))
450+ return false ;
451+ // If II and U are in different blocks then there is a possibility of
452+ // temporal divergence.
453+ return II->getParent () == cast<Instruction>(U.getUser ())->getParent ();
454+ }
455+ return false ;
456+ }
457+
443458std::optional<Instruction *>
444459GCNTTIImpl::instCombineIntrinsic (InstCombiner &IC, IntrinsicInst &II) const {
445460 Intrinsic::ID IID = II.getIntrinsicID ();
@@ -1060,46 +1075,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
10601075 return IC.replaceOperand (II, 0 , UndefValue::get (VDstIn->getType ()));
10611076 }
10621077 case Intrinsic::amdgcn_permlane64:
1063- // A constant value is trivially uniform.
1064- if (Constant *C = dyn_cast<Constant>(II.getArgOperand (0 ))) {
1065- return IC.replaceInstUsesWith (II, C);
1066- }
1067- break ;
10681078 case Intrinsic::amdgcn_readfirstlane:
10691079 case Intrinsic::amdgcn_readlane: {
1070- // A constant value is trivially uniform.
1071- if (Constant *C = dyn_cast<Constant>(II.getArgOperand (0 ))) {
1072- return IC.replaceInstUsesWith (II, C);
1073- }
1074-
1075- // The rest of these may not be safe if the exec may not be the same between
1076- // the def and use.
1077- Value *Src = II.getArgOperand (0 );
1078- Instruction *SrcInst = dyn_cast<Instruction>(Src);
1079- if (SrcInst && SrcInst->getParent () != II.getParent ())
1080- break ;
1081-
1082- // readfirstlane (readfirstlane x) -> readfirstlane x
1083- // readlane (readfirstlane x), y -> readfirstlane x
1084- if (match (Src,
1085- PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
1086- return IC.replaceInstUsesWith (II, Src);
1087- }
1088-
1089- if (IID == Intrinsic::amdgcn_readfirstlane) {
1090- // readfirstlane (readlane x, y) -> readlane x, y
1091- if (match (Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
1092- return IC.replaceInstUsesWith (II, Src);
1093- }
1094- } else {
1095- // readlane (readlane x, y), y -> readlane x, y
1096- if (match (Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
1097- PatternMatch::m_Value (),
1098- PatternMatch::m_Specific (II.getArgOperand (1 ))))) {
1099- return IC.replaceInstUsesWith (II, Src);
1100- }
1101- }
1102-
1080+ // If the first argument is uniform these intrinsics return it unchanged.
1081+ const Use &Src = II.getArgOperandUse (0 );
1082+ if (isTriviallyUniform (Src))
1083+ return IC.replaceInstUsesWith (II, Src.get ());
11031084 break ;
11041085 }
11051086 case Intrinsic::amdgcn_trig_preop: {
0 commit comments