Skip to content

Commit 583094e

Browse files
store newly inserted inst and its uniformity
1 parent 0c9ff2f commit 583094e

File tree

1 file changed

+24
-8
lines changed

1 file changed

+24
-8
lines changed

llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,18 +41,30 @@ using namespace llvm;
4141
using namespace llvm::AMDGPU;
4242
using namespace llvm::PatternMatch;
4343

44+
/// Maps IR values created by this pass to their uniformity (true = uniform).
45+
using NewUniformityMap = DenseMap<const Value *, bool>;
46+
47+
/// Wrapper for querying uniformity info that first checks new instructions.
48+
static bool isDivergentUseWithNew(const Use &U, const UniformityInfo &UI,
49+
const NewUniformityMap &NewUMap) {
50+
Value *V = U.get();
51+
if (auto It = NewUMap.find(V); It != NewUMap.end())
52+
return !It->second; // divergent if marked false
53+
return UI.isDivergentUse(U);
54+
}
55+
4456
/// Optimizes uniform intrinsics.
4557
static bool optimizeUniformIntrinsic(IntrinsicInst &II,
46-
const UniformityInfo &UI) {
58+
const UniformityInfo &UI,
59+
NewUniformityMap &NewUMap) {
4760
llvm::Intrinsic::ID IID = II.getIntrinsicID();
4861

4962
switch (IID) {
5063
case Intrinsic::amdgcn_permlane64:
5164
case Intrinsic::amdgcn_readfirstlane:
5265
case Intrinsic::amdgcn_readlane: {
5366
Value *Src = II.getArgOperand(0);
54-
// Check if the argument use is divergent
55-
if (UI.isDivergentUse(II.getOperandUse(0)))
67+
if (isDivergentUseWithNew(II.getOperandUse(0), UI, NewUMap))
5668
return false;
5769
LLVM_DEBUG(dbgs() << "Replacing " << II << " with " << *Src << '\n');
5870
II.replaceAllUsesWith(Src);
@@ -61,7 +73,7 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
6173
}
6274
case Intrinsic::amdgcn_ballot: {
6375
Value *Src = II.getArgOperand(0);
64-
if (UI.isDivergentUse(II.getOperandUse(0)))
76+
if (isDivergentUseWithNew(II.getOperandUse(0), UI, NewUMap))
6577
return false;
6678
LLVM_DEBUG(dbgs() << "Found uniform ballot intrinsic: " << II << '\n');
6779

@@ -78,15 +90,17 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
7890
Value *OtherOp = Op0 == &II ? Op1 : Op0;
7991

8092
if (Pred == ICmpInst::ICMP_EQ && match(OtherOp, m_Zero())) {
81-
// Case (icmp eq %ballot, 0) --> xor %ballot_arg, 1
93+
// Case: (icmp eq %ballot, 0) -> xor %ballot_arg, 1
8294
Instruction *NotOp =
8395
BinaryOperator::CreateNot(Src, "", ICmp->getIterator());
96+
// Record uniformity: Src is uniform, and NOT preserves uniformity.
97+
NewUMap[NotOp] = true;
8498
LLVM_DEBUG(dbgs() << "Replacing ICMP_EQ: " << *NotOp << '\n');
8599
ICmp->replaceAllUsesWith(NotOp);
86100
ICmp->eraseFromParent();
87101
Changed = true;
88102
} else if (Pred == ICmpInst::ICMP_NE && match(OtherOp, m_Zero())) {
89-
// (icmp ne %ballot, 0) --> %ballot_arg
103+
// Case: (icmp ne %ballot, 0) -> %ballot_arg
90104
LLVM_DEBUG(dbgs() << "Replacing ICMP_NE with ballot argument: "
91105
<< *Src << '\n');
92106
ICmp->replaceAllUsesWith(Src);
@@ -106,9 +120,11 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
106120
return false;
107121
}
108122

109-
/// Iterate over the Intrinsics use in the Module to optimise.
123+
/// Iterate over intrinsics in the module to optimise.
110124
static bool runUniformIntrinsicCombine(Module &M, ModuleAnalysisManager &AM) {
111125
bool IsChanged = false;
126+
NewUniformityMap NewUMap;
127+
112128
FunctionAnalysisManager &FAM =
113129
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
114130
for (Function &F : M) {
@@ -129,7 +145,7 @@ static bool runUniformIntrinsicCombine(Module &M, ModuleAnalysisManager &AM) {
129145
continue;
130146

131147
const auto &UI = FAM.getResult<UniformityInfoAnalysis>(*ParentF);
132-
IsChanged |= optimizeUniformIntrinsic(*II, UI);
148+
IsChanged |= optimizeUniformIntrinsic(*II, UI, NewUMap);
133149
}
134150
}
135151
return IsChanged;

0 commit comments

Comments
 (0)