@@ -41,18 +41,30 @@ using namespace llvm;
41
41
using namespace llvm ::AMDGPU;
42
42
using namespace llvm ::PatternMatch;
43
43
44
+ /// Maps newly created IR values to their uniformity: true if uniform, false if divergent.
45
+ using NewUniformityMap = DenseMap<const Value *, bool >;
46
+
47
+ // / Wrapper for querying uniformity info that first checks new instructions.
48
+ static bool isDivergentUseWithNew (const Use &U, const UniformityInfo &UI,
49
+ const NewUniformityMap &NewUMap) {
50
+ Value *V = U.get ();
51
+ if (auto It = NewUMap.find (V); It != NewUMap.end ())
52
+ return !It->second ; // divergent if marked false
53
+ return UI.isDivergentUse (U);
54
+ }
55
+
44
56
// / Optimizes uniform intrinsics.
45
57
static bool optimizeUniformIntrinsic (IntrinsicInst &II,
46
- const UniformityInfo &UI) {
58
+ const UniformityInfo &UI,
59
+ NewUniformityMap &NewUMap) {
47
60
llvm::Intrinsic::ID IID = II.getIntrinsicID ();
48
61
49
62
switch (IID) {
50
63
case Intrinsic::amdgcn_permlane64:
51
64
case Intrinsic::amdgcn_readfirstlane:
52
65
case Intrinsic::amdgcn_readlane: {
53
66
Value *Src = II.getArgOperand (0 );
54
- // Check if the argument use is divergent
55
- if (UI.isDivergentUse (II.getOperandUse (0 )))
67
+ if (isDivergentUseWithNew (II.getOperandUse (0 ), UI, NewUMap))
56
68
return false ;
57
69
LLVM_DEBUG (dbgs () << " Replacing " << II << " with " << *Src << ' \n ' );
58
70
II.replaceAllUsesWith (Src);
@@ -61,7 +73,7 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
61
73
}
62
74
case Intrinsic::amdgcn_ballot: {
63
75
Value *Src = II.getArgOperand (0 );
64
- if (UI. isDivergentUse (II.getOperandUse (0 )))
76
+ if (isDivergentUseWithNew (II.getOperandUse (0 ), UI, NewUMap ))
65
77
return false ;
66
78
LLVM_DEBUG (dbgs () << " Found uniform ballot intrinsic: " << II << ' \n ' );
67
79
@@ -78,15 +90,17 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
78
90
Value *OtherOp = Op0 == &II ? Op1 : Op0;
79
91
80
92
if (Pred == ICmpInst::ICMP_EQ && match (OtherOp, m_Zero ())) {
81
- // Case (icmp eq %ballot, 0) --> xor %ballot_arg, 1
93
+ // Case: (icmp eq %ballot, 0) -> xor %ballot_arg, 1
82
94
Instruction *NotOp =
83
95
BinaryOperator::CreateNot (Src, " " , ICmp->getIterator ());
96
+ // Record uniformity: Src is uniform, and NOT preserves uniformity.
97
+ NewUMap[NotOp] = true ;
84
98
LLVM_DEBUG (dbgs () << " Replacing ICMP_EQ: " << *NotOp << ' \n ' );
85
99
ICmp->replaceAllUsesWith (NotOp);
86
100
ICmp->eraseFromParent ();
87
101
Changed = true ;
88
102
} else if (Pred == ICmpInst::ICMP_NE && match (OtherOp, m_Zero ())) {
89
- // (icmp ne %ballot, 0) --> %ballot_arg
103
+ // Case: (icmp ne %ballot, 0) -> %ballot_arg
90
104
LLVM_DEBUG (dbgs () << " Replacing ICMP_NE with ballot argument: "
91
105
<< *Src << ' \n ' );
92
106
ICmp->replaceAllUsesWith (Src);
@@ -106,9 +120,11 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
106
120
return false ;
107
121
}
108
122
109
- // / Iterate over the Intrinsics use in the Module to optimise.
123
+ // / Iterate over intrinsics in the module to optimise.
110
124
static bool runUniformIntrinsicCombine (Module &M, ModuleAnalysisManager &AM) {
111
125
bool IsChanged = false ;
126
+ NewUniformityMap NewUMap;
127
+
112
128
FunctionAnalysisManager &FAM =
113
129
AM.getResult <FunctionAnalysisManagerModuleProxy>(M).getManager ();
114
130
for (Function &F : M) {
@@ -129,7 +145,7 @@ static bool runUniformIntrinsicCombine(Module &M, ModuleAnalysisManager &AM) {
129
145
continue ;
130
146
131
147
const auto &UI = FAM.getResult <UniformityInfoAnalysis>(*ParentF);
132
- IsChanged |= optimizeUniformIntrinsic (*II, UI);
148
+ IsChanged |= optimizeUniformIntrinsic (*II, UI, NewUMap );
133
149
}
134
150
}
135
151
return IsChanged;
0 commit comments