@@ -41,39 +41,50 @@ using namespace llvm;
41
41
using namespace llvm ::AMDGPU;
42
42
using namespace llvm ::PatternMatch;
43
43
44
- // / Map for newly created IR instructions and their uniformity.
45
- using NewUniformityMap = DenseMap<const Value *, bool >;
44
+ // / Tracks uniformity of newly created instructions.
45
+ // / Wraps a ValueMap so we can enforce consistent mark/erase usage.
46
+ struct UniformityTracker : DenseMap<const Value *, bool > {
47
+ // / Record that V has known uniformity.
48
+ void mark (Value *V, bool IsUniform) { (*this )[V] = IsUniform; }
49
+
50
+ // / Erase V from the map if it is an instruction with no uses anymore.
51
+ void eraseIfDead (Value *V) {
52
+ if (auto *I = dyn_cast<Instruction>(V); I && I->use_empty ())
53
+ this ->erase (V);
54
+ }
55
+ };
46
56
47
57
// / Wrapper for querying uniformity info that first checks new instructions.
48
58
static bool isDivergentUseWithNew (const Use &U, const UniformityInfo &UI,
49
- const NewUniformityMap &NewUMap ) {
59
+ const UniformityTracker &Tracker ) {
50
60
Value *V = U.get ();
51
- if (auto It = NewUMap .find (V); It != NewUMap .end ())
61
+ if (auto It = Tracker .find (V); It != Tracker .end ())
52
62
return !It->second ; // divergent if marked false
53
63
return UI.isDivergentUse (U);
54
64
}
55
65
56
66
// / Optimizes uniform intrinsics.
57
67
static bool optimizeUniformIntrinsic (IntrinsicInst &II,
58
68
const UniformityInfo &UI,
59
- NewUniformityMap &NewUMap ) {
69
+ UniformityTracker &Tracker ) {
60
70
llvm::Intrinsic::ID IID = II.getIntrinsicID ();
61
71
62
72
switch (IID) {
63
73
case Intrinsic::amdgcn_permlane64:
64
74
case Intrinsic::amdgcn_readfirstlane:
65
75
case Intrinsic::amdgcn_readlane: {
66
76
Value *Src = II.getArgOperand (0 );
67
- if (isDivergentUseWithNew (II.getOperandUse (0 ), UI, NewUMap ))
77
+ if (isDivergentUseWithNew (II.getOperandUse (0 ), UI, Tracker ))
68
78
return false ;
69
79
LLVM_DEBUG (dbgs () << " Replacing " << II << " with " << *Src << ' \n ' );
70
80
II.replaceAllUsesWith (Src);
81
+ Tracker.eraseIfDead (&II);
71
82
II.eraseFromParent ();
72
83
return true ;
73
84
}
74
85
case Intrinsic::amdgcn_ballot: {
75
86
Value *Src = II.getArgOperand (0 );
76
- if (isDivergentUseWithNew (II.getOperandUse (0 ), UI, NewUMap ))
87
+ if (isDivergentUseWithNew (II.getOperandUse (0 ), UI, Tracker ))
77
88
return false ;
78
89
LLVM_DEBUG (dbgs () << " Found uniform ballot intrinsic: " << II << ' \n ' );
79
90
@@ -93,25 +104,28 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
93
104
// Case: (icmp eq %ballot, 0) -> xor %ballot_arg, 1
94
105
Instruction *NotOp =
95
106
BinaryOperator::CreateNot (Src, " " , ICmp->getIterator ());
96
- // Record uniformity: Src is uniform, and NOT preserves uniformity.
97
- NewUMap[NotOp] = true ;
107
+ Tracker.mark (NotOp, true ); // NOT preserves uniformity
98
108
LLVM_DEBUG (dbgs () << " Replacing ICMP_EQ: " << *NotOp << ' \n ' );
99
109
ICmp->replaceAllUsesWith (NotOp);
110
+ Tracker.eraseIfDead (ICmp);
100
111
ICmp->eraseFromParent ();
101
112
Changed = true ;
102
113
} else if (Pred == ICmpInst::ICMP_NE && match (OtherOp, m_Zero ())) {
103
114
// Case: (icmp ne %ballot, 0) -> %ballot_arg
104
115
LLVM_DEBUG (dbgs () << " Replacing ICMP_NE with ballot argument: "
105
116
<< *Src << ' \n ' );
106
117
ICmp->replaceAllUsesWith (Src);
118
+ Tracker.eraseIfDead (ICmp);
107
119
ICmp->eraseFromParent ();
108
120
Changed = true ;
109
121
}
110
122
}
111
123
}
112
124
// Erase the intrinsic if it has no remaining uses.
113
- if (II.use_empty ())
125
+ if (II.use_empty ()) {
126
+ Tracker.eraseIfDead (&II);
114
127
II.eraseFromParent ();
128
+ }
115
129
return Changed;
116
130
}
117
131
default :
@@ -123,7 +137,7 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
123
137
// / Iterate over intrinsics in the module to optimise.
124
138
static bool runUniformIntrinsicCombine (Module &M, ModuleAnalysisManager &AM) {
125
139
bool IsChanged = false ;
126
- NewUniformityMap NewUMap ;
140
+ UniformityTracker Tracker ;
127
141
128
142
FunctionAnalysisManager &FAM =
129
143
AM.getResult <FunctionAnalysisManagerModuleProxy>(M).getManager ();
@@ -145,7 +159,7 @@ static bool runUniformIntrinsicCombine(Module &M, ModuleAnalysisManager &AM) {
145
159
continue ;
146
160
147
161
const auto &UI = FAM.getResult <UniformityInfoAnalysis>(*ParentF);
148
- IsChanged |= optimizeUniformIntrinsic (*II, UI, NewUMap );
162
+ IsChanged |= optimizeUniformIntrinsic (*II, UI, Tracker );
149
163
}
150
164
}
151
165
return IsChanged;
0 commit comments