@@ -88,14 +88,9 @@ ModulePass *llvm::createNVVMReflectPass(unsigned SmVersion) {
8888}
8989
9090static cl::opt<bool >
91- NVVMReflectEnabled (" nvvm-reflect-enable" , cl::init(false ), cl::Hidden,
91+ NVVMReflectEnabled (" nvvm-reflect-enable" , cl::init(true ), cl::Hidden,
9292 cl::desc(" NVVM reflection, enabled by default" ));
9393
94- char NVVMReflectLegacyPass::ID = 0 ;
95- INITIALIZE_PASS (NVVMReflectLegacyPass, " nvvm-reflect" ,
96- " Replace occurrences of __nvvm_reflect() calls with 0/1" , false ,
97- false )
98-
9994// Allow users to specify additional key/value pairs to reflect. These key/value
10095// pairs are the last to be added to the ReflectMap, and therefore will take
10196// precedence over initial values (i.e. __CUDA_FTZ from module metadata and
@@ -109,6 +104,11 @@ static cl::opt<bool> NVVMReflectDCE(
109104 " nvvm-reflect-dce" , cl::init(false ), cl::Hidden,
110105 cl::desc(" Delete dead blocks introduced by reflect call elimination" ));
111106
107+ char NVVMReflectLegacyPass::ID = 0 ;
108+ INITIALIZE_PASS (NVVMReflectLegacyPass, " nvvm-reflect" ,
109+ " Replace occurrences of __nvvm_reflect() calls with 0/1" , false ,
110+ false )
111+
112112// Set the ReflectMap with, first, the value of __CUDA_FTZ from module metadata,
113113// and then the key/value pairs from the command line.
114114void NVVMReflect::populateReflectMap(Module &M) {
@@ -188,6 +188,8 @@ bool NVVMReflect::handleReflectFunction(Module &M, StringRef ReflectName) {
188188 << " (" << ReflectArg << " ) with value " << ReflectVal
189189 << " \n " );
190190 auto *NewValue = ConstantInt::get (Call->getType (), ReflectVal);
191+ dbgs () << " NewValue: " << *NewValue << " \n " ;
192+ dbgs () << " Call: " << *Call << " \n " ;
191193 ReflectReplacements.push_back ({Call, NewValue});
192194 }
193195
@@ -216,35 +218,25 @@ NVVMReflect::findTransitivelyDeadBlocks(BasicBlock *DeadBB) {
216218
217219// / Replace calls to __nvvm_reflect with corresponding constant values. Then
218220// / clean up through constant folding and propagation and dead block
219- // / elimination.
220- // /
221- // / The purpose of this cleanup is not optimization because that could be
222- // / handled by later passes
223- // / (i.e. SCCP, SimplifyCFG, etc.), but for correctness. Reflect calls are most
224- // / commonly used to query the arch number and select a valid instruction for
225- // / the arch. Therefore, you need to eliminate blocks that become dead because
226- // / they may contain invalid instructions for the arch. The purpose of the
227- // / cleanup is to do the minimal amount of work to leave the code in a valid
228- // / state.
221+ // / elimination, if NVVMReflectDCE is enabled.
229222void NVVMReflect::replaceReflectCalls (
230223 SmallVector<std::pair<CallInst *, Constant *>, 8 > &ReflectReplacements,
231224 const DataLayout &DL) {
232225 SmallVector<Instruction *, 8 > Worklist;
233226 SetVector<BasicBlock *> DeadBlocks;
234227
235- // Replace an instruction with a constant and add all users to the worklist,
236- // then delete the instruction
228+ // Replace an instruction with a constant and add all users to the worklist
237229 auto ReplaceInstructionWithConst = [&](Instruction *I, Constant *C) {
238230 for (auto *U : I->users ())
239231 if (auto *UI = dyn_cast<Instruction>(U))
240232 Worklist.push_back (UI);
241233 I->replaceAllUsesWith (C);
242- if (isInstructionTriviallyDead (I))
243- I->eraseFromParent ();
244234 };
245235
246- for (auto &[Call, NewValue] : ReflectReplacements)
236+ for (auto &[Call, NewValue] : ReflectReplacements) {
247237 ReplaceInstructionWithConst (Call, NewValue);
238+ Call->eraseFromParent ();
239+ }
248240
249241 // Constant fold reflect results. If NVVMReflectDCE is enabled, we will
250242 // alternate between constant folding/propagation and dead block elimination.
@@ -257,6 +249,8 @@ void NVVMReflect::replaceReflectCalls(
257249 auto *I = Worklist.pop_back_val ();
258250 if (auto *C = ConstantFoldInstruction (I, DL)) {
259251 ReplaceInstructionWithConst (I, C);
252+ if (isInstructionTriviallyDead (I))
253+ I->eraseFromParent ();
260254 } else if (I->isTerminator ()) {
261255 BasicBlock *BB = I->getParent ();
262256 SmallVector<BasicBlock *, 8 > Succs (successors (BB));
0 commit comments