@@ -88,7 +88,7 @@ ModulePass *llvm::createNVVMReflectPass(unsigned SmVersion) {
8888}
8989
// Hidden command-line switch for NVVM reflection.
// NOTE(review): this hunk flips the default from true to false, so the help
// text has to flip with it; otherwise the option would still describe itself
// as "enabled by default" while being initialized to false.
9090static cl::opt<bool >
91- NVVMReflectEnabled (" nvvm-reflect-enable" , cl::init(true ), cl::Hidden,
91+ NVVMReflectEnabled (" nvvm-reflect-enable" , cl::init(false ), cl::Hidden,
92- cl::desc(" NVVM reflection, enabled by default" ));
92+ cl::desc(" NVVM reflection, disabled by default" ));
9393
// Out-of-class definition of the static `ID` member of NVVMReflectLegacyPass,
// initialized to 0.
9494char NVVMReflectLegacyPass::ID = 0 ;
@@ -105,6 +105,10 @@ static cl::list<std::string> ReflectList(
105105 cl::desc(" A key=value pair. Replace __nvvm_reflect(name) with value." ),
106106 cl::ValueRequired);
107107
// Hidden boolean option added by this change; it is initialized to false.
// replaceReflectCalls consults it after a terminator has been folded: only
// when it is set are successors left without predecessors expanded via
// findTransitivelyDeadBlocks and inserted into the DeadBlocks set.
108+ static cl::opt<bool > NVVMReflectDCE (" nvvm-reflect-dce" , cl::init(false ),
109+ cl::Hidden,
110+ cl::desc(" Delete dead blocks introduced by reflect call elimination" ));
111+
108112// Set the ReflectMap with, first, the value of __CUDA_FTZ from module metadata,
109113// and then the key/value pairs from the command line.
110114void NVVMReflect::populateReflectMap (Module &M) {
@@ -241,8 +245,9 @@ void NVVMReflect::replaceReflectCalls(
241245
242246 for (auto &[Call, NewValue] : ReflectReplacements)
243247 ReplaceInstructionWithConst (Call, NewValue);
244-
245- // Alternate between constant folding/propagation and dead block elimination.
248+
249+ // Constant fold reflect results. If NVVMReflectDCE is enabled, we will
250+ // alternate between constant folding/propagation and dead block elimination.
246251 // Terminator folding may create new dead blocks. When those dead blocks are
247252 // deleted, their live successors may have PHIs that can be simplified, which
248253 // may yield more work for folding/propagation.
@@ -256,11 +261,12 @@ void NVVMReflect::replaceReflectCalls(
256261 BasicBlock *BB = I->getParent ();
257262 SmallVector<BasicBlock *, 8 > Succs (successors (BB));
258263 // Some blocks may become dead if the terminator is folded because
259- // a conditional branch is turned into a direct branch.
264+ // a conditional branch is turned into a direct branch. Add those dead blocks
265+ // to the dead blocks set if NVVMReflectDCE is enabled.
260266 if (ConstantFoldTerminator (BB)) {
261267 for (BasicBlock *Succ : Succs) {
262268 if (pred_empty (Succ) &&
263- Succ != &Succ->getParent ()->getEntryBlock ()) {
269+ Succ != &Succ->getParent ()->getEntryBlock () && NVVMReflectDCE ) {
264270 SetVector<BasicBlock *> TransitivelyDead =
265271 findTransitivelyDeadBlocks (Succ);
266272 DeadBlocks.insert (TransitivelyDead.begin (),
0 commit comments