Skip to content

Commit 32c5c40

Browse files
committed
NVVMReflectDCE option
1 parent 4ba9826 commit 32c5c40

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

llvm/lib/Target/NVPTX/NVVMReflect.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ ModulePass *llvm::createNVVMReflectPass(unsigned SmVersion) {
8888
}
8989

9090
static cl::opt<bool>
91-
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
91+
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(false), cl::Hidden,
9292
cl::desc("NVVM reflection, disabled by default"));
9393

9494
char NVVMReflectLegacyPass::ID = 0;
@@ -105,6 +105,10 @@ static cl::list<std::string> ReflectList(
105105
cl::desc("A key=value pair. Replace __nvvm_reflect(name) with value."),
106106
cl::ValueRequired);
107107

108+
static cl::opt<bool> NVVMReflectDCE("nvvm-reflect-dce", cl::init(false),
109+
cl::Hidden,
110+
cl::desc("Delete dead blocks introduced by reflect call elimination"));
111+
108112
// Set the ReflectMap with, first, the value of __CUDA_FTZ from module metadata,
109113
// and then the key/value pairs from the command line.
110114
void NVVMReflect::populateReflectMap(Module &M) {
@@ -241,8 +245,9 @@ void NVVMReflect::replaceReflectCalls(
241245

242246
for (auto &[Call, NewValue] : ReflectReplacements)
243247
ReplaceInstructionWithConst(Call, NewValue);
244-
245-
// Alternate between constant folding/propagation and dead block elimination.
248+
249+
// Constant fold reflect results. If NVVMReflectDCE is enabled, we will
250+
// alternate between constant folding/propagation and dead block elimination.
246251
// Terminator folding may create new dead blocks. When those dead blocks are
247252
// deleted, their live successors may have PHIs that can be simplified, which
248253
// may yield more work for folding/propagation.
@@ -256,11 +261,12 @@ void NVVMReflect::replaceReflectCalls(
256261
BasicBlock *BB = I->getParent();
257262
SmallVector<BasicBlock *, 8> Succs(successors(BB));
258263
// Some blocks may become dead if the terminator is folded because
259-
// a conditional branch is turned into a direct branch.
264+
// a conditional branch is turned into a direct branch. Add those dead blocks
265+
// to the dead blocks set if NVVMReflectDCE is enabled.
260266
if (ConstantFoldTerminator(BB)) {
261267
for (BasicBlock *Succ : Succs) {
262268
if (pred_empty(Succ) &&
263-
Succ != &Succ->getParent()->getEntryBlock()) {
269+
Succ != &Succ->getParent()->getEntryBlock() && NVVMReflectDCE) {
264270
SetVector<BasicBlock *> TransitivelyDead =
265271
findTransitivelyDeadBlocks(Succ);
266272
DeadBlocks.insert(TransitivelyDead.begin(),

0 commit comments

Comments
 (0)