Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ static cl::opt<bool>
EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden,
cl::desc("Enable reciprocal sqrt optimization"));

// FIXME: This is a WAR to recover lost performance from #155024.
// We still need to investigate the regression and find a more permanent
// solution.
static cl::opt<bool> EnableMADWide("nvptx-mad-wide-opt", cl::init(false),
cl::Hidden,
cl::desc("Enable MAD wide optimization"));

/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
Expand Down Expand Up @@ -84,6 +91,8 @@ bool NVPTXDAGToDAGISel::allowFMA() const {

bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }

bool NVPTXDAGToDAGISel::doMADWideOpt() const { return EnableMADWide; }

/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool useF32FTZ() const;
bool allowFMA() const;
bool doRsqrtOpt() const;
bool doMADWideOpt() const;

NVPTXScopes Scopes{};

Expand Down
10 changes: 9 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def hasArchAccelFeatures : Predicate<"Subtarget->hasArchAccelFeatures()">;
def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
def doRsqrtOpt : Predicate<"doRsqrtOpt()">;
def doMADWideOpt : Predicate<"doMADWideOpt()">;

def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
Expand Down Expand Up @@ -899,8 +900,15 @@ let Predicates = [hasOptEnabled] in {
defm MAD_LO_S32 : MADInst<"lo.s32", mul, I32RT, I32RT>;
defm MAD_LO_S64 : MADInst<"lo.s64", mul, I64RT, I64RT>;

// Generating mad.wide causes a regression:
// Generating mad.wide causes a regression in some cases:
// https://github.com/llvm/llvm-project/pull/150477#issuecomment-3191367837
// Only do so when the user requests it.
let Predicates = [doMADWideOpt] in {
defm MAD_WIDE_U16 : MADInst<"wide.u16", umul_wide, I32RT, I16RT>;
defm MAD_WIDE_S16 : MADInst<"wide.s16", smul_wide, I32RT, I16RT>;
defm MAD_WIDE_U32 : MADInst<"wide.u32", umul_wide, I64RT, I32RT>;
defm MAD_WIDE_S32 : MADInst<"wide.s32", smul_wide, I64RT, I32RT>;
}
}

//-----------------------------------
Expand Down
Loading