Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ def hasVote : Predicate<"Subtarget->hasVote()">;
def hasDouble : Predicate<"Subtarget->hasDouble()">;
def hasLDG : Predicate<"Subtarget->hasLDG()">;
def hasLDU : Predicate<"Subtarget->hasLDU()">;
def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">;
def noPTXASUnreachableBug : Predicate<"!Subtarget->hasPTXASUnreachableBug()">;

def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
Expand Down Expand Up @@ -3736,9 +3738,10 @@ def Callseq_End :
[(callseq_end timm:$amt1, timm:$amt2)]>;

// trap instruction
def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>, Requires<[noPTXASUnreachableBug]>;
// Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
// This won't be necessary in a future version of ptxas.
def trapinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>;
def trapexitinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[hasPTXASUnreachableBug]>;
// brkpt instruction
def debugtrapinst : NVPTXInst<(outs), (ins), "brkpt;", [(debugtrap)]>;

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool hasDotInstructions() const {
return SmVersion >= 61 && PTXVersion >= 50;
}
bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
Expand Down
10 changes: 7 additions & 3 deletions llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,9 +368,13 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createSROAPass());
}

const auto &Options = getNVPTXTargetMachine().Options;
addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
Options.NoTrapAfterNoreturn));
if (ST.hasPTXASUnreachableBug()) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Topic for discussion: should this be a CLI option?

I added the check for PTX Version <= 7.4 because it's the best proxy I could find for whether ptxas comes from CUDA 11.4 or earlier. (The highest PTX version that CUDA 11.4's ptxas supports is 7.4.)

Alternatively, we could put the onus on the user to set a flag indicating that they're using a copy of ptxas from CUDA 11.4 or prior.

CC @Artem-B for comment.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For PTX >= 8.3 it's not needed.

For PTX versions older than that, we should assume that the output may be given to a ptxas that has the bug. The problem is that we decide how to generate code at build time, but in the general case we may not know which ptxas will be used until run time. We should conservatively assume that we do need to produce `trap; exit;` in that case.

If we eventually find a legitimate use case for such an option, we can add it then. For now, I do not see much point.

// Run LowerUnreachable to WAR a ptxas bug. See the commit description of
// 1ee4d880e8760256c606fe55b7af85a4f70d006d for more details.
const auto &Options = getNVPTXTargetMachine().Options;
addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
Options.NoTrapAfterNoreturn));
}
}

bool NVPTXPassConfig::addInstSelector() {
Expand Down
23 changes: 15 additions & 8 deletions llvm/test/CodeGen/NVPTX/unreachable.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOTRAP
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-TRAP
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \
; RUN: | FileCheck %s --check-prefix=CHECK-BUG-FIXED
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}

Expand All @@ -24,25 +26,30 @@ define void @kernel_func() {
call void @throw()
; CHECK-TRAP-NOT: exit;
; CHECK-TRAP: trap;

; CHECK-NOTRAP-NOT: trap;
; CHECK: exit;

; CHECK-BUG-FIXED-NOT: exit;
; CHECK-BUG-FIXED: trap;
unreachable
}

; CHECK-LABEL: kernel_func_2
define void @kernel_func_2() {
; CHECK: trap; exit;
; CHECK-BUG-FIXED: trap;
; CHECK-BUG-FIXED-NOT: trap; exit;
call void @llvm.trap()

;; Make sure we avoid emitting two trap instructions.
; CHECK-NOT: trap;
; CHECK-NOT: exit;
; CHECK-BUG-FIXED-NOT: trap;
unreachable
}

attributes #0 = { noreturn }


!nvvm.annotations = !{!1}

!1 = !{ptr @kernel_func, !"kernel", i32 1}
Loading