diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 8f4eddb514274..8b34ce4f1001c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -139,6 +139,8 @@ def hasVote : Predicate<"Subtarget->hasVote()">; def hasDouble : Predicate<"Subtarget->hasDouble()">; def hasLDG : Predicate<"Subtarget->hasLDG()">; def hasLDU : Predicate<"Subtarget->hasLDU()">; +def hasPTXASUnreachableBug : Predicate<"Subtarget->hasPTXASUnreachableBug()">; +def noPTXASUnreachableBug : Predicate<"!Subtarget->hasPTXASUnreachableBug()">; def doF32FTZ : Predicate<"useF32FTZ()">; def doNoF32FTZ : Predicate<"!useF32FTZ()">; @@ -3736,9 +3738,10 @@ def Callseq_End : [(callseq_end timm:$amt1, timm:$amt2)]>; // trap instruction +def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>, Requires<[noPTXASUnreachableBug]>; // Emit an `exit` as well to convey to ptxas that `trap` exits the CFG. // This won't be necessary in a future version of ptxas. -def trapinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>; +def trapexitinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[hasPTXASUnreachableBug]>; // brkpt instruction def debugtrapinst : NVPTXInst<(outs), (ins), "brkpt;", [(debugtrap)]>; diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 8b9059bd60cbd..e785bbf830da6 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -95,6 +95,14 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { bool hasDotInstructions() const { return SmVersion >= 61 && PTXVersion >= 50; } + // Prior to CUDA 12.3 ptxas did not recognize that the trap instruction + // terminates a basic block. Instead, it would assume that control flow + // continued to the next instruction. The next instruction could be in the + // block that's lexically below it. This would lead to phantom CFG edges + // being created within ptxas. 
This issue was fixed in CUDA 12.3. Thus, when + // targeting PTX ISA versions 8.3+ we can confidently say that the bug will + // not be present. + bool hasPTXASUnreachableBug() const { return PTXVersion < 83; } bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; } unsigned int getFullSmVersion() const { return FullSmVersion; } unsigned int getSmVersion() const { return getFullSmVersion() / 10; } diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 57b7fa783c14a..b79b4ff93efe4 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -368,9 +368,13 @@ void NVPTXPassConfig::addIRPasses() { addPass(createSROAPass()); } - const auto &Options = getNVPTXTargetMachine().Options; - addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable, - Options.NoTrapAfterNoreturn)); + if (ST.hasPTXASUnreachableBug()) { + // Run LowerUnreachable to work around a ptxas bug. See the commit + // description of 1ee4d880e8760256c606fe55b7af85a4f70d006d for more details. 
+ const auto &Options = getNVPTXTargetMachine().Options; + addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable, + Options.NoTrapAfterNoreturn)); + } } bool NVPTXPassConfig::addInstSelector() { diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll index f9118900cb737..6bd583c8d50d8 100644 --- a/llvm/test/CodeGen/NVPTX/unreachable.ll +++ b/llvm/test/CodeGen/NVPTX/unreachable.ll @@ -1,48 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP +; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable=false \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP +; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-UNREACHABLE ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP +; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTRAP +; RUN: | FileCheck %s --check-prefixes=CHECK,NO-TRAP-AFTER-NORETURN ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP +; RUN: | FileCheck %s --check-prefixes=CHECK,TRAP ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -no-trap-after-noreturn=false \ -; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-TRAP +; RUN: | FileCheck %s --check-prefixes=CHECK,TRAP +; RUN: llc < %s 
-march=nvptx64 -mcpu=sm_20 -verify-machineinstrs -trap-unreachable -mattr=+ptx83 \ +; RUN: | FileCheck %s --check-prefixes=BUG-FIXED ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} -; CHECK: .extern .func throw +target triple = "nvptx-unknown-cuda" + declare void @throw() #0 declare void @llvm.trap() #0 -; CHECK-LABEL: .entry kernel_func define void @kernel_func() { -; CHECK: call.uni -; CHECK: throw, +; NO-TRAP-UNREACHABLE-LABEL: kernel_func( +; NO-TRAP-UNREACHABLE: { +; NO-TRAP-UNREACHABLE-EMPTY: +; NO-TRAP-UNREACHABLE-EMPTY: +; NO-TRAP-UNREACHABLE-NEXT: // %bb.0: +; NO-TRAP-UNREACHABLE-NEXT: { // callseq 0, 0 +; NO-TRAP-UNREACHABLE-NEXT: call.uni +; NO-TRAP-UNREACHABLE-NEXT: throw, +; NO-TRAP-UNREACHABLE-NEXT: ( +; NO-TRAP-UNREACHABLE-NEXT: ); +; NO-TRAP-UNREACHABLE-NEXT: } // callseq 0 +; NO-TRAP-UNREACHABLE-NEXT: // begin inline asm +; NO-TRAP-UNREACHABLE-NEXT: exit; +; NO-TRAP-UNREACHABLE-NEXT: // end inline asm +; +; NO-TRAP-AFTER-NORETURN-LABEL: kernel_func( +; NO-TRAP-AFTER-NORETURN: { +; NO-TRAP-AFTER-NORETURN-EMPTY: +; NO-TRAP-AFTER-NORETURN-EMPTY: +; NO-TRAP-AFTER-NORETURN-NEXT: // %bb.0: +; NO-TRAP-AFTER-NORETURN-NEXT: { // callseq 0, 0 +; NO-TRAP-AFTER-NORETURN-NEXT: call.uni +; NO-TRAP-AFTER-NORETURN-NEXT: throw, +; NO-TRAP-AFTER-NORETURN-NEXT: ( +; NO-TRAP-AFTER-NORETURN-NEXT: ); +; NO-TRAP-AFTER-NORETURN-NEXT: } // callseq 0 +; NO-TRAP-AFTER-NORETURN-NEXT: // begin inline asm +; NO-TRAP-AFTER-NORETURN-NEXT: exit; +; NO-TRAP-AFTER-NORETURN-NEXT: // end inline asm +; NO-TRAP-AFTER-NORETURN-NEXT: trap; exit; +; +; TRAP-LABEL: kernel_func( +; TRAP: { +; TRAP-EMPTY: +; TRAP-EMPTY: +; TRAP-NEXT: // %bb.0: +; TRAP-NEXT: { // callseq 0, 0 +; TRAP-NEXT: call.uni +; TRAP-NEXT: throw, +; TRAP-NEXT: ( +; TRAP-NEXT: ); +; TRAP-NEXT: } // callseq 0 +; TRAP-NEXT: trap; exit; +; +; BUG-FIXED-LABEL: 
kernel_func( +; BUG-FIXED: { +; BUG-FIXED-EMPTY: +; BUG-FIXED-EMPTY: +; BUG-FIXED-NEXT: // %bb.0: +; BUG-FIXED-NEXT: { // callseq 0, 0 +; BUG-FIXED-NEXT: call.uni +; BUG-FIXED-NEXT: throw, +; BUG-FIXED-NEXT: ( +; BUG-FIXED-NEXT: ); +; BUG-FIXED-NEXT: } // callseq 0 +; BUG-FIXED-NEXT: trap; call void @throw() -; CHECK-TRAP-NOT: exit; -; CHECK-TRAP: trap; -; CHECK-NOTRAP-NOT: trap; -; CHECK: exit; unreachable } -; CHECK-LABEL: kernel_func_2 define void @kernel_func_2() { -; CHECK: trap; exit; +; CHECK-LABEL: kernel_func_2( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: trap; exit; +; +; BUG-FIXED-LABEL: kernel_func_2( +; BUG-FIXED: { +; BUG-FIXED-EMPTY: +; BUG-FIXED-EMPTY: +; BUG-FIXED-NEXT: // %bb.0: +; BUG-FIXED-NEXT: trap; call void @llvm.trap() - -;; Make sure we avoid emitting two trap instructions. -; CHECK-NOT: trap; -; CHECK-NOT: exit; +; Make sure we avoid emitting two trap instructions. unreachable } attributes #0 = { noreturn } - !nvvm.annotations = !{!1} - !1 = !{ptr @kernel_func, !"kernel", i32 1}