Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ void NVPTXSubtarget::anchor() {}
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
// Provide the default CPU if we don't have one.
TargetName = std::string(CPU.empty() ? "sm_30" : CPU);
TargetName = std::string(CPU);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could use a comment on why we may want to keep CPU empty in some cases.


ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
ParseSubtargetFeatures(CPU.empty() ? "sm_30" : CPU,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: replace `CPU.empty() ? "sm_30" : CPU` with `getTargetName()`.

/*TuneCPU=*/CPU.empty() ? "sm_30" : CPU, FS);

// Re-map SM version numbers, SmVersion carries the regular SMs which do
// have relative order, while FullSmVersion allows distinguishing sm_90 from
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// - 0 represents base GPU model,
// - non-zero value identifies particular architecture-accelerated variant.
// True when the last decimal digit of the full SM version is non-zero.
bool hasAAFeatures() const {
  const auto VariantDigit = getFullSmVersion() % 10;
  return VariantDigit != 0;
}
std::string getTargetName() const { return TargetName; }
// Returns the stored target name, substituting "sm_30" when TargetName is
// empty.
std::string getTargetName() const {
  if (TargetName.empty())
    return "sm_30";
  return TargetName;
}
// Reports whether TargetName is non-empty.
bool hasTargetName() const {
  const bool IsUnset = TargetName.empty();
  return !IsUnset;
}

// Get maximum value of required alignments among the supported data types.
// From the PTX ISA doc, section 8.2.3:
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,10 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineStartEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
// We do not want to fold out calls to nvvm.reflect early if the user
// has not provided a target architecture just yet.
if (Subtarget.hasTargetName())
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
// Note: NVVMIntrRangePass was causing numerical discrepancies at one
// point, if issues crop up, consider disabling.
FPM.addPass(NVVMIntrRangePass());
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/NVPTX/NVVMReflect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
Expand Down Expand Up @@ -219,7 +220,12 @@ bool NVVMReflect::runOnFunction(Function &F) {
return runNVVMReflect(F, SmVersion);
}

NVVMReflectPass::NVVMReflectPass() : NVVMReflectPass(0) {}
// Default constructor: derives SmVersion from the CPU string given on the
// command line. SmVersion is set to 0 when that string does not start with
// "sm_" or when no integer can be parsed after the prefix.
NVVMReflectPass::NVVMReflectPass() {
  // Get the CPU string from the command line if not provided. Hold it in a
  // named std::string for the whole parse: a StringRef does not own its data,
  // so initializing it directly from the call result would leave it viewing a
  // temporary that is destroyed at the end of that statement.
  const std::string MCPU = codegen::getMCPU();
  StringRef SM = MCPU;
  // consume_front() strips the "sm_" prefix; consumeInteger() returns true on
  // failure, in which case the version falls back to 0.
  if (!SM.consume_front("sm_") || SM.consumeInteger(10, SmVersion))
    SmVersion = 0;
}

PreservedAnalyses NVVMReflectPass::run(Function &F,
FunctionAnalysisManager &AM) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
; Verify that __nvvm_reflect() is replaced with an appropriate value.
;
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35

@"$str" = private addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00"
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect-ocl.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
;
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_20 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
; RUN: opt %s -S -passes='default<O2>' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: opt %s -S -passes='nvvm-reflect' -mtriple=nvptx64 -mcpu=sm_35 \
; RUN: | FileCheck %s --check-prefixes=COMMON,SM35

@"$str" = private addrspace(4) constant [12 x i8] c"__CUDA_ARCH\00"
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect-opaque.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

; RUN: cat %s > %t.noftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK

; RUN: cat %s > %t.ftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK

@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
Expand Down Expand Up @@ -43,7 +43,7 @@ exit:

declare i32 @llvm.nvvm.reflect(ptr)

; CHECK-LABEL: define noundef i32 @intrinsic
; CHECK-LABEL: define i32 @intrinsic
define i32 @intrinsic() {
; CHECK-NOT: call i32 @llvm.nvvm.reflect
; USE_FTZ_0: ret i32 0
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

; RUN: cat %s > %t.noftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK

; RUN: cat %s > %t.ftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2>' \
; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg' \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK

@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
Expand Down Expand Up @@ -43,7 +43,8 @@ exit:

declare i32 @llvm.nvvm.reflect(ptr)

; CHECK-LABEL: define noundef i32 @intrinsic
; CHECK-LABEL: define i32 @intrinsic

define i32 @intrinsic() {
; CHECK-NOT: call i32 @llvm.nvvm.reflect
; USE_FTZ_0: ret i32 0
Expand Down
Loading