8 files changed: +23 -37 lines changed
Original file line number Diff line number Diff line change @@ -34,18 +34,19 @@ void NVPTXSubtarget::anchor() {}
3434
3535NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies (StringRef CPU,
3636 StringRef FS) {
37- TargetName = std::string (CPU);
37+ // Provide the default CPU if we don't have one.
38+ TargetName = std::string (CPU.empty () ? " sm_30" : CPU);
3839
39- ParseSubtargetFeatures (getTargetName () , /* TuneCPU= */ getTargetName () , FS);
40+ ParseSubtargetFeatures (TargetName , /* TuneCPU*/ TargetName , FS);
4041
41- // Re-map SM version numbers, SmVersion carries the regular SMs which do
42- // have relative order, while FullSmVersion allows distinguishing sm_90 from
43- // sm_90a, which would *not* be a subset of sm_91.
44- SmVersion = getSmVersion ();
42+ // Re-map SM version numbers, SmVersion carries the regular SMs which do
43+ // have relative order, while FullSmVersion allows distinguishing sm_90 from
44+ // sm_90a, which would *not* be a subset of sm_91.
45+ SmVersion = getSmVersion ();
4546
46- // Set default to PTX 6.0 (CUDA 9.0)
47- if (PTXVersion == 0 ) {
48- PTXVersion = 60 ;
47+ // Set default to PTX 6.0 (CUDA 9.0)
48+ if (PTXVersion == 0 ) {
49+ PTXVersion = 60 ;
4950 }
5051
5152 return *this ;
Original file line number Diff line number Diff line change @@ -111,12 +111,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
111111 // - 0 represents base GPU model,
112112 // - non-zero value identifies particular architecture-accelerated variant.
113113 bool hasAAFeatures () const { return getFullSmVersion () % 10 ; }
114-
115- // If the user did not provide a target we default to the `sm_30` target.
116- std::string getTargetName () const {
117- return TargetName.empty () ? " sm_30" : TargetName;
118- }
119- bool hasTargetName () const { return !TargetName.empty (); }
114+ std::string getTargetName () const { return TargetName; }
120115
121116 bool hasNativeBF16Support (int Opcode) const ;
122117
Original file line number Diff line number Diff line change @@ -255,10 +255,7 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
255255 PB.registerPipelineStartEPCallback (
256256 [this ](ModulePassManager &PM, OptimizationLevel Level) {
257257 FunctionPassManager FPM;
258- // We do not want to fold out calls to nvvm.reflect early if the user
259- // has not provided a target architecture just yet.
260- if (Subtarget.hasTargetName ())
261- FPM.addPass (NVVMReflectPass (Subtarget.getSmVersion ()));
258+ FPM.addPass (NVVMReflectPass (Subtarget.getSmVersion ()));
262259 // Note: NVVMIntrRangePass was causing numerical discrepancies at one
263260 // point, if issues crop up, consider disabling.
264261 FPM.addPass (NVVMIntrRangePass ());
Original file line number Diff line number Diff line change 2121#include " NVPTX.h"
2222#include " llvm/ADT/SmallVector.h"
2323#include " llvm/Analysis/ConstantFolding.h"
24- #include " llvm/CodeGen/CommandFlags.h"
2524#include " llvm/IR/Constants.h"
2625#include " llvm/IR/DerivedTypes.h"
2726#include " llvm/IR/Function.h"
@@ -220,12 +219,7 @@ bool NVVMReflect::runOnFunction(Function &F) {
220219 return runNVVMReflect (F, SmVersion);
221220}
222221
223- NVVMReflectPass::NVVMReflectPass () {
224- // Get the CPU string from the command line if not provided.
225- StringRef SM = codegen::getMCPU ();
226- if (!SM.consume_front (" sm_" ) || SM.consumeInteger (10 , SmVersion))
227- SmVersion = 0 ;
228- }
222+ NVVMReflectPass::NVVMReflectPass () : NVVMReflectPass(0 ) {}
229223
230224PreservedAnalyses NVVMReflectPass::run (Function &F,
231225 FunctionAnalysisManager &AM) {
Original file line number Diff line number Diff line change 11; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
22; Verify that __nvvm_reflect() is replaced with an appropriate value.
33;
4- ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_20 \
4+ ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_20 \
55; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
6- ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_35 \
6+ ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_35 \
77; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
88
99@"$str" = private addrspace (1 ) constant [12 x i8 ] c "__CUDA_ARCH\00 "
Original file line number Diff line number Diff line change 11; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
22;
3- ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_20 \
3+ ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_20 \
44; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
5- ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_35 \
5+ ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_35 \
66; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
77
88@"$str" = private addrspace (4 ) constant [12 x i8 ] c "__CUDA_ARCH\00 "
Original file line number Diff line number Diff line change 33
44; RUN: cat %s > %t.noftz
55; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6- ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
6+ ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
77; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
88
99; RUN: cat %s > %t.ftz
1010; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11- ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
11+ ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
1212; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
1313
1414@str = private unnamed_addr addrspace (4 ) constant [11 x i8 ] c "__CUDA_FTZ\00 "
4343
4444declare i32 @llvm.nvvm.reflect (ptr )
4545
46- ; CHECK-LABEL: define i32 @intrinsic
46+ ; CHECK-LABEL: define noundef i32 @intrinsic
4747define i32 @intrinsic () {
4848; CHECK-NOT: call i32 @llvm.nvvm.reflect
4949; USE_FTZ_0: ret i32 0
Original file line number Diff line number Diff line change 33
44; RUN: cat %s > %t.noftz
55; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6- ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
6+ ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
77; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
88
99; RUN: cat %s > %t.ftz
1010; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11- ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
11+ ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
1212; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
1313
1414@str = private unnamed_addr addrspace (4 ) constant [11 x i8 ] c "__CUDA_FTZ\00 "
4343
4444declare i32 @llvm.nvvm.reflect (ptr )
4545
46- ; CHECK-LABEL: define i32 @intrinsic
47-
46+ ; CHECK-LABEL: define noundef i32 @intrinsic
4847define i32 @intrinsic () {
4948; CHECK-NOT: call i32 @llvm.nvvm.reflect
5049; USE_FTZ_0: ret i32 0
You can’t perform that action at this time.
0 commit comments