8 files changed: +37 -23 lines changed
@@ -34,19 +34,18 @@ void NVPTXSubtarget::anchor() {}
3434
3535NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies (StringRef CPU,
3636 StringRef FS) {
37- // Provide the default CPU if we don't have one.
38- TargetName = std::string (CPU.empty () ? " sm_30" : CPU);
37+ TargetName = std::string (CPU);
3938
40- ParseSubtargetFeatures (TargetName , /* TuneCPU*/ TargetName , FS);
39+ ParseSubtargetFeatures (getTargetName () , /* TuneCPU= */ getTargetName () , FS);
4140
42- // Re-map SM version numbers, SmVersion carries the regular SMs which do
43- // have relative order, while FullSmVersion allows distinguishing sm_90 from
44- // sm_90a, which would *not* be a subset of sm_91.
45- SmVersion = getSmVersion ();
41+ // Re-map SM version numbers, SmVersion carries the regular SMs which do
42+ // have relative order, while FullSmVersion allows distinguishing sm_90 from
43+ // sm_90a, which would *not* be a subset of sm_91.
44+ SmVersion = getSmVersion ();
4645
47- // Set default to PTX 6.0 (CUDA 9.0)
48- if (PTXVersion == 0 ) {
49- PTXVersion = 60 ;
46+ // Set default to PTX 6.0 (CUDA 9.0)
47+ if (PTXVersion == 0 ) {
48+ PTXVersion = 60 ;
5049 }
5150
5251 return *this ;
Original file line number Diff line number Diff line change @@ -111,7 +111,12 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
111111 // - 0 represents base GPU model,
112112 // - non-zero value identifies particular architecture-accelerated variant.
113113 bool hasAAFeatures () const { return getFullSmVersion () % 10 ; }
114- std::string getTargetName () const { return TargetName; }
114+
115+ // If the user did not provide a target we default to the `sm_30` target.
116+ std::string getTargetName () const {
117+ return TargetName.empty () ? " sm_30" : TargetName;
118+ }
119+ bool hasTargetName () const { return !TargetName.empty (); }
115120
116121 // Get maximum value of required alignments among the supported data types.
117122 // From the PTX ISA doc, section 8.2.3:
Original file line number Diff line number Diff line change @@ -255,7 +255,10 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
255255 PB.registerPipelineStartEPCallback (
256256 [this ](ModulePassManager &PM, OptimizationLevel Level) {
257257 FunctionPassManager FPM;
258- FPM.addPass (NVVMReflectPass (Subtarget.getSmVersion ()));
258+ // We do not want to fold out calls to nvvm.reflect early if the user
259+ // has not provided a target architecture just yet.
260+ if (Subtarget.hasTargetName ())
261+ FPM.addPass (NVVMReflectPass (Subtarget.getSmVersion ()));
259262 // Note: NVVMIntrRangePass was causing numerical discrepancies at one
260263 // point, if issues crop up, consider disabling.
261264 FPM.addPass (NVVMIntrRangePass ());
Original file line number Diff line number Diff line change 2121#include " NVPTX.h"
2222#include " llvm/ADT/SmallVector.h"
2323#include " llvm/Analysis/ConstantFolding.h"
24+ #include " llvm/CodeGen/CommandFlags.h"
2425#include " llvm/IR/Constants.h"
2526#include " llvm/IR/DerivedTypes.h"
2627#include " llvm/IR/Function.h"
@@ -219,7 +220,12 @@ bool NVVMReflect::runOnFunction(Function &F) {
219220 return runNVVMReflect (F, SmVersion);
220221}
221222
222- NVVMReflectPass::NVVMReflectPass () : NVVMReflectPass(0 ) {}
223+ NVVMReflectPass::NVVMReflectPass () {
224+ // Get the CPU string from the command line if not provided.
225+ StringRef SM = codegen::getMCPU ();
226+ if (!SM.consume_front (" sm_" ) || SM.consumeInteger (10 , SmVersion))
227+ SmVersion = 0 ;
228+ }
223229
224230PreservedAnalyses NVVMReflectPass::run (Function &F,
225231 FunctionAnalysisManager &AM) {
Original file line number Diff line number Diff line change 11; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
22; Verify that __nvvm_reflect() is replaced with an appropriate value.
33;
4- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_20 \
4+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_20 \
55; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
6- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_35 \
6+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_35 \
77; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
88
99@"$str" = private addrspace (1 ) constant [12 x i8 ] c "__CUDA_ARCH\00 "
Original file line number Diff line number Diff line change 11; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
22;
3- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_20 \
3+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_20 \
44; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
5- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_35 \
5+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_35 \
66; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
77
88@"$str" = private addrspace (4 ) constant [12 x i8 ] c "__CUDA_ARCH\00 "
Original file line number Diff line number Diff line change 33
44; RUN: cat %s > %t.noftz
55; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6- ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
6+ ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
77; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
88
99; RUN: cat %s > %t.ftz
1010; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11- ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
11+ ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
1212; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
1313
1414@str = private unnamed_addr addrspace (4 ) constant [11 x i8 ] c "__CUDA_FTZ\00 "
4343
4444declare i32 @llvm.nvvm.reflect (ptr )
4545
46- ; CHECK-LABEL: define noundef i32 @intrinsic
46+ ; CHECK-LABEL: define i32 @intrinsic
4747define i32 @intrinsic () {
4848; CHECK-NOT: call i32 @llvm.nvvm.reflect
4949; USE_FTZ_0: ret i32 0
Original file line number Diff line number Diff line change 33
44; RUN: cat %s > %t.noftz
55; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6- ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
6+ ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
77; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
88
99; RUN: cat %s > %t.ftz
1010; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11- ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
11+ ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
1212; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
1313
1414@str = private unnamed_addr addrspace (4 ) constant [11 x i8 ] c "__CUDA_FTZ\00 "
4343
4444declare i32 @llvm.nvvm.reflect (ptr )
4545
46- ; CHECK-LABEL: define noundef i32 @intrinsic
46+ ; CHECK-LABEL: define i32 @intrinsic
47+
4748define i32 @intrinsic () {
4849; CHECK-NOT: call i32 @llvm.nvvm.reflect
4950; USE_FTZ_0: ret i32 0
You can’t perform that action at this time.
0 commit comments