File tree Expand file tree Collapse file tree 6 files changed +18
-12
lines changed Expand file tree Collapse file tree 6 files changed +18
-12
lines changed Original file line number Diff line number Diff line change @@ -255,7 +255,6 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
255255 PB.registerPipelineStartEPCallback (
256256 [this ](ModulePassManager &PM, OptimizationLevel Level) {
257257 FunctionPassManager FPM;
258- FPM.addPass (NVVMReflectPass (Subtarget.getSmVersion ()));
259258 // Note: NVVMIntrRangePass was causing numerical discrepancies at one
260259 // point, if issues crop up, consider disabling.
261260 FPM.addPass (NVVMIntrRangePass ());
Original file line number Diff line number Diff line change 2121#include " NVPTX.h"
2222#include " llvm/ADT/SmallVector.h"
2323#include " llvm/Analysis/ConstantFolding.h"
24+ #include " llvm/CodeGen/CommandFlags.h"
2425#include " llvm/IR/Constants.h"
2526#include " llvm/IR/DerivedTypes.h"
2627#include " llvm/IR/Function.h"
@@ -219,7 +220,12 @@ bool NVVMReflect::runOnFunction(Function &F) {
219220 return runNVVMReflect (F, SmVersion);
220221}
221222
222- NVVMReflectPass::NVVMReflectPass () : NVVMReflectPass(0 ) {}
223+ NVVMReflectPass::NVVMReflectPass () {
224+ // Get the CPU string from the command line if not provided.
225+ StringRef SM = codegen::getMCPU ();
226+ if (!SM.consume_front (" sm_" ) || SM.consumeInteger (10 , SmVersion))
227+ SmVersion = 0 ;
228+ }
223229
224230PreservedAnalyses NVVMReflectPass::run (Function &F,
225231 FunctionAnalysisManager &AM) {
Original file line number Diff line number Diff line change 11; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type.
22; Verify that __nvvm_reflect() is replaced with an appropriate value.
33;
4- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_20 \
4+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_20 \
55; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
6- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_35 \
6+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_35 \
77; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
88
99@"$str" = private addrspace (1 ) constant [12 x i8 ] c "__CUDA_ARCH\00 "
Original file line number Diff line number Diff line change 11; Verify that __nvvm_reflect_ocl() is replaced with an appropriate value
22;
3- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_20 \
3+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_20 \
44; RUN: | FileCheck %s --check-prefixes=COMMON,SM20
5- ; RUN: opt %s -S -passes='default<O2> ' -mtriple=nvptx64 -mcpu=sm_35 \
5+ ; RUN: opt %s -S -passes='nvvm-reflect ' -mtriple=nvptx64 -mcpu=sm_35 \
66; RUN: | FileCheck %s --check-prefixes=COMMON,SM35
77
88@"$str" = private addrspace (4 ) constant [12 x i8 ] c "__CUDA_ARCH\00 "
Original file line number Diff line number Diff line change 33
44; RUN: cat %s > %t.noftz
55; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6- ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
6+ ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
77; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
88
99; RUN: cat %s > %t.ftz
1010; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11- ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
11+ ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
1212; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
1313
1414@str = private unnamed_addr addrspace (4 ) constant [11 x i8 ] c "__CUDA_FTZ\00 "
4343
4444declare i32 @llvm.nvvm.reflect (ptr )
4545
46- ; CHECK-LABEL: define noundef i32 @intrinsic
46+ ; CHECK-LABEL: define i32 @intrinsic
4747define i32 @intrinsic () {
4848; CHECK-NOT: call i32 @llvm.nvvm.reflect
4949; USE_FTZ_0: ret i32 0
Original file line number Diff line number Diff line change 33
44; RUN: cat %s > %t.noftz
55; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
6- ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
6+ ; RUN: opt %t.noftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
77; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
88
99; RUN: cat %s > %t.ftz
1010; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
11- ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='default<O2> ' \
11+ ; RUN: opt %t.ftz -S -mtriple=nvptx-nvidia-cuda -passes='nvvm-reflect,simplifycfg ' \
1212; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
1313
1414@str = private unnamed_addr addrspace (4 ) constant [11 x i8 ] c "__CUDA_FTZ\00 "
4343
4444declare i32 @llvm.nvvm.reflect (ptr )
4545
46- ; CHECK-LABEL: define noundef i32 @intrinsic
46+ ; CHECK-LABEL: define i32 @intrinsic
47+
4748define i32 @intrinsic () {
4849; CHECK-NOT: call i32 @llvm.nvvm.reflect
4950; USE_FTZ_0: ret i32 0
You can’t perform that action at this time.
0 commit comments