
Commit 9f2190b

bump default SM for CUDA to 75
1 parent 0ff23e8 commit 9f2190b

File tree: 9 files changed (+20, -19 lines)

clang/include/clang/Basic/OffloadArch.h

Lines changed: 1 addition & 1 deletion
@@ -167,7 +167,7 @@ enum class OffloadArch {
   LNL_M,
   LAST,

-  CudaDefault = OffloadArch::SM_52,
+  CudaDefault = OffloadArch::SM_75,
   HIPDefault = OffloadArch::GFX906,
 };

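
The constant above is what the toolchain falls back to when the user names no CUDA architecture. A minimal standalone sketch of that fallback pattern (the trimmed enum and the resolveOffloadArch helper are illustrative only, not the driver's actual code):

// Hypothetical illustration of how a CudaDefault-style constant is consumed.
#include <string>

enum class OffloadArch {
  SM_50,
  SM_75,
  GFX906,
  CudaDefault = SM_75, // the default this commit bumps to sm_75
};

const char *OffloadArchToString(OffloadArch A) {
  switch (A) {
  case OffloadArch::SM_50:  return "sm_50";
  case OffloadArch::SM_75:  return "sm_75";
  case OffloadArch::GFX906: return "gfx906";
  }
  return "unknown";
}

std::string resolveOffloadArch(const std::string &UserArch) {
  if (!UserArch.empty())
    return UserArch; // honour e.g. -Xsycl-target-backend --cuda-gpu-arch=...
  return OffloadArchToString(OffloadArch::CudaDefault); // now "sm_75"
}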

clang/lib/Driver/Driver.cpp

Lines changed: 3 additions & 3 deletions
@@ -5927,11 +5927,11 @@ class OffloadingActionBuilder final {

       // Handle defaults architectures
       for (auto &Triple : SYCLTripleList) {
-        // For NVIDIA use SM_50 as a default
+        // For NVIDIA use SM_75 as a default
         if (Triple.isNVPTX() && llvm::none_of(GpuArchList, [&](auto &P) {
               return P.first.isNVPTX();
             })) {
-          const char *DefaultArch = OffloadArchToString(OffloadArch::SM_50);
+          const char *DefaultArch = OffloadArchToString(OffloadArch::SM_75);
           GpuArchList.emplace_back(Triple, DefaultArch);
         }


@@ -7650,7 +7650,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
   // The default arch is set for NVPTX if not provided. For AMDGPU, emit
   // an error as the user is responsible to set the arch.
   if (TC.getTriple().isNVPTX())
-    Archs.insert(OffloadArchToString(OffloadArch::SM_50));
+    Archs.insert(OffloadArchToString(OffloadArch::SM_75));
   else if (TC.getTriple().isAMDGPU())
     C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch)
         << 1 << TC.getTriple().str();
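
The guard in the first hunk only injects the default architecture when no NVPTX arch was already requested. A self-contained sketch of the same pattern, using std::none_of in place of llvm::none_of (the container and helper names here are illustrative, not the driver's):

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

// (triple, arch) pairs collected from the command line.
using ArchEntry = std::pair<std::string, std::string>;

void addDefaultNvptxArch(std::vector<ArchEntry> &GpuArchList) {
  bool NoNvptxYet = std::none_of(GpuArchList.begin(), GpuArchList.end(),
                                 [](const ArchEntry &P) {
                                   // Treat any nvptx64* triple as an explicit NVPTX request.
                                   return P.first.rfind("nvptx64", 0) == 0;
                                 });
  if (NoNvptxYet)
    GpuArchList.emplace_back("nvptx64-nvidia-cuda", "sm_75"); // the new default
}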

libdevice/cmake/modules/SYCLLibdevice.cmake

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ set(imf_build_archs)
 if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
   list(APPEND full_build_archs nvptx64-nvidia-cuda)
   set(compile_opts_nvptx64-nvidia-cuda "-fsycl-targets=nvptx64-nvidia-cuda"
-    "-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib" "-fno-sycl-libspirv" "-Wno-unsafe-libspirv-not-linked")
+    "-Xsycl-target-backend" "--cuda-gpu-arch=sm_75" "-fno-sycl-libspirv" "-Wno-unsafe-libspirv-not-linked")
   set(opt_flags_nvptx64-nvidia-cuda "-O3" "--nvvm-reflect-enable=false")
 endif()
 if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)

mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h

Lines changed: 2 additions & 2 deletions
@@ -28,11 +28,11 @@ struct GPUToNVVMPipelineOptions
       llvm::cl::init("nvptx64-nvidia-cuda")};
   PassOptions::Option<std::string> cubinChip{
       *this, "cubin-chip", llvm::cl::desc("Chip to use to serialize to cubin."),
-      llvm::cl::init("sm_50")};
+      llvm::cl::init("sm_75")};
   PassOptions::Option<std::string> cubinFeatures{
       *this, "cubin-features",
       llvm::cl::desc("Features to use to serialize to cubin."),
-      llvm::cl::init("+ptx60")};
+      llvm::cl::init("+ptx63")};
   PassOptions::Option<std::string> cubinFormat{
       *this, "cubin-format",
       llvm::cl::desc("Compilation format to use to serialize to cubin."),

mlir/include/mlir/Dialect/GPU/Transforms/Passes.td

Lines changed: 2 additions & 2 deletions
@@ -143,10 +143,10 @@ def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> {
            /*default=*/ "\"nvptx64-nvidia-cuda\"",
            "Target triple.">,
     Option<"chip", "chip", "std::string",
-           /*default=*/"\"sm_50\"",
+           /*default=*/"\"sm_75\"",
            "Target chip.">,
     Option<"features", "features", "std::string",
-           /*default=*/"\"+ptx60\"",
+           /*default=*/"\"+ptx63\"",
            "Target features.">,
     Option<"optLevel", "O", "unsigned",
            /*default=*/"2",

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 4 additions & 4 deletions
@@ -4697,8 +4697,8 @@ def NVVM_TargetAttr : NVVM_Attr<"NVVMTarget", "target",
   let parameters = (ins
     DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O,
     StringRefParameter<"Target triple.", "\"nvptx64-nvidia-cuda\"">:$triple,
-    StringRefParameter<"Target chip.", "\"sm_50\"">:$chip,
-    StringRefParameter<"Target chip features.", "\"+ptx60\"">:$features,
+    StringRefParameter<"Target chip.", "\"sm_75\"">:$chip,
+    StringRefParameter<"Target chip features.", "\"+ptx63\"">:$features,
     OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags,
     OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link,
     DefaultValuedParameter<"bool", "true", "Perform SM version check on Ops.">:$verifyTarget
@@ -4709,8 +4709,8 @@ def NVVM_TargetAttr : NVVM_Attr<"NVVMTarget", "target",
   let builders = [
     AttrBuilder<(ins CArg<"int", "2">:$optLevel,
                      CArg<"StringRef", "\"nvptx64-nvidia-cuda\"">:$triple,
-                     CArg<"StringRef", "\"sm_50\"">:$chip,
-                     CArg<"StringRef", "\"+ptx60\"">:$features,
+                     CArg<"StringRef", "\"sm_75\"">:$chip,
+                     CArg<"StringRef", "\"+ptx63\"">:$features,
                      CArg<"DictionaryAttr", "nullptr">:$targetFlags,
                      CArg<"ArrayAttr", "nullptr">:$linkFiles,
                      CArg<"bool", "true">:$verifyTarget), [{

mlir/test/python/dialects/gpu/dialect.py

Lines changed: 2 additions & 2 deletions
@@ -51,10 +51,10 @@ def testObjectAttr():
     print(o)

     object = (
-        b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.0\n.target sm_50"
+        b"//\n// Generated by LLVM NVPTX Back-End\n//\n\n.version 6.3\n.target sm_75"
     )
     o = gpu.ObjectAttr.get(target, format, object)
-    # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50">
+    # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.3\0A.target sm_75">
     print(o)
     assert o.object == object


sycl-jit/jit-compiler/lib/translation/Translation.cpp

Lines changed: 3 additions & 2 deletions
@@ -197,13 +197,14 @@ std::pair<std::string, std::string> Translator::getTargetCPUAndFeatureAttrs(
   if (CPU.empty()) {
     // Set to the lowest tested target according to the GetStartedGuide, section
     // "Build DPC++ toolchain with support for HIP AMD"
-    CPU = Format == BinaryFormat::AMDGCN ? "gfx90a" : "sm_50";
+    CPU = Format == BinaryFormat::AMDGCN ? "gfx90a" : "sm_75";
     if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) {
       CPU = KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString();
     }
   }
   if (Features.empty()) {
-    Features = Format == BinaryFormat::PTX ? "+sm_50,+ptx76" : "";
+    // Turing architecture + PTX 6.3
+    Features = Format == BinaryFormat::PTX ? "+sm_75,+ptx63" : "";
     if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) {
       Features = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE)
                      .getValueAsString();
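
For reference, the fallback the JIT translator now applies when a kernel carries no explicit target attributes, extracted into a standalone sketch (BinaryFormat and the attribute plumbing are simplified here, not the sycl-jit sources verbatim):

#include <string>
#include <utility>

enum class BinaryFormat { PTX, AMDGCN };

// Returns the (CPU, features) pair used when the kernel has no target attributes.
std::pair<std::string, std::string> defaultTargetCPUAndFeatures(BinaryFormat Format) {
  std::string CPU = Format == BinaryFormat::AMDGCN ? "gfx90a" : "sm_75";
  // Turing architecture + PTX 6.3 on the CUDA path; AMDGCN takes no feature string here.
  std::string Features = Format == BinaryFormat::PTX ? "+sm_75,+ptx63" : "";
  return {CPU, Features};
}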

sycl/doc/GetStartedGuide.md

Lines changed: 2 additions & 2 deletions
@@ -198,8 +198,8 @@ extensions that require sm_80 and later architectures also require at least CUDA
 11.0.

 The CUDA backend should work on Windows or Linux operating systems with any GPU
-with compute capability (SM version) sm_50 or above. The default SM version for
-the NVIDIA CUDA backend is sm_50. Users of sm_3X devices can attempt to specify
+with compute capability (SM version) sm_75 or above. The default SM version for
+the NVIDIA CUDA backend is sm_75. Users of sm_3X devices can attempt to specify
 the target architecture [ahead of time](#aot-target-architectures), provided
 that they use a 11.X or earlier CUDA toolkit version, but some features may not be
 supported. The CUDA backend has been tested with different Ubuntu Linux
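
A minimal smoke test for the CUDA backend, assuming a DPC++ build with the CUDA plugin enabled (file and kernel names are arbitrary; the override flag in the comment is only needed when targeting something other than the sm_75 default):

// smoke.cpp
// Build:  clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda smoke.cpp -o smoke
// Or pin an architecture explicitly, e.g.:
//         clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda \
//                 -Xsycl-target-backend --cuda-gpu-arch=sm_60 smoke.cpp -o smoke
#include <iostream>
#include <sycl/sycl.hpp>

int main() {
  sycl::queue Q{sycl::gpu_selector_v};
  std::cout << "Running on: "
            << Q.get_device().get_info<sycl::info::device::name>() << "\n";

  int Result = 0;
  {
    sycl::buffer<int, 1> Buf{&Result, sycl::range<1>{1}};
    Q.submit([&](sycl::handler &CGH) {
      sycl::accessor Acc{Buf, CGH, sycl::write_only};
      CGH.single_task([=] { Acc[0] = 42; });
    });
  } // buffer destructor copies the result back into Result
  std::cout << "Result: " << Result << "\n";
  return Result == 42 ? 0 : 1;
}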
