Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions clang/include/clang/Basic/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ enum class CudaVersion {
CUDA_124,
CUDA_125,
CUDA_126,
CUDA_127,
CUDA_128,
FULLY_SUPPORTED = CUDA_123,
PARTIALLY_SUPPORTED =
CUDA_126, // Partially supported. Proceed with a warning.
Expand Down Expand Up @@ -80,6 +82,11 @@ enum class OffloadArch {
SM_90,
SM_90a,
SM_100,
SM_100a,
SM_101,
SM_101a,
SM_120,
SM_120a,
GFX600,
GFX601,
GFX602,
Expand Down
15 changes: 13 additions & 2 deletions clang/lib/Basic/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(12, 4),
CUDA_ENTRY(12, 5),
CUDA_ENTRY(12, 6),
CUDA_ENTRY(12, 7),
CUDA_ENTRY(12, 8),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
Expand Down Expand Up @@ -98,6 +100,11 @@ static const OffloadArchToStringMap arch_names[] = {
SM(90), // Hopper
SM(90a), // Hopper
SM(100), // Blackwell
SM(100a), // Blackwell
SM(101), // Blackwell
SM(101a), // Blackwell
SM(120), // Blackwell
SM(120a), // Blackwell
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
Expand Down Expand Up @@ -226,8 +233,12 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
case OffloadArch::SM_90a:
return CudaVersion::CUDA_120;
case OffloadArch::SM_100:
return CudaVersion::NEW; // TODO: use specific CUDA version once it's
// public.
case OffloadArch::SM_100a:
case OffloadArch::SM_101:
case OffloadArch::SM_101a:
case OffloadArch::SM_120:
case OffloadArch::SM_120a:
return CudaVersion::CUDA_128;
default:
llvm_unreachable("invalid enum");
}
Expand Down
21 changes: 18 additions & 3 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,

if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
// Set __CUDA_ARCH__ for the GPU specified.
std::string CUDAArchCode = [this] {
llvm::StringRef CUDAArchCode = [this] {
switch (GPU) {
case OffloadArch::GFX600:
case OffloadArch::GFX601:
Expand Down Expand Up @@ -285,12 +285,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
return "900";
case OffloadArch::SM_100:
return "1000";
case OffloadArch::SM_101:
case OffloadArch::SM_101a:
return "1010";
case OffloadArch::SM_120:
case OffloadArch::SM_120a:
return "1200";
}
llvm_unreachable("unhandled OffloadArch");
}();
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
if (GPU == OffloadArch::SM_90a)
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
switch(GPU) {
case OffloadArch::SM_90a:
case OffloadArch::SM_100a:
case OffloadArch::SM_101a:
case OffloadArch::SM_120a:
Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
break;
default:
// Do nothing if this is not an enhanced architecture.
break;
}
}
}

Expand Down
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2277,6 +2277,11 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::SM_90:
case OffloadArch::SM_90a:
case OffloadArch::SM_100:
case OffloadArch::SM_100a:
case OffloadArch::SM_101:
case OffloadArch::SM_101a:
case OffloadArch::SM_120:
case OffloadArch::SM_120a:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
Expand Down
5 changes: 5 additions & 0 deletions clang/test/Misc/target-invalid-cpu-note/nvptx.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@
// CHECK-SAME: {{^}}, sm_90
// CHECK-SAME: {{^}}, sm_90a
// CHECK-SAME: {{^}}, sm_100
// CHECK-SAME: {{^}}, sm_100a
// CHECK-SAME: {{^}}, sm_101
// CHECK-SAME: {{^}}, sm_101a
// CHECK-SAME: {{^}}, sm_120
// CHECK-SAME: {{^}}, sm_120a
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
Expand Down