Add CUDA 12.7/12.8 and the sm_100a, sm_101, sm_101a, sm_120 and sm_120a
offload architectures to clang.

* Cuda.h / Cuda.cpp: add the CudaVersion and OffloadArch enumerators and
  their name-table entries, and make CUDA 12.8 the minimum version for
  sm_100 and every architecture added here (sm_100 previously mapped to
  CudaVersion::NEW).
* NVPTX.cpp: define __CUDA_ARCH__ for the new architectures and define
  __CUDA_ARCH_FEAT_SM<NN>_ALL for every 'a' variant, where <NN> is the
  __CUDA_ARCH__ string with its last character dropped. sm_100a shares the
  "1000" arch code with sm_100; without that case the arch-code switch ends
  in llvm_unreachable for sm_100a.
* CGOpenMPRuntimeGPU.cpp and the nvptx.c invalid-cpu test: list the new
  architectures.

diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 721e8981af6ff..001c44ace33af 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -44,6 +44,8 @@ enum class CudaVersion {
   CUDA_124,
   CUDA_125,
   CUDA_126,
+  CUDA_127,
+  CUDA_128,
   FULLY_SUPPORTED = CUDA_123,
   PARTIALLY_SUPPORTED =
       CUDA_126, // Partially supported. Proceed with a warning.
@@ -80,6 +82,11 @@ enum class OffloadArch {
   SM_90,
   SM_90a,
   SM_100,
+  SM_100a,
+  SM_101,
+  SM_101a,
+  SM_120,
+  SM_120a,
   GFX600,
   GFX601,
   GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 59c932468cd89..299799159135f 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -44,6 +44,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
     CUDA_ENTRY(12, 4),
     CUDA_ENTRY(12, 5),
     CUDA_ENTRY(12, 6),
+    CUDA_ENTRY(12, 7),
+    CUDA_ENTRY(12, 8),
     {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
     {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
 };
@@ -98,6 +100,11 @@ static const OffloadArchToStringMap arch_names[] = {
     SM(90),                          // Hopper
     SM(90a),                         // Hopper
     SM(100),                         // Blackwell
+    SM(100a),                        // Blackwell
+    SM(101),                         // Blackwell
+    SM(101a),                        // Blackwell
+    SM(120),                         // Blackwell
+    SM(120a),                        // Blackwell
     GFX(600),  // gfx600
     GFX(601),  // gfx601
     GFX(602),  // gfx602
@@ -226,8 +233,12 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
   case OffloadArch::SM_90a:
     return CudaVersion::CUDA_120;
   case OffloadArch::SM_100:
-    return CudaVersion::NEW; // TODO: use specific CUDA version once it's
-                             // public.
+  case OffloadArch::SM_100a:
+  case OffloadArch::SM_101:
+  case OffloadArch::SM_101a:
+  case OffloadArch::SM_120:
+  case OffloadArch::SM_120a:
+    return CudaVersion::CUDA_128;
   default:
     llvm_unreachable("invalid enum");
   }
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 0897032c4b854..27c06c7101d87 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -180,7 +180,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
 
   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
     // Set __CUDA_ARCH__ for the GPU specified.
-    std::string CUDAArchCode = [this] {
+    llvm::StringRef CUDAArchCode = [this] {
       switch (GPU) {
       case OffloadArch::GFX600:
       case OffloadArch::GFX601:
@@ -285,12 +285,31 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
         return "900";
       case OffloadArch::SM_100:
+      case OffloadArch::SM_100a:
         return "1000";
+      case OffloadArch::SM_101:
+      case OffloadArch::SM_101a:
+        return "1010";
+      case OffloadArch::SM_120:
+      case OffloadArch::SM_120a:
+        return "1200";
       }
       llvm_unreachable("unhandled OffloadArch");
     }();
     Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
-    if (GPU == OffloadArch::SM_90a)
-      Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
+    switch (GPU) {
+    case OffloadArch::SM_90a:
+    case OffloadArch::SM_100a:
+    case OffloadArch::SM_101a:
+    case OffloadArch::SM_120a:
+      // The feature macro embeds the arch code without its last character,
+      // e.g. "1010" for sm_101a gives __CUDA_ARCH_FEAT_SM101_ALL.
+      Builder.defineMacro(
+          "__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
+      break;
+    default:
+      // Do nothing if this is not an enhanced architecture.
+      break;
+    }
   }
 }
 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 73e3f9e256f0d..07fdf90986f50 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2277,6 +2277,11 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
       case OffloadArch::SM_90:
       case OffloadArch::SM_90a:
       case OffloadArch::SM_100:
+      case OffloadArch::SM_100a:
+      case OffloadArch::SM_101:
+      case OffloadArch::SM_101a:
+      case OffloadArch::SM_120:
+      case OffloadArch::SM_120a:
       case OffloadArch::GFX600:
       case OffloadArch::GFX601:
       case OffloadArch::GFX602:
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index 44fe07065b242..a32bddf6cddc9 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -27,6 +27,11 @@
 // CHECK-SAME: {{^}}, sm_90
 // CHECK-SAME: {{^}}, sm_90a
 // CHECK-SAME: {{^}}, sm_100
+// CHECK-SAME: {{^}}, sm_100a
+// CHECK-SAME: {{^}}, sm_101
+// CHECK-SAME: {{^}}, sm_101a
+// CHECK-SAME: {{^}}, sm_120
+// CHECK-SAME: {{^}}, sm_120a
 // CHECK-SAME: {{^}}, gfx600
 // CHECK-SAME: {{^}}, gfx601
 // CHECK-SAME: {{^}}, gfx602