Skip to content

Commit 84fd8db

Browse files
jodelek (Sebastian Jodlowski)
authored and committed
[CUDA] Add support for sm101 and sm120 target architectures (llvm#127187)
Add support for the sm101 and sm120 target architectures. These architectures require CUDA 12.8.

Co-authored-by: Sebastian Jodlowski <[email protected]>
1 parent a074091 commit 84fd8db

File tree

5 files changed

+48
-5
lines changed

5 files changed

+48
-5
lines changed

clang/include/clang/Basic/Cuda.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ enum class CudaVersion {
4444
CUDA_124,
4545
CUDA_125,
4646
CUDA_126,
47+
CUDA_127,
48+
CUDA_128,
4749
FULLY_SUPPORTED = CUDA_123,
4850
PARTIALLY_SUPPORTED =
4951
CUDA_126, // Partially supported. Proceed with a warning.
@@ -80,6 +82,11 @@ enum class OffloadArch {
8082
SM_90,
8183
SM_90a,
8284
SM_100,
85+
SM_100a,
86+
SM_101,
87+
SM_101a,
88+
SM_120,
89+
SM_120a,
8390
GFX600,
8491
GFX601,
8592
GFX602,

clang/lib/Basic/Cuda.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
4444
CUDA_ENTRY(12, 4),
4545
CUDA_ENTRY(12, 5),
4646
CUDA_ENTRY(12, 6),
47+
CUDA_ENTRY(12, 7),
48+
CUDA_ENTRY(12, 8),
4749
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
4850
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
4951
};
@@ -98,6 +100,11 @@ static const OffloadArchToStringMap arch_names[] = {
98100
SM(90), // Hopper
99101
SM(90a), // Hopper
100102
SM(100), // Blackwell
103+
SM(100a), // Blackwell
104+
SM(101), // Blackwell
105+
SM(101a), // Blackwell
106+
SM(120), // Blackwell
107+
SM(120a), // Blackwell
101108
GFX(600), // gfx600
102109
GFX(601), // gfx601
103110
GFX(602), // gfx602
@@ -226,8 +233,12 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
226233
case OffloadArch::SM_90a:
227234
return CudaVersion::CUDA_120;
228235
case OffloadArch::SM_100:
229-
return CudaVersion::NEW; // TODO: use specific CUDA version once it's
230-
// public.
236+
case OffloadArch::SM_100a:
237+
case OffloadArch::SM_101:
238+
case OffloadArch::SM_101a:
239+
case OffloadArch::SM_120:
240+
case OffloadArch::SM_120a:
241+
return CudaVersion::CUDA_128;
231242
default:
232243
llvm_unreachable("invalid enum");
233244
}

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
180180

181181
if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
182182
// Set __CUDA_ARCH__ for the GPU specified.
183-
std::string CUDAArchCode = [this] {
183+
llvm::StringRef CUDAArchCode = [this] {
184184
switch (GPU) {
185185
case OffloadArch::GFX600:
186186
case OffloadArch::GFX601:
@@ -285,12 +285,27 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
285285
return "900";
286286
case OffloadArch::SM_100:
287287
return "1000";
288+
case OffloadArch::SM_101:
289+
case OffloadArch::SM_101a:
290+
return "1010";
291+
case OffloadArch::SM_120:
292+
case OffloadArch::SM_120a:
293+
return "1200";
288294
}
289295
llvm_unreachable("unhandled OffloadArch");
290296
}();
291297
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
292-
if (GPU == OffloadArch::SM_90a)
293-
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
298+
switch(GPU) {
299+
case OffloadArch::SM_90a:
300+
case OffloadArch::SM_100a:
301+
case OffloadArch::SM_101a:
302+
case OffloadArch::SM_120a:
303+
Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
304+
break;
305+
default:
306+
// Do nothing if this is not an enhanced architecture.
307+
break;
308+
}
294309
}
295310
}
296311

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2277,6 +2277,11 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
22772277
case OffloadArch::SM_90:
22782278
case OffloadArch::SM_90a:
22792279
case OffloadArch::SM_100:
2280+
case OffloadArch::SM_100a:
2281+
case OffloadArch::SM_101:
2282+
case OffloadArch::SM_101a:
2283+
case OffloadArch::SM_120:
2284+
case OffloadArch::SM_120a:
22802285
case OffloadArch::GFX600:
22812286
case OffloadArch::GFX601:
22822287
case OffloadArch::GFX602:

clang/test/Misc/target-invalid-cpu-note/nvptx.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@
2727
// CHECK-SAME: {{^}}, sm_90
2828
// CHECK-SAME: {{^}}, sm_90a
2929
// CHECK-SAME: {{^}}, sm_100
30+
// CHECK-SAME: {{^}}, sm_100a
31+
// CHECK-SAME: {{^}}, sm_101
32+
// CHECK-SAME: {{^}}, sm_101a
33+
// CHECK-SAME: {{^}}, sm_120
34+
// CHECK-SAME: {{^}}, sm_120a
3035
// CHECK-SAME: {{^}}, gfx600
3136
// CHECK-SAME: {{^}}, gfx601
3237
// CHECK-SAME: {{^}}, gfx602

0 commit comments

Comments
 (0)