Skip to content

Commit 91c88a1

Browse files
Artem-B authored and krishna2803 committed
[CUDA] add support for targeting sm_103/sm_121 with CUDA-12.9 (llvm#151587)
1 parent fc2ea43 commit 91c88a1

File tree

9 files changed

+46
-7
lines changed

9 files changed

+46
-7
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.td

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,13 +21,17 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
2121
!strconcat(f, "|", newer.Features));
2222
}
2323

24+
let Features = "sm_121a" in def SM_121a : SMFeatures;
2425
let Features = "sm_120a" in def SM_120a : SMFeatures;
26+
let Features = "sm_103a" in def SM_103a : SMFeatures;
2527
let Features = "sm_101a" in def SM_101a : SMFeatures;
2628
let Features = "sm_100a" in def SM_100a : SMFeatures;
2729
let Features = "sm_90a" in def SM_90a : SMFeatures;
2830

29-
def SM_120 : SM<"120", [SM_120a]>;
30-
def SM_101 : SM<"101", [SM_101a, SM_120]>;
31+
def SM_121 : SM<"121", [SM_121a]>;
32+
def SM_120 : SM<"120", [SM_120a, SM_121]>;
33+
def SM_103 : SM<"103", [SM_103a, SM_120]>;
34+
def SM_101 : SM<"101", [SM_101a, SM_103]>;
3135
def SM_100 : SM<"100", [SM_100a, SM_101]>;
3236
def SM_90 : SM<"90", [SM_90a, SM_100]>;
3337
def SM_89 : SM<"89", [SM_90]>;
@@ -50,8 +54,9 @@ class PTX<string version, PTXFeatures newer> : PTXFeatures {
5054
let Features = !strconcat("ptx", version, "|", newer.Features);
5155
}
5256

53-
let Features = "ptx87" in def PTX87 : PTXFeatures;
57+
let Features = "ptx88" in def PTX88 : PTXFeatures;
5458

59+
def PTX87 : PTX<"87", PTX88>;
5560
def PTX86 : PTX<"86", PTX87>;
5661
def PTX85 : PTX<"85", PTX86>;
5762
def PTX84 : PTX<"84", PTX85>;

clang/include/clang/Basic/Cuda.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,9 +47,10 @@ enum class CudaVersion {
4747
CUDA_125,
4848
CUDA_126,
4949
CUDA_128,
50-
FULLY_SUPPORTED = CUDA_123,
50+
CUDA_129,
51+
FULLY_SUPPORTED = CUDA_128,
5152
PARTIALLY_SUPPORTED =
52-
CUDA_128, // Partially supported. Proceed with a warning.
53+
CUDA_129, // Partially supported. Proceed with a warning.
5354
NEW = 10000, // Too new. Issue a warning, but allow using it.
5455
};
5556
const char *CudaVersionToString(CudaVersion V);

clang/include/clang/Basic/OffloadArch.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,12 @@ enum class OffloadArch {
4545
SM_100a,
4646
SM_101,
4747
SM_101a,
48+
SM_103,
49+
SM_103a,
4850
SM_120,
4951
SM_120a,
52+
SM_121,
53+
SM_121a,
5054
GFX600,
5155
GFX601,
5256
GFX602,

clang/lib/Basic/Cuda.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
4444
CUDA_ENTRY(12, 5),
4545
CUDA_ENTRY(12, 6),
4646
CUDA_ENTRY(12, 8),
47+
CUDA_ENTRY(12, 9),
4748
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
4849
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
4950
};
@@ -119,6 +120,11 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
119120
case OffloadArch::SM_120:
120121
case OffloadArch::SM_120a:
121122
return CudaVersion::CUDA_128;
123+
case OffloadArch::SM_103:
124+
case OffloadArch::SM_103a:
125+
case OffloadArch::SM_121:
126+
case OffloadArch::SM_121a:
127+
return CudaVersion::CUDA_129;
122128
default:
123129
llvm_unreachable("invalid enum");
124130
}

clang/lib/Basic/OffloadArch.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,12 @@ static const OffloadArchToStringMap ArchNames[] = {
3333
SM(100a), // Blackwell
3434
SM(101), // Blackwell
3535
SM(101a), // Blackwell
36+
SM(103), // Blackwell
37+
SM(103a), // Blackwell
3638
SM(120), // Blackwell
3739
SM(120a), // Blackwell
40+
SM(121), // Blackwell
41+
SM(121a), // Blackwell
3842
GFX(600), // gfx600
3943
GFX(601), // gfx601
4044
GFX(602), // gfx602

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -295,10 +295,16 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
295295
return "1000";
296296
case OffloadArch::SM_101:
297297
case OffloadArch::SM_101a:
298-
return "1010";
298+
return "1010";
299+
case OffloadArch::SM_103:
300+
case OffloadArch::SM_103a:
301+
return "1030";
299302
case OffloadArch::SM_120:
300303
case OffloadArch::SM_120a:
301-
return "1200";
304+
return "1200";
305+
case OffloadArch::SM_121:
306+
case OffloadArch::SM_121a:
307+
return "1210";
302308
}
303309
llvm_unreachable("unhandled OffloadArch");
304310
}();
@@ -307,7 +313,9 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
307313
case OffloadArch::SM_90a:
308314
case OffloadArch::SM_100a:
309315
case OffloadArch::SM_101a:
316+
case OffloadArch::SM_103a:
310317
case OffloadArch::SM_120a:
318+
case OffloadArch::SM_121a:
311319
Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
312320
break;
313321
default:

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2278,8 +2278,12 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
22782278
case OffloadArch::SM_100a:
22792279
case OffloadArch::SM_101:
22802280
case OffloadArch::SM_101a:
2281+
case OffloadArch::SM_103:
2282+
case OffloadArch::SM_103a:
22812283
case OffloadArch::SM_120:
22822284
case OffloadArch::SM_120a:
2285+
case OffloadArch::SM_121:
2286+
case OffloadArch::SM_121a:
22832287
case OffloadArch::GFX600:
22842288
case OffloadArch::GFX601:
22852289
case OffloadArch::GFX602:

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
8888
return CudaVersion::CUDA_126;
8989
if (raw_version < 12090)
9090
return CudaVersion::CUDA_128;
91+
if (raw_version < 13000)
92+
return CudaVersion::CUDA_129;
9193
return CudaVersion::NEW;
9294
}
9395

@@ -683,6 +685,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
683685
case CudaVersion::CUDA_##CUDA_VER: \
684686
PtxFeature = "+ptx" #PTX_VER; \
685687
break;
688+
CASE_CUDA_VERSION(129, 88);
686689
CASE_CUDA_VERSION(128, 87);
687690
CASE_CUDA_VERSION(126, 85);
688691
CASE_CUDA_VERSION(125, 85);

clang/test/Misc/target-invalid-cpu-note/nvptx.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,12 @@
3030
// CHECK-SAME: {{^}}, sm_100a
3131
// CHECK-SAME: {{^}}, sm_101
3232
// CHECK-SAME: {{^}}, sm_101a
33+
// CHECK-SAME: {{^}}, sm_103
34+
// CHECK-SAME: {{^}}, sm_103a
3335
// CHECK-SAME: {{^}}, sm_120
3436
// CHECK-SAME: {{^}}, sm_120a
37+
// CHECK-SAME: {{^}}, sm_121
38+
// CHECK-SAME: {{^}}, sm_121a
3539
// CHECK-SAME: {{^}}, gfx600
3640
// CHECK-SAME: {{^}}, gfx601
3741
// CHECK-SAME: {{^}}, gfx602

0 commit comments

Comments
 (0)