We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c2c4db8 commit 30a06e8 Copy full SHA for 30a06e8
clang/docs/ReleaseNotes.rst
@@ -620,6 +620,8 @@ CUDA/HIP Language Changes
620
621
CUDA Support
622
^^^^^^^^^^^^
623
+- Clang now supports CUDA SDK up to 12.6
624
+- Added support for sm_100
625
626
AIX Support
627
^^^^^^^^^^^
clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -27,8 +27,10 @@
27
#pragma push_macro("SM_89")
28
#pragma push_macro("SM_90")
29
#pragma push_macro("SM_90a")
30
+#pragma push_macro("SM_100")
31
+#define SM_100 "sm_100"
32
#define SM_90a "sm_90a"
-#define SM_90 "sm_90|" SM_90a
33
+#define SM_90 "sm_90|" SM_90a "|" SM_100
34
#define SM_89 "sm_89|" SM_90
35
#define SM_87 "sm_87|" SM_89
36
#define SM_86 "sm_86|" SM_87
@@ -63,7 +65,9 @@
63
65
#pragma push_macro("PTX83")
64
66
#pragma push_macro("PTX84")
67
#pragma push_macro("PTX85")
-#define PTX85 "ptx85"
68
+#pragma push_macro("PTX86")
69
+#define PTX86 "ptx86"
70
+#define PTX85 "ptx85|" PTX86
71
#define PTX84 "ptx84|" PTX85
72
#define PTX83 "ptx83|" PTX84
73
#define PTX82 "ptx82|" PTX83
@@ -1086,6 +1090,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
1086
1090
#pragma pop_macro("SM_89")
1087
1091
#pragma pop_macro("SM_90")
1088
1092
#pragma pop_macro("SM_90a")
1093
+#pragma pop_macro("SM_100")
1089
1094
#pragma pop_macro("PTX42")
1095
#pragma pop_macro("PTX60")
1096
#pragma pop_macro("PTX61")
@@ -1108,3 +1113,4 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
1108
1113
#pragma pop_macro("PTX83")
1109
1114
#pragma pop_macro("PTX84")
1110
1115
#pragma pop_macro("PTX85")
1116
+#pragma pop_macro("PTX86")
clang/include/clang/Basic/Cuda.h
@@ -43,9 +43,10 @@ enum class CudaVersion {
43
CUDA_123,
44
CUDA_124,
45
CUDA_125,
46
+ CUDA_126,
47
FULLY_SUPPORTED = CUDA_123,
48
PARTIALLY_SUPPORTED =
- CUDA_125, // Partially supported. Proceed with a warning.
49
+ CUDA_126, // Partially supported. Proceed with a warning.
50
NEW = 10000, // Too new. Issue a warning, but allow using it.
51
};
52
const char *CudaVersionToString(CudaVersion V);
@@ -78,6 +79,7 @@ enum class OffloadArch {
78
79
SM_89,
80
SM_90,
81
SM_90a,
82
+ SM_100,
83
GFX600,
84
GFX601,
85
GFX602,
clang/lib/Basic/Cuda.cpp
@@ -43,6 +43,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(12, 3),
CUDA_ENTRY(12, 4),
CUDA_ENTRY(12, 5),
+ CUDA_ENTRY(12, 6),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
@@ -96,6 +97,7 @@ static const OffloadArchToStringMap arch_names[] = {
96
97
SM(89), // Ada Lovelace
98
SM(90), // Hopper
99
SM(90a), // Hopper
100
+ SM(100), // Blackwell
101
GFX(600), // gfx600
102
GFX(601), // gfx601
103
GFX(602), // gfx602
@@ -221,6 +223,9 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
221
223
return CudaVersion::CUDA_118;
222
224
case OffloadArch::SM_90a:
225
return CudaVersion::CUDA_120;
226
+ case OffloadArch::SM_100:
227
+ return CudaVersion::NEW; // TODO: use specific CUDA version once it's
228
+ // public.
229
default:
230
llvm_unreachable("invalid enum");
231
}
clang/lib/Basic/Targets/NVPTX.cpp
@@ -281,6 +281,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
281
case OffloadArch::SM_90:
282
283
return "900";
284
285
+ return "1000";
286
287
llvm_unreachable("unhandled OffloadArch");
288
}();
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2274,6 +2274,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
2274
case OffloadArch::SM_89:
2275
2276
2277
2278
case OffloadArch::GFX600:
2279
case OffloadArch::GFX601:
2280
case OffloadArch::GFX602:
clang/lib/Driver/ToolChains/Cuda.cpp
@@ -87,6 +87,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
87
return CudaVersion::CUDA_124;
88
if (raw_version < 12060)
89
return CudaVersion::CUDA_125;
90
+ if (raw_version < 12070)
91
+ return CudaVersion::CUDA_126;
92
return CudaVersion::NEW;
93
94
@@ -669,6 +671,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
669
671
case CudaVersion::CUDA_##CUDA_VER: \
670
672
PtxFeature = "+ptx" #PTX_VER; \
673
break;
674
+ CASE_CUDA_VERSION(126, 85);
675
CASE_CUDA_VERSION(125, 85);
676
CASE_CUDA_VERSION(124, 84);
677
CASE_CUDA_VERSION(123, 83);
@@ -691,6 +694,10 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
691
694
CASE_CUDA_VERSION(91, 61);
692
695
CASE_CUDA_VERSION(90, 60);
693
696
#undef CASE_CUDA_VERSION
697
+ // TODO: Use specific CUDA version once it's public.
698
+ case clang::CudaVersion::NEW:
699
+ PtxFeature = "+ptx86";
700
+ break;
701
702
PtxFeature = "+ptx42";
703
clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -26,6 +26,7 @@
26
// CHECK-SAME: {{^}}, sm_89
// CHECK-SAME: {{^}}, sm_90
// CHECK-SAME: {{^}}, sm_90a
+// CHECK-SAME: {{^}}, sm_100
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
llvm/lib/Target/NVPTX/NVPTX.td
@@ -35,14 +35,14 @@ class FeaturePTX<int version>:
"Use PTX version " # version>;
37
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
38
- 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
+ 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100] in
39
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
40
41
def SM90a: FeatureSM<"90a", 901>;
42
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
70, 71, 72, 73, 74, 75, 76, 77, 78,
- 80, 81, 82, 83, 84, 85] in
+ 80, 81, 82, 83, 84, 85, 86] in
def PTX#version: FeaturePTX<version>;
//===----------------------------------------------------------------------===//
@@ -73,6 +73,7 @@ def : Proc<"sm_87", [SM87, PTX74]>;
def : Proc<"sm_89", [SM89, PTX78]>;
74
def : Proc<"sm_90", [SM90, PTX78]>;
75
def : Proc<"sm_90a", [SM90a, PTX80]>;
76
+def : Proc<"sm_100", [SM100, PTX86]>;
77
def NVPTXInstrInfo : InstrInfo {
0 commit comments