33#include " llvm/ADT/StringRef.h"
44#include " llvm/ADT/Twine.h"
55#include " llvm/Support/ErrorHandling.h"
6+ #include " llvm/Support/FormatVariadic.h"
67#include " llvm/Support/VersionTuple.h"
78
89namespace clang {
@@ -11,40 +12,43 @@ struct CudaVersionMapEntry {
1112 const char *Name;
1213 CudaVersion Version;
1314 llvm::VersionTuple TVersion;
15+ PTXVersion PTX;
1416};
15- #define CUDA_ENTRY (major, minor ) \
17+ #define CUDA_ENTRY (major, minor, ptx ) \
1618 { \
1719 #major " ." #minor, CudaVersion::CUDA_##major##minor, \
18- llvm::VersionTuple (major, minor) \
20+ llvm::VersionTuple (major, minor), PTXVersion::ptx \
1921 }
2022
2123static const CudaVersionMapEntry CudaNameVersionMap[] = {
22- CUDA_ENTRY (7 , 0 ),
23- CUDA_ENTRY (7 , 5 ),
24- CUDA_ENTRY (8 , 0 ),
25- CUDA_ENTRY (9 , 0 ),
26- CUDA_ENTRY (9 , 1 ),
27- CUDA_ENTRY (9 , 2 ),
28- CUDA_ENTRY (10 , 0 ),
29- CUDA_ENTRY (10 , 1 ),
30- CUDA_ENTRY (10 , 2 ),
31- CUDA_ENTRY (11 , 0 ),
32- CUDA_ENTRY (11 , 1 ),
33- CUDA_ENTRY (11 , 2 ),
34- CUDA_ENTRY (11 , 3 ),
35- CUDA_ENTRY (11 , 4 ),
36- CUDA_ENTRY (11 , 5 ),
37- CUDA_ENTRY (11 , 6 ),
38- CUDA_ENTRY (11 , 7 ),
39- CUDA_ENTRY (11 , 8 ),
40- CUDA_ENTRY (12 , 0 ),
41- CUDA_ENTRY (12 , 1 ),
42- CUDA_ENTRY (12 , 2 ),
43- CUDA_ENTRY (12 , 3 ),
44- CUDA_ENTRY (12 , 4 ),
45- CUDA_ENTRY (12 , 5 ),
46- {" " , CudaVersion::NEW, llvm::VersionTuple (std::numeric_limits<int >::max ())},
47- {" unknown" , CudaVersion::UNKNOWN, {}} // End of list tombstone.
24+ CUDA_ENTRY (7 , 0 , PTX_42),
25+ CUDA_ENTRY (7 , 5 , PTX_43),
26+ CUDA_ENTRY (8 , 0 , PTX_50),
27+ CUDA_ENTRY (9 , 0 , PTX_60),
28+ CUDA_ENTRY (9 , 1 , PTX_61),
29+ CUDA_ENTRY (9 , 2 , PTX_62),
30+ CUDA_ENTRY (10 , 0 , PTX_63),
31+ CUDA_ENTRY (10 , 1 , PTX_64),
32+ CUDA_ENTRY (10 , 2 , PTX_65),
33+ CUDA_ENTRY (11 , 0 , PTX_70),
34+ CUDA_ENTRY (11 , 1 , PTX_71),
35+ CUDA_ENTRY (11 , 2 , PTX_72),
36+ CUDA_ENTRY (11 , 3 , PTX_73),
37+ CUDA_ENTRY (11 , 4 , PTX_74),
38+ CUDA_ENTRY (11 , 5 , PTX_75),
39+ CUDA_ENTRY (11 , 6 , PTX_76),
40+ CUDA_ENTRY (11 , 7 , PTX_77),
41+ CUDA_ENTRY (11 , 8 , PTX_78),
42+ CUDA_ENTRY (12 , 0 , PTX_80),
43+ CUDA_ENTRY (12 , 1 , PTX_81),
44+ CUDA_ENTRY (12 , 2 , PTX_82),
45+ CUDA_ENTRY (12 , 3 , PTX_83),
46+ CUDA_ENTRY (12 , 4 , PTX_84),
47+ CUDA_ENTRY (12 , 5 , PTX_85),
48+ {" " , CudaVersion::NEW, llvm::VersionTuple (std::numeric_limits<int >::max ()),
49+ PTXVersion::PTX_LAST},
50+ // End of list tombstone
51+ {" unknown" , CudaVersion::UNKNOWN, {}, PTXVersion::PTX_42}
4852};
4953#undef CUDA_ENTRY
5054
@@ -71,6 +75,20 @@ CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
7175 return CudaVersion::UNKNOWN;
7276}
7377
78+ const std::string PTXVersionToFeature (PTXVersion V) {
79+ if (V > PTXVersion::PTX_UNKNOWN && V <= PTXVersion::PTX_LAST)
80+ return llvm::formatv (" +ptx{0}" , static_cast <unsigned >(V));
81+ return {};
82+ }
83+
84+ PTXVersion GetRequiredPTXVersion (CudaVersion V) {
85+ for (auto &I : CudaNameVersionMap)
86+ if (V == I.Version )
87+ return I.PTX ;
88+
89+ return PTXVersion::PTX_UNKNOWN;
90+ }
91+
7492namespace {
7593struct OffloadArchToStringMap {
7694 OffloadArch arch;
@@ -79,9 +97,11 @@ struct OffloadArchToStringMap {
7997};
8098} // namespace
8199
82- #define SM2 (sm, ca ) {OffloadArch::SM_##sm, " sm_" #sm, ca}
100+ #define SM2 (sm, ca ) \
101+ { OffloadArch::SM_##sm, " sm_" #sm, ca }
83102#define SM (sm ) SM2(sm, " compute_" #sm)
84- #define GFX (gpu ) {OffloadArch::GFX##gpu, " gfx" #gpu, " compute_amdgcn" }
103+ #define GFX (gpu ) \
104+ { OffloadArch::GFX##gpu, " gfx" #gpu, " compute_amdgcn" }
85105static const OffloadArchToStringMap arch_names[] = {
86106 // clang-format off
87107 {OffloadArch::UNUSED, " " , " " },
@@ -96,6 +116,7 @@ static const OffloadArchToStringMap arch_names[] = {
96116 SM (89 ), // Ada Lovelace
97117 SM (90 ), // Hopper
98118 SM (90a), // Hopper
119+ SM (custom), // Placeholder for a new arch.
99120 GFX (600 ), // gfx600
100121 GFX (601 ), // gfx601
101122 GFX (602 ), // gfx602
@@ -181,6 +202,18 @@ OffloadArch StringToOffloadArch(llvm::StringRef S) {
181202 return result->arch ;
182203}
183204
205+ unsigned CUDACustomSMToArchID (llvm::StringRef S) {
206+ if (!S.starts_with (" sm_" ))
207+ return 0 ;
208+ S = S.drop_front (3 ); // skip `sm_`
209+ if (S.ends_with (" a" ))
210+ S = S.drop_back (1 );
211+ unsigned ID;
212+ if (S.getAsInteger (10 , ID))
213+ return 0 ; // We've failed to parse the SM name
214+ return ID * 10 ;
215+ }
216+
184217CudaVersion MinVersionForOffloadArch (OffloadArch A) {
185218 if (A == OffloadArch::UNKNOWN)
186219 return CudaVersion::UNKNOWN;
@@ -221,6 +254,8 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
221254 return CudaVersion::CUDA_118;
222255 case OffloadArch::SM_90a:
223256 return CudaVersion::CUDA_120;
257+ case clang::OffloadArch::SM_custom:
258+ return CudaVersion::UNKNOWN;
224259 default :
225260 llvm_unreachable (" invalid enum" );
226261 }
0 commit comments