Skip to content

Commit 3713289

Browse files
committed
Update getCudaCoresPerSM for Blackwell GPUs
1 parent 515dff4 commit 3713289

File tree

1 file changed

+31
-21
lines changed

1 file changed

+31
-21
lines changed

HeterogeneousCore/CUDAServices/plugins/CUDAService.cc

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -79,59 +79,69 @@ void setCudaLimit(cudaLimit limit, const char* name, size_t request) {
7979
}
8080

8181
constexpr unsigned int getCudaCoresPerSM(unsigned int major, unsigned int minor) {
82-
switch (major * 10 + minor) {
82+
switch (major * 16 + minor) {
8383
// Fermi architecture
84-
case 20: // SM 2.0: GF100 class
84+
case 0x20: // SM 2.0: GF100 class
8585
return 32;
86-
case 21: // SM 2.1: GF10x class
86+
case 0x21: // SM 2.1: GF10x class
8787
return 48;
8888

8989
// Kepler architecture
90-
case 30: // SM 3.0: GK10x class
91-
case 32: // SM 3.2: GK10x class
92-
case 35: // SM 3.5: GK11x class
93-
case 37: // SM 3.7: GK21x class
90+
case 0x30: // SM 3.0: GK10x class
91+
case 0x32: // SM 3.2: GK10x class
92+
case 0x35: // SM 3.5: GK11x class
93+
case 0x37: // SM 3.7: GK21x class
9494
return 192;
9595

9696
// Maxwell architecture
97-
case 50: // SM 5.0: GM10x class
98-
case 52: // SM 5.2: GM20x class
99-
case 53: // SM 5.3: GM20x class
97+
case 0x50: // SM 5.0: GM10x class
98+
case 0x52: // SM 5.2: GM20x class
99+
case 0x53: // SM 5.3: GM20x class
100100
return 128;
101101

102102
// Pascal architecture
103-
case 60: // SM 6.0: GP100 class
103+
case 0x60: // SM 6.0: GP100 class
104104
return 64;
105-
case 61: // SM 6.1: GP10x class
106-
case 62: // SM 6.2: GP10x class
105+
case 0x61: // SM 6.1: GP10x class
106+
case 0x62: // SM 6.2: GP10x class
107107
return 128;
108108

109109
// Volta architecture
110-
case 70: // SM 7.0: GV100 class
111-
case 72: // SM 7.2: GV11b class
110+
case 0x70: // SM 7.0: GV100 class
111+
case 0x72: // SM 7.2: GV11b class
112112
return 64;
113113

114114
// Turing architecture
115-
case 75: // SM 7.5: TU10x class
115+
case 0x75: // SM 7.5: TU10x class
116116
return 64;
117117

118118
// Ampere architecture
119-
case 80: // SM 8.0: GA100 class
119+
case 0x80: // SM 8.0: GA100 class
120120
return 64;
121-
case 86: // SM 8.6: GA10x class
121+
case 0x86: // SM 8.6: GA10x class
122+
case 0x87: // SM 8.7: GA10B class
122123
return 128;
123124

124125
// Ada Lovelace architecture
125-
case 89: // SM 8.9: AD10x class
126+
case 0x89: // SM 8.9: AD10x class
126127
return 128;
127128

128129
// Hopper architecture
129-
case 90: // SM 9.0: GH100 class
130+
case 0x90: // SM 9.0: GH100 class
131+
return 128;
132+
133+
// Blackwell architecture
134+
case 0xa0: // SM 10.0: GB100 class
135+
case 0xa1: // SM 10.1: GB102 class
136+
return 128;
137+
138+
// Blackwell 2.0 architecture
139+
case 0xc0: // SM 12.0: GB20x class
130140
return 128;
131141

132142
// unknown architecture, return a default value
133143
default:
134-
return 64;
144+
return 128;
135145
}
136146
}
137147

0 commit comments

Comments
 (0)