Skip to content

Commit 8eaf533

Browse files
committed
Add python script to detect nvidia CC
1 parent cae970e commit 8eaf533

File tree

2 files changed

+91
-2
lines changed

2 files changed

+91
-2
lines changed

cmake/nvidiacc.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
'''
5+
This script detects the compute capabilities (X.Y) of the GPU cards and
6+
returns them in a comma-separated XY string, if multiple GPUs are detected.
7+
8+
The auto-detected compute capabilities (XY) are used by the CMakeLists.txt only
9+
if they are not explicitly set by the user. For the following situations:
10+
- the compiler does not support the XY value, e.g., XY=52;
11+
- the compiler is older than the device;
12+
- you don't have python3 in your system;
13+
- etc.
14+
you must set the compute capabilities explicitly when cmake is configured.
15+
16+
Reference:
17+
https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549
18+
by Jan Schlüter
19+
'''
20+
21+
22+
import ctypes
23+
import sys
24+
25+
26+
def CommaSeparatedCCString():
    '''
    Return the compute capabilities of the detected GPUs as one string.

    Each device's compute capability X.Y is encoded as the integer
    ``10 * X + Y``; duplicates are removed and the remaining values are
    sorted in ascending order and joined with commas, e.g. ``'75,86'``.
    An empty string is returned when the driver reports no device.

    Raises:
        OSError: if none of the known CUDA driver libraries can be loaded.

    If any CUDA driver call fails, a diagnostic is printed and the process
    exits with status 1.
    '''
    libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll', 'cuda.dll')
    # Try each platform-specific driver library name and keep the first one
    # that loads.
    for libname in libnames:
        try:
            cuda = ctypes.CDLL(libname)
        except OSError:
            continue
        else:
            break
    else:
        raise OSError('could not load any of: ' + ' '.join(libnames))

    ############################################################

    def checkCall(f, *args):
        '''Call the driver function f(*args); report and exit on failure.'''
        CUDA_SUCCESS = 0

        result = f(*args)
        if result != CUDA_SUCCESS:
            error_str = ctypes.c_char_p()
            cuda.cuGetErrorString(result, ctypes.byref(error_str))
            # The pointer stays NULL (value is None) if the lookup itself
            # failed, so do not decode it unconditionally.
            if error_str.value:
                message = error_str.value.decode()
            else:
                message = 'unknown error'
            print('{} failed with error code {}: {}'.format(f.__name__, result, message))
            sys.exit(1)

    ############################################################

    # from cuda.h
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76

    checkCall(cuda.cuInit, 0)

    nGpus = ctypes.c_int()
    checkCall(cuda.cuDeviceGetCount, ctypes.byref(nGpus))

    # A set removes duplicates when several cards share a capability.
    cc_set = set()
    for i in range(nGpus.value):
        device = ctypes.c_int()
        checkCall(cuda.cuDeviceGet, ctypes.byref(device), i)

        cc_major = ctypes.c_int()
        cc_minor = ctypes.c_int()
        checkCall(cuda.cuDeviceGetAttribute, ctypes.byref(cc_major), CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device)
        checkCall(cuda.cuDeviceGetAttribute, ctypes.byref(cc_minor), CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device)
        cc_set.add(cc_major.value * 10 + cc_minor.value)

    # Join every distinct value; the previous hand-written loop started at
    # index 2 and therefore silently dropped the second entry.
    return ','.join(str(cc) for cc in sorted(cc_set))
87+
88+
89+
if __name__ == '__main__':
    # Script entry point: write the detected compute-capability string to
    # stdout so that the invoking process can capture it.
    cc_string = CommaSeparatedCCString()
    print(cc_string)

src/cudart/gpucard.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,7 @@ static void getDeviceAttribute(DeviceAttribute& a, int device = 0)
114114
else
115115
found_cc = false;
116116

117-
// Number of CUDA cores per multiprocessor, not tabulated in
118-
// cuda-c-programming-guide;
117+
// Number of CUDA cores per multiprocessor, not tabulated in cuda-c-programming-guide;
119118
// documented in "Compute Capability - architecture"
120119
// 8.6: 128
121120
// 7.0 7.2 7.5 8.0: 64

0 commit comments

Comments
 (0)