|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | + |
| 4 | +''' |
| 5 | +This script detects the compute capabilities (X.Y) of the GPU cards and |
| 6 | +returns them as XY values, comma-separated if more than one is detected. |
| 7 | +
|
| 8 | +The auto-detected compute capabilities (XY) are used by the CMakeLists.txt only |
| 9 | +if they are not explicitly set by the user. For the following situations: |
| 10 | + - the compiler does not support the XY value, e.g., XY=52; |
| 11 | + - the compiler is older than the device; |
| 12 | + - you don't have python3 in your system; |
| 13 | + - etc. |
| 14 | +you must set the compute capabilities explicitly when cmake is configured. |
| 15 | +
|
| 16 | +Reference: |
| 17 | +https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549 |
| 18 | +by Jan Schlüter |
| 19 | +''' |
| 20 | + |
| 21 | + |
| 22 | +import ctypes |
| 23 | +import sys |
| 24 | + |
| 25 | + |
def CommaSeparatedCCString():
    '''
    Return the compute capabilities of the CUDA devices as a string.

    The CUDA driver library is loaded through ctypes, every device reported
    by the driver is queried for its major and minor compute capability, and
    each distinct capability is encoded as the integer XY = major * 10 + minor.

    Returns:
        The distinct XY values in ascending numeric order, joined by commas
        (for example '60,75'). An empty string if the driver reports zero
        devices.

    Raises:
        OSError: if none of the known driver library names can be loaded.

    If any driver call returns a non-zero status, a message is printed and
    the process exits with status 1 (see checkCall below).
    '''
    # Try the driver library names in order; the first one that loads wins.
    libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll', 'cuda.dll')
    for libname in libnames:
        try:
            cuda = ctypes.CDLL(libname)
        except OSError:
            continue
        else:
            break
    else:
        raise OSError('could not load any of: ' + ' '.join(libnames))

    ############################################################

    def checkCall(f, *args):
        '''Call driver function f(*args); print the error and exit(1) on failure.'''
        CUDA_SUCCESS = 0

        result = f(*args)
        if result != CUDA_SUCCESS:
            error_str = ctypes.c_char_p()
            cuda.cuGetErrorString(result, ctypes.byref(error_str))
            # cuGetErrorString leaves the pointer NULL for codes it does not
            # recognise; guard so the real failure is still reported instead
            # of an AttributeError on None.
            if error_str.value:
                description = error_str.value.decode()
            else:
                description = 'unknown error'
            print('{} failed with error code {}: {}'.format(f.__name__, result, description))
            sys.exit(1)

    ############################################################

    # A set, so several identical cards contribute a single entry.
    cc_set = set()

    # from cuda.h
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76

    checkCall(cuda.cuInit, 0)

    nGpus = ctypes.c_int()
    checkCall(cuda.cuDeviceGetCount, ctypes.byref(nGpus))

    for i in range(nGpus.value):
        device = ctypes.c_int()
        checkCall(cuda.cuDeviceGet, ctypes.byref(device), i)

        cc_major = ctypes.c_int()
        cc_minor = ctypes.c_int()
        checkCall(cuda.cuDeviceGetAttribute, ctypes.byref(cc_major), CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device)
        checkCall(cuda.cuDeviceGetAttribute, ctypes.byref(cc_minor), CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device)
        cc_set.add(cc_major.value * 10 + cc_minor.value)

    # Join every distinct value. The previous hand-written loop started at
    # index 2 and therefore silently dropped the second capability.
    return ','.join(str(cc) for cc in sorted(cc_set))
| 87 | + |
| 88 | + |
if __name__ == '__main__':
    # Script entry point: write the detected capability string to stdout.
    detected = CommaSeparatedCCString()
    print(detected)
0 commit comments