@@ -14,24 +14,77 @@ SET(CMAKE_CXX_STANDARD 11)
set (CMAKE_CUDA_STANDARD 11)

# Reference command line for building the CUDA op library by hand:
#   nvcc -o libdeepmd_op_cuda.so -I/usr/local/cub-1.8.0 -rdc=true -DHIGH_PREC=true -gencode arch=compute_61,code=sm_61 -shared -Xcompiler -fPIC deepmd_op.cu -L/usr/local/cuda/lib64 -lcudadevrt
# Very important: the include path to cub must be set.
include_directories (cub)

# Choose the GPU architectures to compile for from the CUDA toolkit's major
# version. Each entry XY below is expanded to
#   -gencode arch=compute_XY,code=sm_XY
# To look up the compute capability of a device, see
# https://developer.nvidia.com/cuda-gpus
#
# Devices per compute capability:
#   30  Tesla K10, Quadro K600 K420 K410
#   35  Tesla K20 K40, TITAN Z Black, GTX 780Ti 780
#   37  Tesla K80
#   50  Quadro 620 1200
#   52  Tesla M40, Quadro M6000 M5000 M4000 M2000, TITAN X, GTX 980Ti 980 970 960 950
#   53  Jetson TX1, Tegra X1
#   60  Pascal - GP100/Tesla P100 - DGX-1 (Generic Pascal)
#   61  Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
#   70  Volta - GV100/Tesla V100, GTX 1180 (GV104)
#   75  Turing - RTX 2080, Titan RTX, Quadro R8000
#
# The version variable is quoted so that an empty/undefined value reaches the
# FATAL_ERROR branch instead of producing a malformed if() expression, and the
# comparisons are numeric (GREATER/EQUAL) because the value is an integer.
if ("${CUDA_VERSION_MAJOR}" GREATER 10)
    set (deepmd_cuda_archs 50 52 53 60 61 70 75)
elseif ("${CUDA_VERSION_MAJOR}" EQUAL 10)
    set (deepmd_cuda_archs 30 35 37 50 52 53 60 61 70 75)
elseif ("${CUDA_VERSION_MAJOR}" EQUAL 9)
    set (deepmd_cuda_archs 30 35 37 50 52 53 60 61 70 72)
elseif ("${CUDA_VERSION_MAJOR}" EQUAL 8)
    set (deepmd_cuda_archs 30 35 37 50 52 53 60 61)
elseif ("${CUDA_VERSION_MAJOR}" EQUAL 7)
    set (deepmd_cuda_archs 30 35 37 50 52 53)
else ()
    message (FATAL_ERROR "unsupported CUDA_VERSION " ${CUDA_VERSION} ", please use a newer version (>=7.0) of CUDA toolkit!")
endif ()

# Only reached for a supported toolkit: FATAL_ERROR above stops processing.
message (STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})

# nvcc flags: one -gencode pair per architecture, followed by the common
# optimisation / position-independent-code options. The list is assembled in
# a scratch variable and then assigned, so CUDA_NVCC_FLAGS is overwritten
# (not appended to), exactly as a plain set() would do.
set (deepmd_nvcc_flags "")
foreach (deepmd_arch IN LISTS deepmd_cuda_archs)
    list (APPEND deepmd_nvcc_flags -gencode "arch=compute_${deepmd_arch},code=sm_${deepmd_arch}")
endforeach ()
list (APPEND deepmd_nvcc_flags -O3 -Xcompiler -fPIC)
set (CUDA_NVCC_FLAGS ${deepmd_nvcc_flags})
3689
3790set (SOURCE_FILES
0 commit comments