Skip to content

Commit 3cd4867

Browse files
LuLu
authored and committed
Fix the "unsupported architecture" error when using certain versions of the CUDA toolkit
1 parent 6eeca8f commit 3cd4867

File tree

1 file changed

+57
-4
lines changed

1 file changed

+57
-4
lines changed

source/op/cuda/CMakeLists.txt

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,77 @@ SET(CMAKE_CXX_STANDARD 11)
1414
SET(CMAKE_CUDA_STANDARD 11)
1515
# nvcc -o libdeepmd_op_cuda.so -I/usr/local/cub-1.8.0 -rdc=true -DHIGH_PREC=true -gencode arch=compute_61,code=sm_61 -shared -Xcompiler -fPIC deepmd_op.cu -L/usr/local/cuda/lib64 -lcudadevrt
1616
# very important here! Include path to cub.
17+
# To look up a device's compute capability, see https://developer.nvidia.com/cuda-gpus
1718
include_directories(cub)
18-
if (${CUDA_VERSION_MAJOR} STREQUAL "10")
19+
20+
if (${CUDA_VERSION_MAJOR} GREATER "10")
21+
message(STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})
22+
# nvcc flags
23+
set(CUDA_NVCC_FLAGS -gencode arch=compute_50,code=sm_50;
24+
-gencode arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
25+
-gencode arch=compute_53,code=sm_53;
26+
-gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
27+
-gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
28+
-gencode arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
29+
-gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
30+
-O3; -Xcompiler -fPIC;
31+
)
32+
elseif (${CUDA_VERSION_MAJOR} STREQUAL "10")
1933
message(STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})
2034
# nvcc flags
21-
set(CUDA_NVCC_FLAGS -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
35+
set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30; # Tesla K10, Quadro K600 K420 K410,
36+
-gencode arch=compute_35,code=sm_35; # Tesla K20 K40, TITAN Z Black, GTX 780Ti 780
37+
-gencode arch=compute_37,code=sm_37; # Tesla K80
38+
-gencode arch=compute_50,code=sm_50; # Quadro 620 1200
39+
-gencode arch=compute_52,code=sm_52; # Tesla M40 M40, Quadro M6000 M5000 M4000 M2000, TITAN X, GTX 980Ti 980 970 960 950
40+
-gencode arch=compute_53,code=sm_53; # Jetson TX1, Tegra X1
41+
-gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
2242
-gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
2343
-gencode arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
2444
-gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
2545
-O3; -Xcompiler -fPIC;
2646
)
27-
else (${CUDA_VERSION_MAJOR} STREQUAL "10")
47+
elseif (${CUDA_VERSION_MAJOR} STREQUAL "9")
2848
message(STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})
2949
# nvcc flags
30-
set(CUDA_NVCC_FLAGS -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
50+
set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
51+
-gencode arch=compute_35,code=sm_35;
52+
-gencode arch=compute_37,code=sm_37;
53+
-gencode arch=compute_50,code=sm_50;
54+
-gencode arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
55+
-gencode arch=compute_53,code=sm_53;
56+
-gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
3157
-gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
3258
-gencode arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
59+
-gencode arch=compute_72,code=sm_72;
60+
-O3; -Xcompiler -fPIC;
61+
)
62+
elseif (${CUDA_VERSION_MAJOR} STREQUAL "8")
63+
message(STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})
64+
# nvcc flags
65+
set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
66+
-gencode arch=compute_35,code=sm_35;
67+
-gencode arch=compute_37,code=sm_37;
68+
-gencode arch=compute_50,code=sm_50;
69+
-gencode arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
70+
-gencode arch=compute_53,code=sm_53;
71+
-gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
72+
-gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
73+
-O3; -Xcompiler -fPIC;
74+
)
75+
elseif (${CUDA_VERSION_MAJOR} STREQUAL "7")
76+
message(STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})
77+
# nvcc flags
78+
set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
79+
-gencode arch=compute_35,code=sm_35;
80+
-gencode arch=compute_37,code=sm_37;
81+
-gencode arch=compute_50,code=sm_50;
82+
-gencode arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
83+
-gencode arch=compute_53,code=sm_53;
3384
-O3; -Xcompiler -fPIC;
3485
)
86+
else ()
87+
message(FATAL_ERROR "unsupported CUDA_VERSION " ${CUDA_VERSION} ", please use a newer version (>=7.0) of CUDA toolkit!")
3588
endif()
3689

3790
set (SOURCE_FILES

0 commit comments

Comments
 (0)