@@ -14,14 +14,70 @@ SET(CMAKE_CXX_STANDARD 11)
1414SET (CMAKE_CUDA_STANDARD 11)
1515# nvcc -o libdeepmd_op_cuda.so -I/usr/local/cub-1.8.0 -rdc=true -DHIGH_PREC=true -gencode arch=compute_61,code=sm_61 -shared -Xcompiler -fPIC deepmd_op.cu -L/usr/local/cuda/lib64 -lcudadevrt
1616# very important here! Include path to cub.
17- include_directories (cub)
18- # nvcc flags
19- set (CUDA_NVCC_FLAGS -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
20- -gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
21- -gencode arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
22- -gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
23- -O3; -Xcompiler -fPIC;
24- )
17+ # To look up a device's compute capability, see https://developer.nvidia.com/cuda-gpus
18+ include_directories (cub)
19+ # Pick the nvcc -gencode targets from the CUDA toolkit's major version; each branch below sets its own architecture list.
20+ message (STATUS "CUDA major version is ${CUDA_VERSION_MAJOR}")
21+ # The version is quoted and compared numerically so an empty/undefined CUDA_VERSION_MAJOR falls through to the FATAL_ERROR branch instead of breaking if() argument parsing.
22+ if ("${CUDA_VERSION_MAJOR}" GREATER 10)
23+ # nvcc flags for CUDA major versions above 10; no sm_3x targets are listed in this branch
24+ set (CUDA_NVCC_FLAGS -gencode arch=compute_50,code=sm_50;
25+ -gencode arch=compute_52,code=sm_52; # Tesla M40, Quadro M6000...
26+ -gencode arch=compute_53,code=sm_53;
27+ -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
28+ -gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
29+ -gencode arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
30+ -gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
31+ -O3; -Xcompiler -fPIC;
32+ )
33+ elseif ("${CUDA_VERSION_MAJOR}" EQUAL 10) # CUDA 10: sm_30 through sm_75
34+ set (CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30; # Tesla K10, Quadro K600 K420 K410
35+ -gencode arch=compute_35,code=sm_35; # Tesla K20 K40, TITAN Z Black, GTX 780Ti 780
36+ -gencode arch=compute_37,code=sm_37; # Tesla K80
37+ -gencode arch=compute_50,code=sm_50; # Quadro 620 1200
38+ -gencode arch=compute_52,code=sm_52; # Tesla M40, Quadro M6000 M5000 M4000 M2000, TITAN X, GTX 980Ti 980 970 960 950
39+ -gencode arch=compute_53,code=sm_53; # Jetson TX1, Tegra X1
40+ -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
41+ -gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
42+ -gencode arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
43+ -gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
44+ -O3; -Xcompiler -fPIC;
45+ )
46+ elseif ("${CUDA_VERSION_MAJOR}" EQUAL 9) # CUDA 9: sm_30 through sm_70
47+ set (CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
48+ -gencode arch=compute_35,code=sm_35;
49+ -gencode arch=compute_37,code=sm_37;
50+ -gencode arch=compute_50,code=sm_50;
51+ -gencode arch=compute_52,code=sm_52; # Tesla M40, Quadro M6000...
52+ -gencode arch=compute_53,code=sm_53;
53+ -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
54+ -gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
55+ -gencode arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
56+ -O3; -Xcompiler -fPIC;
57+ )
58+ elseif ("${CUDA_VERSION_MAJOR}" EQUAL 8) # CUDA 8: sm_30 through sm_61
59+ set (CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
60+ -gencode arch=compute_35,code=sm_35;
61+ -gencode arch=compute_37,code=sm_37;
62+ -gencode arch=compute_50,code=sm_50;
63+ -gencode arch=compute_52,code=sm_52; # Tesla M40, Quadro M6000...
64+ -gencode arch=compute_53,code=sm_53;
65+ -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
66+ -gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
67+ -O3; -Xcompiler -fPIC;
68+ )
69+ elseif ("${CUDA_VERSION_MAJOR}" EQUAL 7) # CUDA 7: sm_30 through sm_53
70+ set (CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
71+ -gencode arch=compute_35,code=sm_35;
72+ -gencode arch=compute_37,code=sm_37;
73+ -gencode arch=compute_50,code=sm_50;
74+ -gencode arch=compute_52,code=sm_52; # Tesla M40, Quadro M6000...
75+ -gencode arch=compute_53,code=sm_53;
76+ -O3; -Xcompiler -fPIC;
77+ )
78+ else () # older than 7, or the major version is empty / not a number
79+ message (FATAL_ERROR "unsupported CUDA_VERSION ${CUDA_VERSION}, please use a newer version (>=7.0) of CUDA toolkit!")
80+ endif ()
2581
2682set (SOURCE_FILES
2783 descrpt_se_a.cu descrpt_se_r.cu prod_force_se_a.cu prod_force_se_r.cu prod_virial_se_a.cu prod_virial_se_r.cu
0 commit comments