Skip to content

Commit bb39b15

Browse files
authored
Merge pull request #148 from denghuilu/devel-submit
Fix the "unsupported architecture compute_75" error when building with CUDA 9.0
2 parents 33bc642 + db582f3 commit bb39b15

File tree

3 files changed

+64
-36
lines changed

3 files changed

+64
-36
lines changed

source/op/cuda/CMakeLists.txt

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,70 @@ SET(CMAKE_CXX_STANDARD 11)
1414
SET(CMAKE_CUDA_STANDARD 11)
1515
# nvcc -o libdeepmd_op_cuda.so -I/usr/local/cub-1.8.0 -rdc=true -DHIGH_PREC=true -gencode arch=compute_61,code=sm_61 -shared -Xcompiler -fPIC deepmd_op.cu -L/usr/local/cuda/lib64 -lcudadevrt
1616
# very important here! Include path to cub.
17-
include_directories(cub)
18-
# nvcc flags
19-
set(CUDA_NVCC_FLAGS -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
20-
-gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
21-
-gencode arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, GTX 1180 (GV104)
22-
-gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
23-
-O3; -Xcompiler -fPIC;
24-
)
17+
# To look up a device's compute capability, see https://developer.nvidia.com/cuda-gpus
18+
include_directories(cub)
19+
20+
message(STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})
21+
22+
if (${CUDA_VERSION_MAJOR} GREATER "10")
23+
# nvcc flags
24+
set(CUDA_NVCC_FLAGS -gencode arch=compute_50,code=sm_50;
25+
-gencode arch=compute_52,code=sm_52; # Tesla M60, Tesla M40, Quadro M6000...
26+
-gencode arch=compute_53,code=sm_53;
27+
-gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
28+
-gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
29+
-gencode arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, Titan V
30+
-gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro RTX 8000
31+
-O3; -Xcompiler -fPIC;
32+
)
33+
elseif (${CUDA_VERSION_MAJOR} STREQUAL "10")
34+
set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30; # Tesla K10, Quadro K600 K420 K410,
35+
-gencode arch=compute_35,code=sm_35; # Tesla K20 K40, TITAN Z Black, GTX 780Ti 780
36+
-gencode arch=compute_37,code=sm_37; # Tesla K80
37+
-gencode arch=compute_50,code=sm_50; # Quadro 620 1200
38+
-gencode arch=compute_52,code=sm_52; # Tesla M60 M40, Quadro M6000 M5000 M4000 M2000, TITAN X, GTX 980Ti 980 970 960 950
39+
-gencode arch=compute_53,code=sm_53; # Jetson TX1, Tegra X1
40+
-gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
41+
-gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
42+
-gencode arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, Titan V
43+
-gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro RTX 8000
44+
-O3; -Xcompiler -fPIC;
45+
)
46+
elseif (${CUDA_VERSION_MAJOR} STREQUAL "9")
47+
set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
48+
-gencode arch=compute_35,code=sm_35;
49+
-gencode arch=compute_37,code=sm_37;
50+
-gencode arch=compute_50,code=sm_50;
51+
-gencode arch=compute_52,code=sm_52; # Tesla M60, Tesla M40, Quadro M6000...
52+
-gencode arch=compute_53,code=sm_53;
53+
-gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
54+
-gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
55+
-gencode arch=compute_70,code=sm_70; # Volta - GV100/Tesla V100, Titan V
56+
-O3; -Xcompiler -fPIC;
57+
)
58+
elseif (${CUDA_VERSION_MAJOR} STREQUAL "8")
59+
set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
60+
-gencode arch=compute_35,code=sm_35;
61+
-gencode arch=compute_37,code=sm_37;
62+
-gencode arch=compute_50,code=sm_50;
63+
-gencode arch=compute_52,code=sm_52; # Tesla M60, Tesla M40, Quadro M6000...
64+
-gencode arch=compute_53,code=sm_53;
65+
-gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
66+
-gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
67+
-O3; -Xcompiler -fPIC;
68+
)
69+
elseif (${CUDA_VERSION_MAJOR} STREQUAL "7")
70+
set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
71+
-gencode arch=compute_35,code=sm_35;
72+
-gencode arch=compute_37,code=sm_37;
73+
-gencode arch=compute_50,code=sm_50;
74+
-gencode arch=compute_52,code=sm_52; # Tesla M60, Tesla M40, Quadro M6000...
75+
-gencode arch=compute_53,code=sm_53;
76+
-O3; -Xcompiler -fPIC;
77+
)
78+
else ()
79+
message(FATAL_ERROR "unsupported CUDA_VERSION " ${CUDA_VERSION} ", please use a newer version (>=7.0) of CUDA toolkit!")
80+
endif()
2581

2682
set (SOURCE_FILES
2783
descrpt_se_a.cu descrpt_se_r.cu prod_force_se_a.cu prod_force_se_r.cu prod_virial_se_a.cu prod_virial_se_r.cu

source/op/cuda/descrpt_se_a.cu

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,20 +40,6 @@ inline void cudaAssert(cudaError_t code, const char *file, int line, bool abort=
4040
}
4141
}
4242

43-
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
44-
static __inline__ __device__ double atomicAdd(double* address, double val) {
45-
unsigned long long int* address_as_ull = (unsigned long long int*)address;
46-
unsigned long long int old = *address_as_ull, assumed;
47-
do {
48-
assumed = old;
49-
old = atomicCAS(address_as_ull, assumed,
50-
__double_as_longlong(val + __longlong_as_double(assumed)));
51-
// Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
52-
} while (assumed != old);
53-
return __longlong_as_double(old);
54-
}
55-
#endif
56-
5743
template <
5844
typename Key,
5945
int BLOCK_THREADS,

source/op/cuda/descrpt_se_r.cu

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,20 +41,6 @@ inline void cudaAssert(cudaError_t code, const char *file, int line, bool abort=
4141
}
4242
}
4343

44-
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
45-
static __inline__ __device__ double atomicAdd(double* address, double val) {
46-
unsigned long long int* address_as_ull = (unsigned long long int*)address;
47-
unsigned long long int old = *address_as_ull, assumed;
48-
do {
49-
assumed = old;
50-
old = atomicCAS(address_as_ull, assumed,
51-
__double_as_longlong(val + __longlong_as_double(assumed)));
52-
// Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
53-
} while (assumed != old);
54-
return __longlong_as_double(old);
55-
}
56-
#endif
57-
5844
template <
5945
typename Key,
6046
int BLOCK_THREADS,

0 commit comments

Comments
 (0)