@@ -104,12 +104,12 @@ function(select_nvcc_arch_flags out_variable)
104
104
elseif (${CUDA_ARCH_NAME} STREQUAL "Pascal" )
105
105
set (cuda_arch_bin "60 61" )
106
106
elseif (${CUDA_ARCH_NAME} STREQUAL "Volta" )
107
- if (NOT ${CMAKE_CUDA_COMPILER_VERSION } LESS 10.0 )
107
+ if (NOT ${CUDA_VERSION } LESS 10.0 )
108
108
add_definitions ("-DSUPPORTS_CUDA_FP16" )
109
109
endif ()
110
110
set (cuda_arch_bin "70" )
111
111
elseif (${CUDA_ARCH_NAME} STREQUAL "Turing" )
112
- if (NOT ${CMAKE_CUDA_COMPILER_VERSION } LESS 10.0 )
112
+ if (NOT ${CUDA_VERSION } LESS 10.0 )
113
113
add_definitions ("-DSUPPORTS_CUDA_FP16" )
114
114
endif ()
115
115
set (cuda_arch_bin "75" )
@@ -142,52 +142,52 @@ function(select_nvcc_arch_flags out_variable)
142
142
foreach (arch ${cuda_arch_bin} )
143
143
if (arch MATCHES "([0-9]+)\\ (([0-9]+)\\ )" )
144
144
# User explicitly specified PTX for the concrete BIN
145
- string (APPEND nvcc_flags " -gencode arch=compute_${CMAKE_MATCH_2} ,code=sm_${CMAKE_MATCH_1} " )
146
- string (APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1} " )
145
+ list (APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1} )
146
+ list (APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1} )
147
147
else ()
148
148
# User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
149
- string (APPEND nvcc_flags " -gencode arch=compute_${arch} ,code=sm_${arch} " )
150
- string (APPEND nvcc_archs_readable " sm_${arch} " )
149
+ list (APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch} )
150
+ list (APPEND nvcc_archs_readable sm_${arch} )
151
151
endif ()
152
152
endforeach ()
153
153
154
154
# Tell NVCC to add PTX intermediate code for the specified architectures
155
155
foreach (arch ${cuda_arch_ptx} )
156
- string (APPEND nvcc_flags " -gencode arch=compute_${arch} ,code=compute_${arch} " )
157
- string (APPEND nvcc_archs_readable " compute_${arch} " )
156
+ list (APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch} )
157
+ list (APPEND nvcc_archs_readable compute_${arch} )
158
158
endforeach ()
159
159
160
160
string (REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable} " )
161
161
set (${out_variable} ${nvcc_flags} PARENT_SCOPE )
162
162
set (${out_variable} _readable ${nvcc_archs_readable} PARENT_SCOPE )
163
163
endfunction ()
164
164
165
- message (STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION } )
166
- if (${CMAKE_CUDA_COMPILER_VERSION } LESS 7.0 )
165
+ message (STATUS "CUDA detected: " ${CUDA_VERSION } )
166
+ if (${CUDA_VERSION } LESS 7.0 )
167
167
set (paddle_known_gpu_archs ${paddle_known_gpu_archs} )
168
- elseif (${CMAKE_CUDA_COMPILER_VERSION } LESS 8.0 ) # CUDA 7.x
168
+ elseif (${CUDA_VERSION } LESS 8.0 ) # CUDA 7.x
169
169
set (paddle_known_gpu_archs ${paddle_known_gpu_archs7} )
170
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED" )
171
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__" )
172
- elseif (${CMAKE_CUDA_COMPILER_VERSION } LESS 9.0 ) # CUDA 8.x
170
+ list ( APPEND CUDA_NVCC_FLAGS " -D_MWAITXINTRIN_H_INCLUDED" )
171
+ list ( APPEND CUDA_NVCC_FLAGS " -D__STRICT_ANSI__" )
172
+ elseif (${CUDA_VERSION } LESS 9.0 ) # CUDA 8.x
173
173
set (paddle_known_gpu_archs ${paddle_known_gpu_archs8} )
174
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED" )
175
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__" )
174
+ list ( APPEND CUDA_NVCC_FLAGS " -D_MWAITXINTRIN_H_INCLUDED" )
175
+ list ( APPEND CUDA_NVCC_FLAGS " -D__STRICT_ANSI__" )
176
176
# CUDA 8 may complain that sm_20 is no longer supported. Suppress the
177
177
# warning for now.
178
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets" )
179
- elseif (${CMAKE_CUDA_COMPILER_VERSION } LESS 10.0 ) # CUDA 9.x
178
+ list ( APPEND CUDA_NVCC_FLAGS " -Wno-deprecated-gpu-targets" )
179
+ elseif (${CUDA_VERSION } LESS 10.0 ) # CUDA 9.x
180
180
set (paddle_known_gpu_archs ${paddle_known_gpu_archs9} )
181
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED" )
182
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__" )
183
- elseif (${CMAKE_CUDA_COMPILER_VERSION } LESS 11.0 ) # CUDA 10.x
181
+ list ( APPEND CUDA_NVCC_FLAGS " -D_MWAITXINTRIN_H_INCLUDED" )
182
+ list ( APPEND CUDA_NVCC_FLAGS " -D__STRICT_ANSI__" )
183
+ elseif (${CUDA_VERSION } LESS 11.0 ) # CUDA 10.x
184
184
set (paddle_known_gpu_archs ${paddle_known_gpu_archs10} )
185
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED" )
186
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__" )
185
+ list ( APPEND CUDA_NVCC_FLAGS " -D_MWAITXINTRIN_H_INCLUDED" )
186
+ list ( APPEND CUDA_NVCC_FLAGS " -D__STRICT_ANSI__" )
187
187
endif ()
188
+ add_definitions ("-DPADDLE_CUDA_BINVER=\" ${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR} \" " )
188
189
189
- message (STATUS "PADDLE_CUDA_BINVER=${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR} " )
190
-
190
+ include_directories (${CUDA_INCLUDE_DIRS} )
191
191
if (NOT WITH_DSO )
192
192
if (WIN32 )
193
193
set_property (GLOBAL PROPERTY CUDA_MODULES ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${CUDA_cusolver_LIBRARY} )
@@ -196,24 +196,37 @@ endif(NOT WITH_DSO)
196
196
197
197
# setting nvcc arch flags
198
198
select_nvcc_arch_flags (NVCC_FLAGS_EXTRA )
199
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA} " )
200
- message (STATUS "NVCC_FLAGS_EXTRA : ${NVCC_FLAGS_EXTRA } " )
199
+ list ( APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA} )
200
+ message (STATUS "Added CUDA NVCC flags for : ${NVCC_FLAGS_EXTRA_readable } " )
201
201
202
202
# Set C++11 support
203
203
set (CUDA_PROPAGATE_HOST_FLAGS OFF )
204
+
204
205
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
205
206
# So, don't set these flags here.
206
- if (NOT WIN32 ) # windows msvc2015 support c++11 natively.
207
- # -std=c++11 -fPIC not recoginize by msvc, -Xcompiler will be added by cmake.
208
- set (CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++11" )
207
+ if (NOT WIN32 ) # windows msvc2015 support c++11 natively.
208
+ # -std=c++11 and -fPIC are not recognized by msvc; -Xcompiler will be added by cmake.
209
+ list (APPEND CUDA_NVCC_FLAGS "-std=c++11" )
210
+ list (APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC" )
209
211
endif (NOT WIN32 )
210
212
211
- # in cuda9, suppress cuda warning on eigen
212
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} -w" )
213
+ # in cuda9, suppress cuda warning on eigen
214
+ list ( APPEND CUDA_NVCC_FLAGS " -w" )
213
215
# Set --expt-relaxed-constexpr to suppress Eigen warnings
214
- set ( CMAKE_CUDA_FLAGS " ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr" )
216
+ list ( APPEND CUDA_NVCC_FLAGS " --expt-relaxed-constexpr" )
215
217
216
- if (WIN32 )
218
+ if (NOT WIN32 )
219
+ if (CMAKE_BUILD_TYPE STREQUAL "Debug" )
220
+ list (APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG} )
221
+ elseif (CMAKE_BUILD_TYPE STREQUAL "Release" )
222
+ list (APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE} )
223
+ elseif (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo" )
224
+ list (APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO} )
225
+ elseif (CMAKE_BUILD_TYPE STREQUAL "MinSizeRel" )
226
+ # nvcc 9 does not support -Os. Use Release flags instead
227
+ list (APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE} )
228
+ endif ()
229
+ else (NOT WIN32 )
217
230
list (APPEND CUDA_NVCC_FLAGS "-Xcompiler \" /wd 4244 /wd 4267 /wd 4819\" " )
218
231
list (APPEND CUDA_NVCC_FLAGS "--compiler-options;/bigobj" )
219
232
if (CMAKE_BUILD_TYPE STREQUAL "Debug" )
@@ -223,9 +236,9 @@ if (WIN32)
223
236
elseif (CMAKE_BUILD_TYPE STREQUAL "Release" )
224
237
list (APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG" )
225
238
else ()
226
- message (FATAL "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build." )
227
- endif ()
228
- endif (WIN32 )
239
+ message (FATAL_ERROR "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build." ) # FATAL_ERROR is the mode keyword that stops configuration; a bare FATAL is not a mode and would only be printed as part of the message text
240
+ endif ()
241
+ endif (NOT WIN32 )
229
242
230
243
mark_as_advanced (CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD )
231
244
mark_as_advanced (CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION )
0 commit comments