# Early-out guards: stop processing this file on toolchains where CUDA
# compilation is not supported (see the status messages for the exact reason).
# NOTE(review): OPENCV_CMAKE_FORCE_CUDA also triggers the early return in both
# guards below - confirm that this is the intended meaning of the option.
if((WIN32 AND NOT MSVC) OR OPENCV_CMAKE_FORCE_CUDA)
  message(STATUS "CUDA compilation is disabled (due to only Visual Studio compiler supported on your platform).")
  return()
endif()

if((NOT UNIX AND CV_CLANG) OR OPENCV_CMAKE_FORCE_CUDA)
  message(STATUS "CUDA compilation is disabled (due to Clang unsupported on your platform).")
  return()
endif()
10
10
11
+ #set(OPENCV_CMAKE_CUDA_DEBUG 1)
11
12
12
13
if (((NOT CMAKE_VERSION VERSION_LESS "3.9.0" ) # requires https://gitlab.kitware.com/cmake/cmake/merge_requests/663
13
14
OR OPENCV_CUDA_FORCE_EXTERNAL_CMAKE_MODULE)
@@ -43,7 +44,7 @@ if(CUDA_FOUND)
43
44
endif ()
44
45
45
46
if (WITH_NVCUVID)
46
- macro (SEARCH_NVCUVID_HEADER _filename _result)
47
+ macro (ocv_cuda_SEARCH_NVCUVID_HEADER _filename _result)
47
48
# place header file under CUDA_TOOLKIT_TARGET_DIR or CUDA_TOOLKIT_ROOT_DIR
48
49
find_path (_header_result
49
50
${_filename}
@@ -60,8 +61,8 @@ if(CUDA_FOUND)
60
61
endif ()
61
62
unset (_header_result CACHE )
62
63
endmacro ()
63
- SEARCH_NVCUVID_HEADER ("nvcuvid.h" HAVE_NVCUVID_HEADER)
64
- SEARCH_NVCUVID_HEADER ("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER)
64
+ ocv_cuda_SEARCH_NVCUVID_HEADER ("nvcuvid.h" HAVE_NVCUVID_HEADER)
65
+ ocv_cuda_SEARCH_NVCUVID_HEADER ("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER)
65
66
find_cuda_helper_libs(nvcuvid)
66
67
if (WIN32 )
67
68
find_cuda_helper_libs(nvcuvenc)
@@ -102,32 +103,89 @@ if(CUDA_FOUND)
102
103
unset (CUDA_ARCH_PTX CACHE )
103
104
endif ()
104
105
# Make sure the nvcc invocations used for architecture detection know which
# host compiler to use. The result is accumulated in
# OPENCV_CUDA_DETECTION_NVCC_FLAGS, which the detection macros pass to nvcc.
if(OPENCV_CUDA_DETECTION_NVCC_FLAGS MATCHES "-ccbin")
  # already specified by user
elseif(CUDA_HOST_COMPILER AND EXISTS "${CUDA_HOST_COMPILER}")
  list(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${CUDA_HOST_COMPILER}")
elseif(WIN32 AND CMAKE_LINKER)  # Workaround for VS cl.exe not being in the env. path
  # Use the directory that contains the linker as the host compiler directory.
  get_filename_component(host_compiler_bindir "${CMAKE_LINKER}" DIRECTORY)
  list(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${host_compiler_bindir}")
else()
  # Nothing usable was found; only report it when the user supplied a value
  # that does not exist on disk.
  if(CUDA_HOST_COMPILER)
    message(STATUS "CUDA: CUDA_HOST_COMPILER='${CUDA_HOST_COMPILER}' is not valid, autodetection may not work. Specify OPENCV_CUDA_DETECTION_NVCC_FLAGS with -ccbin option for fix that")
  endif()
endif()
118
+
# ocv_filter_available_architecture(<result_list> <cc>...)
#
# Appends to <result_list> every compute capability from the argument list for
# which nvcc can compile cmake/checks/OpenCVDetectCudaArch.cu with the matching
# "-gencode arch=compute_XX,code=sm_XX" option.
#
# The detected list is stored in the cache (OPENCV_CACHE_CUDA_SUPPORTED_CC)
# together with a key built from the requested list, the nvcc executable and
# the detection flags; when the key is unchanged the cached list is returned
# without running nvcc again.
#
# This is a macro: helper variables (CC_LIST, NVCC_OPTION, _cmd, _nvcc_res,
# _nvcc_out, _nvcc_err, __cache_key_check, ...) are set in the caller's scope.
macro(ocv_filter_available_architecture result_list)
  set(__cache_key_check "${ARGN} : ${CUDA_NVCC_EXECUTABLE} ${OPENCV_CUDA_DETECTION_NVCC_FLAGS}")
  if(DEFINED OPENCV_CACHE_CUDA_SUPPORTED_CC AND OPENCV_CACHE_CUDA_SUPPORTED_CC_check STREQUAL __cache_key_check)
    set(${result_list} "${OPENCV_CACHE_CUDA_SUPPORTED_CC}")
  else()
    set(CC_LIST ${ARGN})
    foreach(target_arch ${CC_LIST})
      # "6.1" -> "61", as expected by the compute_XX / sm_XX names.
      string(REPLACE "." "" target_arch_short "${target_arch}")
      set(NVCC_OPTION "-gencode;arch=compute_${target_arch_short},code=sm_${target_arch_short}")
      set(_cmd "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" --compile)
      # stderr is captured (not silenced) so it can be shown in debug mode.
      execute_process(
          COMMAND ${_cmd}
          WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
          RESULT_VARIABLE _nvcc_res
          OUTPUT_VARIABLE _nvcc_out
          ERROR_VARIABLE _nvcc_err
          OUTPUT_STRIP_TRAILING_WHITESPACE
      )
      if(OPENCV_CMAKE_CUDA_DEBUG)
        message(WARNING "COMMAND: ${_cmd}")
        message(STATUS "Result: ${_nvcc_res}")
        message(STATUS "Out: ${_nvcc_out}")
        message(STATUS "Err: ${_nvcc_err}")
      endif()
      # A zero exit code means nvcc accepted this architecture.
      if(_nvcc_res EQUAL 0)
        list(APPEND ${result_list} "${target_arch}")
      endif()
    endforeach()
    string(STRIP "${${result_list}}" ${result_list})
    if(" ${${result_list}}" STREQUAL " ")
      message(WARNING "CUDA: Autodetection arch list is empty. Please enable OPENCV_CMAKE_CUDA_DEBUG=1 and check/specify OPENCV_CUDA_DETECTION_NVCC_FLAGS variable")
    endif()

    # cache detected values
    set(OPENCV_CACHE_CUDA_SUPPORTED_CC ${${result_list}} CACHE INTERNAL "")
    set(OPENCV_CACHE_CUDA_SUPPORTED_CC_check "${__cache_key_check}" CACHE INTERNAL "")
  endif()
endmacro()
125
158
# ocv_detect_native_cuda_arch(<status> <output>)
#
# Compiles and runs cmake/checks/OpenCVDetectCudaArch.cu through nvcc ("--run").
#   <status> - name of the variable receiving the process exit code
#              (0 on success, also 0 when the cached value is reused)
#   <output> - name of the variable receiving the last line of the program's
#              standard output (earlier lines, e.g. warnings, are stripped)
#
# A successful result is stored in the cache (OPENCV_CACHE_CUDA_ACTIVE_CC)
# keyed by the full detection command line, so nvcc is not re-run while the
# command stays the same.
#
# This is a macro: _nvcc_out, _nvcc_err, __cache_key_check and
# OPENCV_CUDA_DETECT_ARCHS_COMMAND are set in the caller's scope.
macro(ocv_detect_native_cuda_arch status output)
  set(OPENCV_CUDA_DETECT_ARCHS_COMMAND "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run")
  set(__cache_key_check "${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")
  if(DEFINED OPENCV_CACHE_CUDA_ACTIVE_CC AND OPENCV_CACHE_CUDA_ACTIVE_CC_check STREQUAL __cache_key_check)
    set(${output} "${OPENCV_CACHE_CUDA_ACTIVE_CC}")
    set(${status} 0)
  else()
    # stderr is captured into _nvcc_err (nothing is printed to the console);
    # ERROR_QUIET is intentionally not used because it would discard the
    # text that the debug output below is supposed to show.
    execute_process(
        COMMAND ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}
        WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
        RESULT_VARIABLE ${status}
        OUTPUT_VARIABLE _nvcc_out
        ERROR_VARIABLE _nvcc_err
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )
    if(OPENCV_CMAKE_CUDA_DEBUG)
      message(WARNING "COMMAND: ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")
      message(STATUS "Result: ${${status}}")
      message(STATUS "Out: ${_nvcc_out}")
      message(STATUS "Err: ${_nvcc_err}")
    endif()
    string(REGEX REPLACE ".*\n" "" ${output} "${_nvcc_out}")  # Strip leading warning messages, if any

    if(${status} EQUAL 0)
      # cache detected values; the value must come from <output> - this macro
      # has no 'result_list' parameter, so referencing it cached an empty string
      set(OPENCV_CACHE_CUDA_ACTIVE_CC "${${output}}" CACHE INTERNAL "")
      set(OPENCV_CACHE_CUDA_ACTIVE_CC_check "${__cache_key_check}" CACHE INTERNAL "")
    endif()
  endif()
endmacro()
132
190
133
191
macro (ocv_wipeout_deprecated _arch_bin_list)
@@ -156,14 +214,21 @@ if(CUDA_FOUND)
156
214
else ()
157
215
string (REGEX MATCHALL "[0-9]+\\ .[0-9]" __cuda_arch_bin "${_nvcc_out} " )
158
216
endif ()
217
+ elseif (CUDA_ARCH_BIN)
218
+ message (STATUS "CUDA: Using CUDA_ARCH_BIN=${CUDA_ARCH_BIN} " )
219
+ set (__cuda_arch_bin ${CUDA_ARCH_BIN} )
159
220
endif ()
160
221
161
222
if (NOT DEFINED __cuda_arch_bin)
162
223
if (ARM)
163
224
set (__cuda_arch_bin "3.2" )
164
225
set (__cuda_arch_ptx "" )
165
226
elseif (AARCH64)
166
- ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
227
+ if (NOT CMAKE_CROSSCOMPILING )
228
+ ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
229
+ else ()
230
+ set (_nvcc_res -1) # emulate error, see below
231
+ endif ()
167
232
if (NOT _nvcc_res EQUAL 0)
168
233
message (STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures." )
169
234
# TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0)
@@ -197,11 +262,9 @@ if(CUDA_FOUND)
# Dot-less forms of the requested architectures ("6.1" -> "61").
string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}")
string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")

# Check if user specified 1.0 compute capability: we don't support it.
# The leading space anchors the match to the start of an entry and the dot is
# escaped so that only a literal "1.0" is rejected.
if(" ${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}" MATCHES " 1\\.0")
  message(SEND_ERROR "CUDA: 1.0 compute capability is not supported - exclude it from ARCH/PTX list and re-run CMake")
endif()
206
269
207
270
# NVCC flags to be set
@@ -312,6 +375,16 @@ if(CUDA_FOUND)
312
375
if(UNIX OR APPLE)
  # Host compiler must produce position-independent code.
  list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC)
  # Pass an explicit C++ standard to nvcc unless one is already present in the
  # C++ compiler flags or in the NVCC flags.
  if(ENABLE_CXX11
      AND NOT " ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_DEBUG} ${CUDA_NVCC_FLAGS}" MATCHES "-std="
  )
    if(CUDA_VERSION VERSION_LESS "11.0")
      list(APPEND CUDA_NVCC_FLAGS "--std=c++11")
    else()
      list(APPEND CUDA_NVCC_FLAGS "--std=c++14")
    endif()
  endif()
endif()
316
389
if (APPLE )
317
390
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
@@ -379,7 +452,53 @@ if(HAVE_CUDA)
# With a Visual Studio generator, turn off CMake's automatic re-run checks
# unless the user opted out via OPENCV_SKIP_CUDA_CMAKE_SUPPRESS_REGENERATION.
if(CMAKE_GENERATOR MATCHES "Visual Studio"
    AND NOT OPENCV_SKIP_CUDA_CMAKE_SUPPRESS_REGENERATION
)
  message(STATUS "CUDA: MSVS generator is detected. Disabling CMake re-run checks (CMAKE_SUPPRESS_REGENERATION=ON). You need to run CMake manually if updates are required.")
  set(CMAKE_SUPPRESS_REGENERATION ON)
endif()
385
458
endif ()
459
+
460
+
# ----------------------------------------------------------------------------
# Add CUDA libraries (needed for apps/tools, samples)
# ----------------------------------------------------------------------------
if(HAVE_CUDA)
  # When CUDA_CUDA_LIBRARY points at the toolkit's stub libcuda.so, executables
  # need extra linker flags to link against it.
  # details: https://github.com/NVIDIA/nvidia-docker/issues/775
  if(" ${CUDA_CUDA_LIBRARY}" MATCHES "/stubs/libcuda.so" AND NOT OPENCV_SKIP_CUDA_STUB_WORKAROUND)
    set(CUDA_STUB_ENABLED_LINK_WORKAROUND 1)
    if(EXISTS "${CUDA_CUDA_LIBRARY}" AND NOT OPENCV_SKIP_CUDA_STUB_WORKAROUND_RPATH_LINK)
      # Expose the stub as libcuda.so.1 inside the build tree (symlink first,
      # plain copy as a fallback) and point -rpath-link at that directory.
      set(CUDA_STUB_TARGET_PATH "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/")
      execute_process(
          COMMAND ${CMAKE_COMMAND} -E create_symlink "${CUDA_CUDA_LIBRARY}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1"
          RESULT_VARIABLE CUDA_STUB_SYMLINK_RESULT)
      if(NOT CUDA_STUB_SYMLINK_RESULT EQUAL 0)
        execute_process(
            COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_CUDA_LIBRARY}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1"
            RESULT_VARIABLE CUDA_STUB_COPY_RESULT)
        if(NOT CUDA_STUB_COPY_RESULT EQUAL 0)
          # Neither symlink nor copy worked: the workaround cannot be applied.
          set(CUDA_STUB_ENABLED_LINK_WORKAROUND 0)
        endif()
      endif()
      if(CUDA_STUB_ENABLED_LINK_WORKAROUND)
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath-link,\"${CUDA_STUB_TARGET_PATH}\"")
      endif()
    else()
      # Alternative workaround: let the linker accept undefined symbols in
      # shared libraries.
      set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-shlib-undefined")
    endif()
    if(NOT CUDA_STUB_ENABLED_LINK_WORKAROUND)
      message(WARNING "CUDA: workaround for stubs/libcuda.so.1 is not applied")
    endif()
  endif()

  list(APPEND OPENCV_LINKER_LIBS ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
  if(HAVE_CUBLAS)
    list(APPEND OPENCV_LINKER_LIBS ${CUDA_cublas_LIBRARY})
  endif()
  if(HAVE_CUFFT)
    list(APPEND OPENCV_LINKER_LIBS ${CUDA_cufft_LIBRARY})
  endif()
  # Add every CUDA library directory as a linker search path.
  foreach(p ${CUDA_LIBS_PATH})
    if(MSVC AND CMAKE_GENERATOR MATCHES "Ninja|JOM")
      # Literal double quotes are kept around the path for these generators.
      list(APPEND OPENCV_LINKER_LIBS ${CMAKE_LIBRARY_PATH_FLAG}"${p}")
    else()
      list(APPEND OPENCV_LINKER_LIBS ${CMAKE_LIBRARY_PATH_FLAG}${p})
    endif()
  endforeach()
endif()
0 commit comments