Skip to content

Commit 5487bca

Browse files
committed
Merge pull request opencv#17777 from alalek:backport_cuda_cmake
2 parents b90a2a8 + e0f9eac commit 5487bca

File tree

3 files changed

+143
-30
lines changed

3 files changed

+143
-30
lines changed

cmake/OpenCVDetectCUDA.cmake

Lines changed: 143 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1-
if(WIN32 AND NOT MSVC)
1+
# Early exit: leave this file without detecting CUDA when building on Windows with a
# non-MSVC compiler, or whenever OPENCV_CMAKE_FORCE_CUDA is set.
# NOTE(review): a truthy OPENCV_CMAKE_FORCE_CUDA also takes this branch, yet the status
# text below only describes the Visual Studio restriction and the variable name suggests
# the opposite effect - confirm the intended semantics of the flag.
if((WIN32 AND NOT MSVC) OR OPENCV_CMAKE_FORCE_CUDA)
22
message(STATUS "CUDA compilation is disabled (due to only Visual Studio compiler supported on your platform).")
33
return()
44
endif()
55

6-
if(NOT UNIX AND CV_CLANG)
6+
# Early exit: leave this file without detecting CUDA when Clang is used on a non-UNIX
# platform, or whenever OPENCV_CMAKE_FORCE_CUDA is set.
# NOTE(review): the status text below only describes the Clang restriction; a truthy
# OPENCV_CMAKE_FORCE_CUDA reaching this condition would print it as well - confirm the
# intended semantics of the flag.
if((NOT UNIX AND CV_CLANG) OR OPENCV_CMAKE_FORCE_CUDA)
77
message(STATUS "CUDA compilation is disabled (due to Clang unsupported on your platform).")
88
return()
99
endif()
1010

11+
#set(OPENCV_CMAKE_CUDA_DEBUG 1)
1112

1213
if(((NOT CMAKE_VERSION VERSION_LESS "3.9.0") # requires https://gitlab.kitware.com/cmake/cmake/merge_requests/663
1314
OR OPENCV_CUDA_FORCE_EXTERNAL_CMAKE_MODULE)
@@ -43,7 +44,7 @@ if(CUDA_FOUND)
4344
endif()
4445

4546
if(WITH_NVCUVID)
46-
macro(SEARCH_NVCUVID_HEADER _filename _result)
47+
macro(ocv_cuda_SEARCH_NVCUVID_HEADER _filename _result)
4748
# place header file under CUDA_TOOLKIT_TARGET_DIR or CUDA_TOOLKIT_ROOT_DIR
4849
find_path(_header_result
4950
${_filename}
@@ -60,8 +61,8 @@ if(CUDA_FOUND)
6061
endif()
6162
unset(_header_result CACHE)
6263
endmacro()
63-
SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER)
64-
SEARCH_NVCUVID_HEADER("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER)
64+
ocv_cuda_SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER)
65+
ocv_cuda_SEARCH_NVCUVID_HEADER("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER)
6566
find_cuda_helper_libs(nvcuvid)
6667
if(WIN32)
6768
find_cuda_helper_libs(nvcuvenc)
@@ -102,32 +103,89 @@ if(CUDA_FOUND)
102103
unset(CUDA_ARCH_PTX CACHE)
103104
endif()
104105

106+
# Select the "-ccbin" value appended to OPENCV_CUDA_DETECTION_NVCC_FLAGS.
# Order of preference:
#   1. a -ccbin option already present in OPENCV_CUDA_DETECTION_NVCC_FLAGS (left untouched)
#   2. CUDA_HOST_COMPILER, when it names an existing path
#   3. on Windows, the directory that contains CMAKE_LINKER
# If none applies and CUDA_HOST_COMPILER is set to a non-existing path, only a status
# message is printed.
if(OPENCV_CUDA_DETECTION_NVCC_FLAGS MATCHES "-ccbin")
  # Nothing to add: the user supplied the option explicitly.
elseif(CUDA_HOST_COMPILER AND EXISTS "${CUDA_HOST_COMPILER}")
  list(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS
    -ccbin
    "${CUDA_HOST_COMPILER}"
  )
elseif(WIN32 AND CMAKE_LINKER)
  # Workaround for VS cl.exe not being in the env. path:
  # pass the linker's directory as the host compiler location.
  get_filename_component(host_compiler_bindir ${CMAKE_LINKER} DIRECTORY)
  list(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS
    -ccbin
    "${host_compiler_bindir}"
  )
elseif(CUDA_HOST_COMPILER)
  message(STATUS "CUDA: CUDA_HOST_COMPILER='${CUDA_HOST_COMPILER}' is not valid, autodetection may not work. Specify OPENCV_CUDA_DETECTION_NVCC_FLAGS with -ccbin option for fix that")
endif()
118+
105119
# ocv_filter_available_architecture(<result_list> <arch>...)
#
# Stores in <result_list> the subset of the given compute capabilities (for example
# 3.0 5.2) for which nvcc successfully compiles cmake/checks/OpenCVDetectCudaArch.cu
# with "-gencode arch=compute_XY,code=sm_XY" (exit code 0).
#
# The result is kept in the cache entry OPENCV_CACHE_CUDA_SUPPORTED_CC and is reused only
# while the requested architectures, CUDA_NVCC_EXECUTABLE and
# OPENCV_CUDA_DETECTION_NVCC_FLAGS are unchanged (tracked by the *_check entry).
# With OPENCV_CMAKE_CUDA_DEBUG set, every probe command is printed together with its
# result, stdout and stderr.
macro(ocv_filter_available_architecture result_list)
  set(__cache_key_check "${ARGN} : ${CUDA_NVCC_EXECUTABLE} ${OPENCV_CUDA_DETECTION_NVCC_FLAGS}")
  if(DEFINED OPENCV_CACHE_CUDA_SUPPORTED_CC AND OPENCV_CACHE_CUDA_SUPPORTED_CC_check STREQUAL __cache_key_check)
    set(${result_list} "${OPENCV_CACHE_CUDA_SUPPORTED_CC}")
  else()
    # Start from an empty list: the cache-hit branch overwrites the caller's variable, so
    # the probing branch must not append to (and then cache) whatever it held before.
    set(${result_list} "")
    set(CC_LIST ${ARGN})
    foreach(target_arch ${CC_LIST})
      string(REPLACE "." "" target_arch_short "${target_arch}")
      set(NVCC_OPTION "-gencode;arch=compute_${target_arch_short},code=sm_${target_arch_short}")
      set(_cmd "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" --compile)
      execute_process(
          COMMAND ${_cmd}
          WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
          RESULT_VARIABLE _nvcc_res
          OUTPUT_VARIABLE _nvcc_out
          ERROR_VARIABLE _nvcc_err
          OUTPUT_STRIP_TRAILING_WHITESPACE
      )
      if(OPENCV_CMAKE_CUDA_DEBUG)
        message(WARNING "COMMAND: ${_cmd}")
        message(STATUS "Result: ${_nvcc_res}")
        message(STATUS "Out: ${_nvcc_out}")
        message(STATUS "Err: ${_nvcc_err}")
      endif()
      if(_nvcc_res EQUAL 0)
        list(APPEND ${result_list} "${target_arch}")
      endif()
    endforeach()
    # The leading space keeps the comparison well-formed when the list is empty.
    if(" ${${result_list}}" STREQUAL " ")
      message(WARNING "CUDA: Autodetection arch list is empty. Please enable OPENCV_CMAKE_CUDA_DEBUG=1 and check/specify OPENCV_CUDA_DETECTION_NVCC_FLAGS variable")
    endif()

    # cache detected values
    set(OPENCV_CACHE_CUDA_SUPPORTED_CC "${${result_list}}" CACHE INTERNAL "")
    set(OPENCV_CACHE_CUDA_SUPPORTED_CC_check "${__cache_key_check}" CACHE INTERNAL "")
  endif()
endmacro()
125158

126159
# ocv_detect_native_cuda_arch(<status> <output>)
#
# Builds and runs cmake/checks/OpenCVDetectCudaArch.cu through nvcc ("--run").
#   <status> - receives the process result; 0 means success.
#   <output> - receives the last line of the program's stdout; everything up to the last
#              newline (e.g. warning messages) is removed.
#              NOTE(review): presumably the compute capabilities of the installed GPUs -
#              confirm against OpenCVDetectCudaArch.cu.
#
# A successful, non-empty result is stored in the cache entry OPENCV_CACHE_CUDA_ACTIVE_CC
# and reused while the detection command line stays the same. With
# OPENCV_CMAKE_CUDA_DEBUG set, the command, its result, stdout and stderr are printed.
macro(ocv_detect_native_cuda_arch status output)
  set(OPENCV_CUDA_DETECT_ARCHS_COMMAND "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run")
  set(__cache_key_check "${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")
  # An empty cached value is not treated as a hit, so a cache entry written without a
  # usable result triggers a fresh detection instead of returning nothing.
  if(DEFINED OPENCV_CACHE_CUDA_ACTIVE_CC
      AND NOT " ${OPENCV_CACHE_CUDA_ACTIVE_CC}" STREQUAL " "
      AND OPENCV_CACHE_CUDA_ACTIVE_CC_check STREQUAL __cache_key_check
  )
    set(${output} "${OPENCV_CACHE_CUDA_ACTIVE_CC}")
    set(${status} 0)
  else()
    # stderr is captured into _nvcc_err (not echoed to the console); ERROR_QUIET is not
    # used because it would leave _nvcc_err empty and make the debug output useless.
    execute_process(
        COMMAND ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}
        WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
        RESULT_VARIABLE ${status}
        OUTPUT_VARIABLE _nvcc_out
        ERROR_VARIABLE _nvcc_err
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )
    if(OPENCV_CMAKE_CUDA_DEBUG)
      message(WARNING "COMMAND: ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")
      message(STATUS "Result: ${${status}}")
      message(STATUS "Out: ${_nvcc_out}")
      message(STATUS "Err: ${_nvcc_err}")
    endif()
    string(REGEX REPLACE ".*\n" "" ${output} "${_nvcc_out}") #Strip leading warning messages, if any

    if(${status} EQUAL 0)
      # cache detected values: the value handed back through <output>, so that a cache
      # hit on the next configure returns exactly what this run returned
      set(OPENCV_CACHE_CUDA_ACTIVE_CC "${${output}}" CACHE INTERNAL "")
      set(OPENCV_CACHE_CUDA_ACTIVE_CC_check "${__cache_key_check}" CACHE INTERNAL "")
    endif()
  endif()
endmacro()
132190

133191
macro(ocv_wipeout_deprecated _arch_bin_list)
@@ -156,14 +214,21 @@ if(CUDA_FOUND)
156214
else()
157215
string(REGEX MATCHALL "[0-9]+\\.[0-9]" __cuda_arch_bin "${_nvcc_out}")
158216
endif()
217+
elseif(CUDA_ARCH_BIN)
218+
message(STATUS "CUDA: Using CUDA_ARCH_BIN=${CUDA_ARCH_BIN}")
219+
set(__cuda_arch_bin ${CUDA_ARCH_BIN})
159220
endif()
160221

161222
if(NOT DEFINED __cuda_arch_bin)
162223
if(ARM)
163224
set(__cuda_arch_bin "3.2")
164225
set(__cuda_arch_ptx "")
165226
elseif(AARCH64)
166-
ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
227+
if(NOT CMAKE_CROSSCOMPILING)
228+
ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
229+
else()
230+
set(_nvcc_res -1) # emulate error, see below
231+
endif()
167232
if(NOT _nvcc_res EQUAL 0)
168233
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
169234
# TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0)
@@ -197,11 +262,9 @@ if(CUDA_FOUND)
197262
string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}")
198263
string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")
199264

200-
# Ckeck if user specified 1.0 compute capability: we don't support it
201-
string(REGEX MATCH "1.0" HAS_ARCH_10 "${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}")
202-
set(CUDA_ARCH_BIN_OR_PTX_10 0)
203-
if(NOT ${HAS_ARCH_10} STREQUAL "")
204-
set(CUDA_ARCH_BIN_OR_PTX_10 1)
265+
# Check if user specified 1.0 compute capability: we don't support it.
# The dot is escaped so that only a literal "1.0" entry is rejected; an unescaped "."
# would also match entries written without the dot (e.g. "100" or "120").
if(" ${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}" MATCHES " 1\\.0")
  message(SEND_ERROR "CUDA: 1.0 compute capability is not supported - exclude it from ARCH/PTX list and re-run CMake")
endif()
206269

207270
# NVCC flags to be set
@@ -312,6 +375,16 @@ if(CUDA_FOUND)
312375

313376
# UNIX / macOS specific nvcc flags.
if(UNIX OR APPLE)
  # Position-independent code for the host compiler.
  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC)

  # When C++11 is enabled and neither the C++ flags nor the nvcc flags already carry a
  # "-std=" option, pass an explicit standard to nvcc:
  # C++11 for CUDA older than 11.0, C++14 otherwise.
  if(ENABLE_CXX11 AND NOT " ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_DEBUG} ${CUDA_NVCC_FLAGS}" MATCHES "-std=")
    set(__ocv_cuda_cxx_std "c++14")
    if(CUDA_VERSION VERSION_LESS "11.0")
      set(__ocv_cuda_cxx_std "c++11")
    endif()
    list(APPEND CUDA_NVCC_FLAGS "--std=${__ocv_cuda_cxx_std}")
    unset(__ocv_cuda_cxx_std)
  endif()
endif()
316389
if(APPLE)
317390
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
@@ -379,7 +452,53 @@ if(HAVE_CUDA)
379452
# With a Visual Studio generator, turn off CMake's automatic re-run checks unless the
# user opted out through OPENCV_SKIP_CUDA_CMAKE_SUPPRESS_REGENERATION.
if(NOT OPENCV_SKIP_CUDA_CMAKE_SUPPRESS_REGENERATION AND CMAKE_GENERATOR MATCHES "Visual Studio")
  set(CMAKE_SUPPRESS_REGENERATION ON)
  message(STATUS "CUDA: MSVS generator is detected. Disabling CMake re-run checks (CMAKE_SUPPRESS_REGENERATION=ON). You need to run CMake manually if updates are required.")
endif()
385458
endif()
459+
460+
461+
# ----------------------------------------------------------------------------
462+
# Add CUDA libraries (needed for apps/tools, samples)
463+
# ----------------------------------------------------------------------------
464+
if(HAVE_CUDA)
  # Linking against the stub libcuda.so shipped in the toolkit's "stubs" directory.
  # details: https://github.com/NVIDIA/nvidia-docker/issues/775
  # Can be disabled as a whole with OPENCV_SKIP_CUDA_STUB_WORKAROUND.
  if(" ${CUDA_CUDA_LIBRARY}" MATCHES "/stubs/libcuda.so" AND NOT OPENCV_SKIP_CUDA_STUB_WORKAROUND)
    set(CUDA_STUB_ENABLED_LINK_WORKAROUND 1)

    if(NOT EXISTS "${CUDA_CUDA_LIBRARY}" OR OPENCV_SKIP_CUDA_STUB_WORKAROUND_RPATH_LINK)
      # Stub file is missing, or the rpath-link variant was switched off:
      # let the linker accept undefined symbols in shared libraries instead.
      set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-shlib-undefined")
    else()
      # Expose the stub as "libcuda.so.1" inside the build tree: try a symlink first and
      # fall back to a copy when the symlink cannot be created.
      set(CUDA_STUB_TARGET_PATH "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/")
      execute_process(
          COMMAND ${CMAKE_COMMAND} -E create_symlink "${CUDA_CUDA_LIBRARY}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1"
          RESULT_VARIABLE CUDA_STUB_SYMLINK_RESULT
      )
      if(NOT CUDA_STUB_SYMLINK_RESULT EQUAL 0)
        execute_process(
            COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CUDA_CUDA_LIBRARY}" "${CUDA_STUB_TARGET_PATH}/libcuda.so.1"
            RESULT_VARIABLE CUDA_STUB_COPY_RESULT
        )
        if(NOT CUDA_STUB_COPY_RESULT EQUAL 0)
          # Neither symlink nor copy worked: the workaround cannot be applied.
          set(CUDA_STUB_ENABLED_LINK_WORKAROUND 0)
        endif()
      endif()

      if(CUDA_STUB_ENABLED_LINK_WORKAROUND)
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath-link,\"${CUDA_STUB_TARGET_PATH}\"")
      endif()
    endif()

    if(NOT CUDA_STUB_ENABLED_LINK_WORKAROUND)
      message(WARNING "CUDA: workaround for stubs/libcuda.so.1 is not applied")
    endif()
  endif()

  # CUDA libraries added to OPENCV_LINKER_LIBS: runtime and NPP always,
  # cuBLAS / cuFFT only when enabled.
  set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
  if(HAVE_CUBLAS)
    set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CUDA_cublas_LIBRARY})
  endif()
  if(HAVE_CUFFT)
    set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CUDA_cufft_LIBRARY})
  endif()

  # One library search path flag per entry of CUDA_LIBS_PATH; for MSVC with the
  # Ninja / JOM generators the path is wrapped in literal double quotes.
  foreach(p ${CUDA_LIBS_PATH})
    if(MSVC AND CMAKE_GENERATOR MATCHES "Ninja|JOM")
      set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CMAKE_LIBRARY_PATH_FLAG}"${p}")
    else()
      set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${CMAKE_LIBRARY_PATH_FLAG}${p})
    endif()
  endforeach()
endif()

cmake/templates/cvconfig.h.in

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
/* Compile for 'real' NVIDIA GPU architectures */
1414
#define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
1515

16-
/* Create PTX or BIN for 1.0 compute capability */
17-
#cmakedefine CUDA_ARCH_BIN_OR_PTX_10
18-
1916
/* NVIDIA GPU features are used */
2017
#define CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES}"
2118

modules/core/include/opencv2/core/private.cuda.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,6 @@
8282
# error "Insufficient Cuda Runtime library version, please update it."
8383
# endif
8484

85-
# if defined(CUDA_ARCH_BIN_OR_PTX_10)
86-
# error "OpenCV CUDA module doesn't support NVIDIA compute capability 1.0"
87-
# endif
8885
#endif
8986

9087
//! @cond IGNORED

0 commit comments

Comments
 (0)