Skip to content

Commit 52ac366

Browse files
committed
Merge pull request opencv#17745 from alalek:cuda_fix_ccbin
2 parents 524a2ff + 8bf1b9a commit 52ac366

File tree

3 files changed

+84
-42
lines changed

3 files changed

+84
-42
lines changed

cmake/OpenCVDetectCUDA.cmake

Lines changed: 84 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,15 @@
1-
if(WIN32 AND NOT MSVC)
1+
# Skip CUDA on Windows with a non-MSVC host compiler, unless the user
# explicitly overrides this platform check with OPENCV_CMAKE_FORCE_CUDA.
if((WIN32 AND NOT MSVC) AND NOT OPENCV_CMAKE_FORCE_CUDA)
22
message(STATUS "CUDA compilation is disabled (due to only Visual Studio compiler supported on your platform).")
33
return()
44
endif()
55

6-
if(NOT UNIX AND CV_CLANG)
6+
# Skip CUDA for Clang on non-UNIX platforms, unless the user explicitly
# overrides this platform check with OPENCV_CMAKE_FORCE_CUDA.
if((NOT UNIX AND CV_CLANG) AND NOT OPENCV_CMAKE_FORCE_CUDA)
77
message(STATUS "CUDA compilation is disabled (due to Clang unsupported on your platform).")
88
return()
99
endif()
1010

11-
if(CUDA_HOST_COMPILER)
12-
# respect the CUDA_HOST_COMPILER if specified manually
13-
set(PREFERRED_CUDA_HOST_COMPILER "${CUDA_HOST_COMPILER}")
14-
endif()
11+
#set(OPENCV_CMAKE_CUDA_DEBUG 1)
12+
1513
if(((NOT CMAKE_VERSION VERSION_LESS "3.9.0") # requires https://gitlab.kitware.com/cmake/cmake/merge_requests/663
1614
OR OPENCV_CUDA_FORCE_EXTERNAL_CMAKE_MODULE)
1715
AND NOT OPENCV_CUDA_FORCE_BUILTIN_CMAKE_MODULE)
@@ -56,7 +54,7 @@ if(CUDA_FOUND)
5654
endif()
5755

5856
if(WITH_NVCUVID)
59-
macro(SEARCH_NVCUVID_HEADER _filename _result)
57+
macro(ocv_cuda_SEARCH_NVCUVID_HEADER _filename _result)
6058
# place header file under CUDA_TOOLKIT_TARGET_DIR or CUDA_TOOLKIT_ROOT_DIR
6159
find_path(_header_result
6260
${_filename}
@@ -73,8 +71,8 @@ if(CUDA_FOUND)
7371
endif()
7472
unset(_header_result CACHE)
7573
endmacro()
76-
SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER)
77-
SEARCH_NVCUVID_HEADER("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER)
74+
ocv_cuda_SEARCH_NVCUVID_HEADER("nvcuvid.h" HAVE_NVCUVID_HEADER)
75+
ocv_cuda_SEARCH_NVCUVID_HEADER("dynlink_nvcuvid.h" HAVE_DYNLINK_NVCUVID_HEADER)
7876
find_cuda_helper_libs(nvcuvid)
7977
if(WIN32)
8078
find_cuda_helper_libs(nvcuvenc)
@@ -115,44 +113,89 @@ if(CUDA_FOUND)
115113
unset(CUDA_ARCH_PTX CACHE)
116114
endif()
117115

118-
if(PREFERRED_CUDA_HOST_COMPILER)
119-
LIST(APPEND CUDA_NVCC_FLAGS -ccbin "${PREFERRED_CUDA_HOST_COMPILER}")
116+
if(OPENCV_CUDA_DETECTION_NVCC_FLAGS MATCHES "-ccbin")
117+
# already specified by user
118+
elseif(CUDA_HOST_COMPILER AND EXISTS "${CUDA_HOST_COMPILER}")
119+
LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${CUDA_HOST_COMPILER}")
120+
elseif(WIN32 AND CMAKE_LINKER) # Workaround for VS cl.exe not being in the env. path
121+
get_filename_component(host_compiler_bindir ${CMAKE_LINKER} DIRECTORY)
122+
LIST(APPEND OPENCV_CUDA_DETECTION_NVCC_FLAGS -ccbin "${host_compiler_bindir}")
120123
else()
121-
if(WIN32 AND CMAKE_LINKER) #Workaround for VS cl.exe not being in the env. path
122-
get_filename_component(host_compiler_bindir ${CMAKE_LINKER} DIRECTORY)
123-
LIST(APPEND CUDA_NVCC_FLAGS -ccbin ${host_compiler_bindir})
124+
if(CUDA_HOST_COMPILER)
125+
message(STATUS "CUDA: CUDA_HOST_COMPILER='${CUDA_HOST_COMPILER}' is not valid, autodetection may not work. Specify OPENCV_CUDA_DETECTION_NVCC_FLAGS with -ccbin option for fix that")
124126
endif()
125127
endif()
126128

127-
SET(DETECT_ARCHS_COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run")
128-
129129
macro(ocv_filter_available_architecture result_list)
130-
if(DEFINED CUDA_SUPPORTED_CC)
131-
set(${result_list} "${CUDA_SUPPORTED_CC}")
130+
set(__cache_key_check "${ARGN} : ${CUDA_NVCC_EXECUTABLE} ${OPENCV_CUDA_DETECTION_NVCC_FLAGS}")
131+
if(DEFINED OPENCV_CACHE_CUDA_SUPPORTED_CC AND OPENCV_CACHE_CUDA_SUPPORTED_CC_check STREQUAL __cache_key_check)
132+
set(${result_list} "${OPENCV_CACHE_CUDA_SUPPORTED_CC}")
132133
else()
133134
set(CC_LIST ${ARGN})
134135
foreach(target_arch ${CC_LIST})
135136
string(REPLACE "." "" target_arch_short "${target_arch}")
136137
set(NVCC_OPTION "-gencode;arch=compute_${target_arch_short},code=sm_${target_arch_short}")
137-
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu"
138-
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
139-
RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
140-
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
138+
set(_cmd "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} ${NVCC_OPTION} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" --compile)
139+
execute_process(
140+
COMMAND ${_cmd}
141+
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
142+
RESULT_VARIABLE _nvcc_res
143+
OUTPUT_VARIABLE _nvcc_out
144+
ERROR_VARIABLE _nvcc_err
145+
#ERROR_QUIET
146+
OUTPUT_STRIP_TRAILING_WHITESPACE
147+
)
148+
if(OPENCV_CMAKE_CUDA_DEBUG)
149+
message(WARNING "COMMAND: ${_cmd}")
150+
message(STATUS "Result: ${_nvcc_res}")
151+
message(STATUS "Out: ${_nvcc_out}")
152+
message(STATUS "Err: ${_nvcc_err}")
153+
endif()
141154
if(_nvcc_res EQUAL 0)
142-
set(${result_list} "${${result_list}} ${target_arch}")
155+
LIST(APPEND ${result_list} "${target_arch}")
143156
endif()
144157
endforeach()
145158
string(STRIP "${${result_list}}" ${result_list})
146-
set(CUDA_SUPPORTED_CC ${${result_list}} CACHE INTERNAL "List of supported compute capability")
159+
if(" ${${result_list}}" STREQUAL " ")
160+
message(WARNING "CUDA: Autodetection arch list is empty. Please enable OPENCV_CMAKE_CUDA_DEBUG=1 and check/specify OPENCV_CUDA_DETECTION_NVCC_FLAGS variable")
161+
endif()
162+
163+
# cache detected values
164+
set(OPENCV_CACHE_CUDA_SUPPORTED_CC ${${result_list}} CACHE INTERNAL "")
165+
set(OPENCV_CACHE_CUDA_SUPPORTED_CC_check "${__cache_key_check}" CACHE INTERNAL "")
147166
endif()
148167
endmacro()
149168

150169
macro(ocv_detect_native_cuda_arch status output)
151-
execute_process( COMMAND ${DETECT_ARCHS_COMMAND}
152-
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
153-
RESULT_VARIABLE ${status} OUTPUT_VARIABLE _nvcc_out
154-
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
155-
string(REGEX REPLACE ".*\n" "" ${output} "${_nvcc_out}") #Strip leading warning messages, if any
170+
set(OPENCV_CUDA_DETECT_ARCHS_COMMAND "${CUDA_NVCC_EXECUTABLE}" ${OPENCV_CUDA_DETECTION_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run")
171+
set(__cache_key_check "${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")
172+
if(DEFINED OPENCV_CACHE_CUDA_ACTIVE_CC AND OPENCV_CACHE_CUDA_ACTIVE_CC_check STREQUAL __cache_key_check)
173+
set(${output} "${OPENCV_CACHE_CUDA_ACTIVE_CC}")
174+
set(${status} 0)
175+
else()
176+
execute_process(
177+
COMMAND ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}
178+
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
179+
RESULT_VARIABLE ${status}
180+
OUTPUT_VARIABLE _nvcc_out
181+
ERROR_VARIABLE _nvcc_err
182+
# ERROR_QUIET is intentionally not used: it would discard stderr and leave _nvcc_err empty (stderr is captured by ERROR_VARIABLE, not printed)
183+
OUTPUT_STRIP_TRAILING_WHITESPACE
184+
)
185+
if(OPENCV_CMAKE_CUDA_DEBUG)
186+
message(WARNING "COMMAND: ${OPENCV_CUDA_DETECT_ARCHS_COMMAND}")
187+
message(STATUS "Result: ${${status}}")
188+
message(STATUS "Out: ${_nvcc_out}")
189+
message(STATUS "Err: ${_nvcc_err}")
190+
endif()
191+
string(REGEX REPLACE ".*\n" "" ${output} "${_nvcc_out}") #Strip leading warning messages, if any
192+
193+
if(${status} EQUAL 0)
194+
# cache detected values
195+
# Cache the value produced for this macro's own 'output' argument (not 'result_list', which belongs to ocv_filter_available_architecture)
set(OPENCV_CACHE_CUDA_ACTIVE_CC "${${output}}" CACHE INTERNAL "")
196+
set(OPENCV_CACHE_CUDA_ACTIVE_CC_check "${__cache_key_check}" CACHE INTERNAL "")
197+
endif()
198+
endif()
156199
endmacro()
157200

158201
macro(ocv_wipeout_deprecated _arch_bin_list)
@@ -181,14 +224,21 @@ if(CUDA_FOUND)
181224
else()
182225
string(REGEX MATCHALL "[0-9]+\\.[0-9]" __cuda_arch_bin "${_nvcc_out}")
183226
endif()
227+
elseif(CUDA_ARCH_BIN)
228+
message(STATUS "CUDA: Using CUDA_ARCH_BIN=${CUDA_ARCH_BIN}")
229+
set(__cuda_arch_bin ${CUDA_ARCH_BIN})
184230
endif()
185231

186232
if(NOT DEFINED __cuda_arch_bin)
187233
if(ARM)
188234
set(__cuda_arch_bin "3.2")
189235
set(__cuda_arch_ptx "")
190236
elseif(AARCH64)
191-
ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
237+
if(NOT CMAKE_CROSSCOMPILING)
238+
ocv_detect_native_cuda_arch(_nvcc_res _nvcc_out)
239+
else()
240+
set(_nvcc_res -1) # emulate error, see below
241+
endif()
192242
if(NOT _nvcc_res EQUAL 0)
193243
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
194244
# TX1 (5.3) TX2 (6.2) Xavier (7.2) V100 (7.0)
@@ -222,11 +272,9 @@ if(CUDA_FOUND)
222272
string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}")
223273
string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")
224274

225-
# Ckeck if user specified 1.0 compute capability: we don't support it
226-
string(REGEX MATCH "1.0" HAS_ARCH_10 "${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}")
227-
set(CUDA_ARCH_BIN_OR_PTX_10 0)
228-
if(NOT ${HAS_ARCH_10} STREQUAL "")
229-
set(CUDA_ARCH_BIN_OR_PTX_10 1)
275+
# Check if user specified 1.0 compute capability: we don't support it
276+
if(" ${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}" MATCHES " 1.0")
277+
# Compute capability 1.0 is rejected at configure time; tell the user how to recover.
message(SEND_ERROR "CUDA: 1.0 compute capability is not supported - exclude it from ARCH/PTX list and re-run CMake")
230278
endif()
231279

232280
# NVCC flags to be set
@@ -421,7 +469,7 @@ if(HAVE_CUDA)
421469
if(CMAKE_GENERATOR MATCHES "Visual Studio"
422470
AND NOT OPENCV_SKIP_CUDA_CMAKE_SUPPRESS_REGENERATION
423471
)
424-
message(WARNING "CUDA with MSVS generator is detected. Disabling CMake re-run checks (CMAKE_SUPPRESS_REGENERATION=ON). You need to run CMake manually if updates are required.")
472+
message(STATUS "CUDA: MSVS generator is detected. Disabling CMake re-run checks (CMAKE_SUPPRESS_REGENERATION=ON). You need to run CMake manually if updates are required.")
425473
set(CMAKE_SUPPRESS_REGENERATION ON)
426474
endif()
427475
endif()

cmake/templates/cvconfig.h.in

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
/* Compile for 'real' NVIDIA GPU architectures */
1414
#define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
1515

16-
/* Create PTX or BIN for 1.0 compute capability */
17-
#cmakedefine CUDA_ARCH_BIN_OR_PTX_10
18-
1916
/* NVIDIA GPU features are used */
2017
#define CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES}"
2118

modules/core/include/opencv2/core/private.cuda.hpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,6 @@
8282
# error "Insufficient Cuda Runtime library version, please update it."
8383
# endif
8484

85-
# if defined(CUDA_ARCH_BIN_OR_PTX_10)
86-
# error "OpenCV CUDA module doesn't support NVIDIA compute capability 1.0"
87-
# endif
8885
#endif
8986

9087
//! @cond IGNORED

0 commit comments

Comments
 (0)