Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 26 additions & 119 deletions cmake/CUDAArchitectures.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,15 @@ function(flamegpu_visualiser_set_cuda_architectures)
# Query NVCC for the acceptable SM values, this is used in multiple places
if(NOT DEFINED SUPPORTED_CUDA_ARCHITECTURES_NVCC)
execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--help" OUTPUT_VARIABLE NVCC_HELP_STR ERROR_VARIABLE NVCC_HELP_STR)
# Match all comptue_XX or sm_XXs
string(REGEX MATCHALL "'(sm|compute)_[0-9]+'" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${NVCC_HELP_STR}" )
# Strip just the numeric component
string(REGEX REPLACE "'(sm|compute)_([0-9]+)'" "\\2" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${SUPPORTED_CUDA_ARCHITECTURES_NVCC}" )
# Match all comptue_XX or sm_XX or lto_XX values, including single lowercase letter suffixes (a or f only supported so far, but allowing flexibility)
string(REGEX MATCHALL "'(sm|compute|lto)_([0-9]+[a-z]?)'" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${NVCC_HELP_STR}" )
# Strip out just the portiaon after the _, which is the value for CMAKE_CUDA_ARCHITECTURES
string(REGEX REPLACE "'(sm|compute|lto)_([0-9]+[a-z]?)'" "\\2" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${SUPPORTED_CUDA_ARCHITECTURES_NVCC}" )
# Remove dupes and sort to build the correct list of supported CUDA_ARCH.
list(REMOVE_DUPLICATES SUPPORTED_CUDA_ARCHITECTURES_NVCC)
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES_NVCC "")
list(SORT SUPPORTED_CUDA_ARCHITECTURES_NVCC)
# Store the supported arch's once and only once. This could be a cache var given the cuda compiler should not be able to change without clearing th cache?
# Store the supported arch's once and only once in the parent scope.
set(SUPPORTED_CUDA_ARCHITECTURES_NVCC ${SUPPORTED_CUDA_ARCHITECTURES_NVCC} PARENT_SCOPE)
endif()
list(LENGTH SUPPORTED_CUDA_ARCHITECTURES_NVCC SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT)
Expand Down Expand Up @@ -166,134 +166,41 @@ function(flamegpu_visualiser_set_cuda_architectures)
set(using_keyword_arch TRUE)
endif()
# Cmake 3.18+ expects a list of 1 or more <sm>, <sm>-real or <sm>-virtual.
# CMake isn't aware of the exact SMS supported by the CUDA version afiak, but we have already queired nvcc for this (once and only once)
# If nvcc parsing worked and a single keyword option is not being used, attempt the validation:
if(SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT GREATER 0 AND NOT using_keyword_arch)
# Transform a copy of the list of supported architectures, to hopefully just contain numbers
# CMake isn't aware of the exact SMS supported by the CUDA version afiak, but we have already queried nvcc for this (once and only once)
# If nvcc parsing worked and a single keyword option is not being used, attempt the validation
# But do not emit this more than once per invocation of CMake. parent scope wasn't enough in this case so using a global property
get_property(
WARNING_EMITTED
GLOBAL PROPERTY
__flamegpu_visualiser_set_cuda_architectures_warning_emitted
)
if(SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT GREATER 0 AND NOT using_keyword_arch AND NOT WARNING_EMITTED)
# Transform a copy of the list of requested architectures, removing -real and -virutal CMake components
set(archs ${CMAKE_CUDA_ARCHITECTURES})
list(TRANSFORM archs REPLACE "(\-real|\-virtual)" "")
# If any of the specified architectures are not in the nvcc reported list, error.
# If any of the specified architectures are not in the nvcc reported list, raise a warning
foreach(ARCH IN LISTS archs)
if(NOT ARCH IN_LIST SUPPORTED_CUDA_ARCHITECTURES_NVCC)
message(FATAL_ERROR
" CMAKE_CUDA_ARCHITECTURES value `${ARCH}` is not supported by nvcc ${CMAKE_CUDA_COMPILER_VERSION}.\n"
message(WARNING
" CMAKE_CUDA_ARCHITECTURES value `${ARCH}` may not be supported by nvcc ${CMAKE_CUDA_COMPILER_VERSION}, compilation may fail\n"
" Supported architectures based on nvcc --help: \n"
" ${SUPPORTED_CUDA_ARCHITECTURES_NVCC}\n")
endif()
endforeach()
unset(archs)
# set the global property so this is not re-emitted for each project()
set_property(
GLOBAL PROPERTY
__flamegpu_visualiser_set_cuda_architectures_warning_emitted
TRUE
)
endif()
else()
# Otherwise, set a mulit-arch default for good compatibility and performacne
# If we're using CMake >= 3.23, we can just use all-major, though we then have to find the minimum a different way?
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.23")
set(CMAKE_CUDA_ARCHITECTURES "all-major")
else()
# For CMake < 3.23, we have to make our own all-major equivalent.
# If we have nvcc help outut, we can generate this from all the elements that end with a 0 (and the first element if it does not.)
if(SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT GREATER 0)
# If the lowest support arch is not major, add it to the default
list(GET SUPPORTED_CUDA_ARCHITECTURES_NVCC 0 lowest_supported)
if(NOT lowest_supported MATCHES "0$")
list(APPEND default_archs ${lowest_supported})
endif()
unset(lowest_supported)
# For each architecture, if it is major add it to the default list
foreach(ARCH IN LISTS SUPPORTED_CUDA_ARCHITECTURES_NVCC)
if(ARCH MATCHES "0$")
list(APPEND default_archs ${ARCH})
endif()
endforeach()
else()
# If nvcc help output parsing failed, just use an informed guess option from CUDA 11.8
set(default_archs "35;50;60;70;80")
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.8)
list(APPEND default_archs "90")
endif()
message(AUTHOR_WARNING
" ${CMAKE_CURRENT_FUNCTION} failed to parse NVCC --help output for default architecture generation\n"
" Using ${default_archs} based on CUDA 11.0 to 11.8."
)
endif()
# We actually want real for each arch, then virtual for the final, but only for library-provided values, to only embed one arch worth of ptx.
# So grab the last element of the list
list(GET default_archs -1 final)
# append -real to each element, to not embed ptx for that arch too
list(TRANSFORM default_archs APPEND "-real")
# add the -virtual version of the final element
list(APPEND default_archs "${final}-virtual")
# Set the value
set(CMAKE_CUDA_ARCHITECTURES ${default_archs})
#unset local vars
unset(default_archs)
endif()
# As we require CMake >= 3.23 (and CUDA >= 12 which includes all-major), we can just use all-major
set(CMAKE_CUDA_ARCHITECTURES "all-major")
endif()
# Promote the value to the parent's scope, where it is needed on the first invokation (might be fine with cache, but just incase)
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PARENT_SCOPE)
# Promote the value to the cache for reconfigure persistence, as the enable_language sets it on the cache
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" CACHE STRING "CUDA architectures" FORCE)
endfunction()

#[[[
# Get the minimum CUDA Architecture from the current CMAKE_CUDA_ARCHITECTURES value if possible
#
# Gets the minimum CUDA architectuyre from teh current value of CMAKE_CUDA_ARCHITECTURES if possible, storing the result in the pass-by-reference return value
# Supports CMAKE_CUDA_ARCHITECTURE values including integers, -real post-fixed integers, -virtual post-fixed integers, all-major and all.
# Does not support native, instead returning -1.
# all or all-major are supported by querying nvcc --help to detect the minimum built for.
#
# CUDA must be enabled as a language prior to this method being called, and CMAKE_CUDA_ARCHITECTURES must be defined and non-empty
#
# :param minimum_architecture: the minimum architecture set in CMAKE_CUDA_ARCHITECTURES
# :type NO_VALIDATE_ARCHITECTURES: integer
#]]
function(flamegpu_visualiser_get_minimum_cuda_architecture minimum_architecture)
if(DEFINED CMAKE_CUDA_ARCHITECTURES)
# Cannot deal with native gracefully
if("native" IN_LIST CMAKE_CUDA_ARCHITECTURES)
# If it's native, we would need to exeucte some CUDA code to detect this. For now set -1.
set(flamegpu_visualiser_minimum_cuda_architecture -1)
# if all/all-major is specified, detect via nvcc --help. It must be the only option (CMake doens't validate this and generates bad gencodes otherwise)
elseif("all-major" IN_LIST CMAKE_CUDA_ARCHITECTURES OR "all" IN_LIST CMAKE_CUDA_ARCHITECTURES)
# Query NVCC for the acceptable SM values.
if(NOT DEFINED SUPPORTED_CUDA_ARCHITECTURES_NVCC)
execute_process(COMMAND ${CMAKE_CUDA_COMPILER} "--help" OUTPUT_VARIABLE NVCC_HELP_STR ERROR_VARIABLE NVCC_HELP_STR)
# Match all comptue_XX or sm_XXs
string(REGEX MATCHALL "'(sm|compute)_[0-9]+'" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${NVCC_HELP_STR}" )
# Strip just the numeric component
string(REGEX REPLACE "'(sm|compute)_([0-9]+)'" "\\2" SUPPORTED_CUDA_ARCHITECTURES_NVCC "${SUPPORTED_CUDA_ARCHITECTURES_NVCC}" )
# Remove dupes and sort to build the correct list of supported CUDA_ARCH.
list(REMOVE_DUPLICATES SUPPORTED_CUDA_ARCHITECTURES_NVCC)
list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES_NVCC "")
list(SORT SUPPORTED_CUDA_ARCHITECTURES_NVCC)
# Store the supported arch's once and only once. This could be a cache var given the cuda compiler should not be able to change without clearing th cache?
set(SUPPORTED_CUDA_ARCHITECTURES_NVCC ${SUPPORTED_CUDA_ARCHITECTURES_NVCC} PARENT_SCOPE)
endif()
list(LENGTH SUPPORTED_CUDA_ARCHITECTURES_NVCC SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT)
if(SUPPORTED_CUDA_ARCHITECTURES_NVCC_COUNT GREATER 0)
# For both all and all-major, the lowest arch should be the lowest supported. This is true for CUDA <= 11.8 atleast.
list(GET SUPPORTED_CUDA_ARCHITECTURES_NVCC 0 lowest)
set(flamegpu_visualiser_minimum_cuda_architecture ${lowest})
else()
# If nvcc didn't give anything useful, set -1.
set(flamegpu_visualiser_minimum_cuda_architecture -1)
endif()
else()
# Otherwise it should just be a list of one or more <sm>/<sm>-real/<sm-virtual>
# Copy the list
set(archs ${CMAKE_CUDA_ARCHITECTURES})
# Replace occurances of -real and -virtual
list(TRANSFORM archs REPLACE "(\-real|\-virtual)" "")
# Sort the list numerically (natural option
list(SORT archs COMPARE NATURAL ORDER ASCENDING)
# Get the first element
list(GET archs 0 lowest)
# Set the value for later returning
set(flamegpu_visualiser_minimum_cuda_architecture ${lowest})
endif()
# Set the return value as required, effectively pass by reference.
set(${minimum_architecture} ${flamegpu_visualiser_minimum_cuda_architecture} PARENT_SCOPE)
else()
message(FATAL_ERROR "${CMAKE_CURRENT_FUNCTION}: CMAKE_CUDA_ARCHITECTURES is not set or is empty")
endif()
endfunction()